From 2904fc7af966ddeb650234f3d43ea2aa9e41accc Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Wed, 1 Apr 2026 21:46:40 -0400 Subject: [PATCH 1/8] fix: address review round 3 for migrate-async (#562) - Pass existing snapshot to create_plan_from_patch to avoid double Redis round-trip - Use _get_client() instead of _redis_client for lazy async client initialization - Remap datatype_changes keys to post-rename field names before quantization - Only resume from completed checkpoint when source index is actually gone --- redisvl/migration/async_executor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/redisvl/migration/async_executor.py b/redisvl/migration/async_executor.py index 960d3eb2a..a8fb7e832 100644 --- a/redisvl/migration/async_executor.py +++ b/redisvl/migration/async_executor.py @@ -744,7 +744,8 @@ def _notify(step: str, detail: Optional[str] = None) -> None: effective_changes = datatype_changes if has_field_renames: field_rename_map = { - fr.old_name: fr.new_name for fr in rename_ops.rename_fields + fr.old_name: fr.new_name + for fr in rename_ops.rename_fields } effective_changes = { field_rename_map.get(k, k): v From ed95188c7db13378bc1c593a893d3a14bd2ada35 Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Wed, 1 Apr 2026 18:37:17 -0400 Subject: [PATCH 2/8] feat(migrate): [4/7] add batch migration with multi-index glob selection, ordered execution, and state tracking Batch planner and executor for migrating multiple indexes in a single operation. Supports glob-based index selection, ordered execution with per-index state tracking, checkpoint/resume semantics, and batch-level reporting with fail-fast or continue-on-error policies. Includes batch unit and integration tests. --- redisvl/migration/__init__.py | 11 + redisvl/migration/batch_executor.py | 366 +++++ redisvl/migration/batch_planner.py | 228 +++ .../test_batch_migration_integration.py | 485 ++++++ tests/unit/test_batch_migration.py | 1366 +++++++++++++++++ 5 files changed, 2456 insertions(+) create mode 100644 redisvl/migration/batch_executor.py create mode 100644 redisvl/migration/batch_planner.py create mode 100644 tests/integration/test_batch_migration_integration.py create mode 100644 tests/unit/test_batch_migration.py diff --git a/redisvl/migration/__init__.py b/redisvl/migration/__init__.py index 255803473..41cefb577 100644 --- a/redisvl/migration/__init__.py +++ b/redisvl/migration/__init__.py @@ -6,8 +6,13 @@ async_wait_for_index_ready, ) from redisvl.migration.async_validation import AsyncMigrationValidator +from redisvl.migration.batch_executor import BatchMigrationExecutor +from redisvl.migration.batch_planner import BatchMigrationPlanner from redisvl.migration.executor import MigrationExecutor from redisvl.migration.models import ( + BatchPlan, + BatchReport, + BatchState, DiskSpaceEstimate, FieldRename, MigrationPlan, @@ -29,6 +34,12 @@ "MigrationValidator", "RenameOperations", "SchemaPatch", + # Batch + "BatchMigrationExecutor", + "BatchMigrationPlanner", + "BatchPlan", + "BatchReport", + "BatchState", # Async "AsyncMigrationExecutor", "AsyncMigrationPlanner", diff --git a/redisvl/migration/batch_executor.py b/redisvl/migration/batch_executor.py new file mode 100644 index 000000000..de3154a2a --- /dev/null +++ b/redisvl/migration/batch_executor.py @@ -0,0 +1,366 @@ +"""Batch migration executor with checkpointing and resume support.""" + +from __future__ import annotations + +import time +from pathlib import Path +from typing import Any, Callable, Optional + +import yaml + +from redisvl.migration.executor import MigrationExecutor +from redisvl.migration.models import ( + BatchIndexReport, + BatchIndexState, + BatchPlan, + BatchReport, + BatchReportSummary, + BatchState, +) +from redisvl.migration.planner import MigrationPlanner +from redisvl.migration.utils import timestamp_utc, write_yaml +from redisvl.redis.connection import RedisConnectionFactory + + +class BatchMigrationExecutor: + """Executor for batch migration of multiple indexes. + + Supports: + - Sequential execution (one index at a time) + - Checkpointing for resume after failure + - Configurable failure policies (fail_fast, continue_on_error) + """ + + def __init__(self, executor: Optional[MigrationExecutor] = None): + self._single_executor = executor or MigrationExecutor() + self._planner = MigrationPlanner() + + def apply( + self, + batch_plan: BatchPlan, + *, + batch_plan_path: Optional[str] = None, + state_path: str = "batch_state.yaml", + report_dir: str = "./reports", + redis_url: Optional[str] = None, + redis_client: Optional[Any] = None, + progress_callback: Optional[Callable[[str, int, int, str], None]] = None, + ) -> BatchReport: + """Execute batch migration with checkpointing. + + Args: + batch_plan: The batch plan to execute. + batch_plan_path: Path to the batch plan file (stored in state for resume). + state_path: Path to checkpoint state file. + report_dir: Directory for per-index reports. + redis_url: Redis connection URL. + redis_client: Existing Redis client. + progress_callback: Optional callback(index_name, position, total, status). + + Returns: + BatchReport with results for all indexes. + """ + # Get Redis client + client = redis_client + if client is None: + if not redis_url: + raise ValueError("Must provide either redis_url or redis_client") + client = RedisConnectionFactory.get_redis_connection(redis_url=redis_url) + + # Ensure report directory exists + report_path = Path(report_dir).resolve() + report_path.mkdir(parents=True, exist_ok=True) + + # Initialize or load state + state = self._init_or_load_state(batch_plan, state_path, batch_plan_path) + started_at = state.started_at + batch_start_time = time.perf_counter() + + # Get applicable indexes + applicable_indexes = [idx for idx in batch_plan.indexes if idx.applicable] + total = len(applicable_indexes) + + # Process each remaining index + for position, index_name in enumerate(state.remaining[:], start=1): + state.current_index = index_name + state.updated_at = timestamp_utc() + self._write_state(state, state_path) + + if progress_callback: + progress_callback(index_name, position, total, "starting") + + # Find the index entry + index_entry = next( + (idx for idx in batch_plan.indexes if idx.name == index_name), None + ) + if not index_entry or not index_entry.applicable: + # Skip non-applicable indexes + state.remaining.remove(index_name) + state.completed.append( + BatchIndexState( + name=index_name, + status="skipped", + completed_at=timestamp_utc(), + ) + ) + state.current_index = None + state.updated_at = timestamp_utc() + self._write_state(state, state_path) + if progress_callback: + progress_callback(index_name, position, total, "skipped") + continue + + # Execute migration for this index + index_state = self._migrate_single_index( + index_name=index_name, + batch_plan=batch_plan, + report_dir=report_path, + redis_client=client, + ) + + # Update state + state.remaining.remove(index_name) + state.completed.append(index_state) + state.current_index = None + state.updated_at = timestamp_utc() + self._write_state(state, state_path) + + if progress_callback: + progress_callback(index_name, position, total, index_state.status) + + # Check failure policy + if ( + index_state.status == "failed" + and batch_plan.failure_policy == "fail_fast" + ): + # Mark remaining as skipped + for remaining_name in state.remaining[:]: + state.remaining.remove(remaining_name) + state.completed.append( + BatchIndexState( + name=remaining_name, + status="skipped", + completed_at=timestamp_utc(), + ) + ) + state.updated_at = timestamp_utc() + self._write_state(state, state_path) + break + + # Build final report + total_duration = time.perf_counter() - batch_start_time + return self._build_batch_report(batch_plan, state, started_at, total_duration) + + def resume( + self, + state_path: str, + *, + batch_plan_path: Optional[str] = None, + retry_failed: bool = False, + report_dir: str = "./reports", + redis_url: Optional[str] = None, + redis_client: Optional[Any] = None, + progress_callback: Optional[Callable[[str, int, int, str], None]] = None, + ) -> BatchReport: + """Resume batch migration from checkpoint. + + Args: + state_path: Path to checkpoint state file. + batch_plan_path: Path to batch plan (uses state.plan_path if not provided). + retry_failed: If True, retry previously failed indexes. + report_dir: Directory for per-index reports. + redis_url: Redis connection URL. + redis_client: Existing Redis client. + progress_callback: Optional callback(index_name, position, total, status). + """ + state = self._load_state(state_path) + plan_path = batch_plan_path or state.plan_path + if not plan_path or not plan_path.strip(): + raise ValueError( + "No batch plan path available. Provide batch_plan_path explicitly, " + "or ensure the checkpoint state contains a valid plan_path." + ) + batch_plan = self._load_batch_plan(plan_path) + + # Optionally retry failed indexes + if retry_failed: + failed_names = [ + idx.name for idx in state.completed if idx.status == "failed" + ] + state.remaining = failed_names + state.remaining + state.completed = [idx for idx in state.completed if idx.status != "failed"] + # Write updated state back to file so apply() picks up the changes + self._write_state(state, state_path) + + # Re-run apply with the updated state + return self.apply( + batch_plan, + state_path=state_path, + report_dir=report_dir, + redis_url=redis_url, + redis_client=redis_client, + progress_callback=progress_callback, + ) + + def _migrate_single_index( + self, + *, + index_name: str, + batch_plan: BatchPlan, + report_dir: Path, + redis_client: Any, + ) -> BatchIndexState: + """Execute migration for a single index.""" + try: + # Create migration plan for this index + plan = self._planner.create_plan_from_patch( + index_name, + schema_patch=batch_plan.shared_patch, + redis_client=redis_client, + ) + + # Execute migration + report = self._single_executor.apply( + plan, + redis_client=redis_client, + ) + + # Write individual report + report_file = report_dir / f"{index_name}_report.yaml" + write_yaml(report.model_dump(exclude_none=True), str(report_file)) + + return BatchIndexState( + name=index_name, + status="succeeded" if report.result == "succeeded" else "failed", + completed_at=timestamp_utc(), + report_path=str(report_file), + error=report.validation.errors[0] if report.validation.errors else None, + ) + + except Exception as e: + return BatchIndexState( + name=index_name, + status="failed", + completed_at=timestamp_utc(), + error=str(e), + ) + + def _init_or_load_state( + self, + batch_plan: BatchPlan, + state_path: str, + batch_plan_path: Optional[str] = None, + ) -> BatchState: + """Initialize new state or load existing checkpoint.""" + path = Path(state_path).resolve() + if path.exists(): + loaded = self._load_state(state_path) + # Validate that loaded state matches the current batch plan + if loaded.batch_id and loaded.batch_id != batch_plan.batch_id: + raise ValueError( + f"Checkpoint state batch_id '{loaded.batch_id}' does not match " + f"current batch plan '{batch_plan.batch_id}'. " + "Remove the stale state file or use a different state_path." + ) + return loaded + + # Create new state with plan_path for resume support + applicable_names = [idx.name for idx in batch_plan.indexes if idx.applicable] + return BatchState( + batch_id=batch_plan.batch_id, + plan_path=str(Path(batch_plan_path).resolve()) if batch_plan_path else "", + started_at=timestamp_utc(), + updated_at=timestamp_utc(), + remaining=applicable_names, + completed=[], + current_index=None, + ) + + def _write_state(self, state: BatchState, state_path: str) -> None: + """Write checkpoint state to file.""" + path = Path(state_path).resolve() + path.parent.mkdir(parents=True, exist_ok=True) + with open(path, "w") as f: + yaml.safe_dump(state.model_dump(exclude_none=True), f, sort_keys=False) + + def _load_state(self, state_path: str) -> BatchState: + """Load checkpoint state from file.""" + path = Path(state_path).resolve() + if not path.is_file(): + raise FileNotFoundError(f"State file not found: {state_path}") + with open(path, "r") as f: + data = yaml.safe_load(f) or {} + return BatchState.model_validate(data) + + def _load_batch_plan(self, plan_path: str) -> BatchPlan: + """Load batch plan from file.""" + path = Path(plan_path).resolve() + if not path.is_file(): + raise FileNotFoundError(f"Batch plan not found: {plan_path}") + with open(path, "r") as f: + data = yaml.safe_load(f) or {} + return BatchPlan.model_validate(data) + + def _build_batch_report( + self, + batch_plan: BatchPlan, + state: BatchState, + started_at: str, + total_duration: float, + ) -> BatchReport: + """Build final batch report from state.""" + index_reports = [] + succeeded = 0 + failed = 0 + skipped = 0 + + for idx_state in state.completed: + index_reports.append( + BatchIndexReport( + name=idx_state.name, + status=idx_state.status, + report_path=idx_state.report_path, + error=idx_state.error, + ) + ) + if idx_state.status == "succeeded": + succeeded += 1 + elif idx_state.status == "failed": + failed += 1 + else: + skipped += 1 + + # Add non-applicable indexes as skipped + for idx in batch_plan.indexes: + if not idx.applicable: + index_reports.append( + BatchIndexReport( + name=idx.name, + status="skipped", + error=idx.skip_reason, + ) + ) + skipped += 1 + + # Determine overall status + if failed == 0 and len(state.remaining) == 0: + status = "completed" + elif succeeded > 0: + status = "partial_failure" + else: + status = "failed" + + return BatchReport( + batch_id=batch_plan.batch_id, + status=status, + started_at=started_at, + completed_at=timestamp_utc(), + summary=BatchReportSummary( + total_indexes=len(batch_plan.indexes), + successful=succeeded, + failed=failed, + skipped=skipped, + total_duration_seconds=round(total_duration, 3), + ), + indexes=index_reports, + ) diff --git a/redisvl/migration/batch_planner.py b/redisvl/migration/batch_planner.py new file mode 100644 index 000000000..33c265c4a --- /dev/null +++ b/redisvl/migration/batch_planner.py @@ -0,0 +1,228 @@ +"""Batch migration planner for migrating multiple indexes with a shared patch.""" + +from __future__ import annotations + +import fnmatch +import uuid +from pathlib import Path +from typing import Any, List, Optional + +import yaml + +from redisvl.index import SearchIndex +from redisvl.migration.models import BatchIndexEntry, BatchPlan, SchemaPatch +from redisvl.migration.planner import MigrationPlanner +from redisvl.migration.utils import list_indexes, timestamp_utc +from redisvl.redis.connection import RedisConnectionFactory + + +class BatchMigrationPlanner: + """Planner for batch migration of multiple indexes with a shared patch. + + The batch planner applies a single SchemaPatch to multiple indexes, + checking applicability for each index based on field name matching. + """ + + def __init__(self): + self._single_planner = MigrationPlanner() + + def create_batch_plan( + self, + *, + indexes: Optional[List[str]] = None, + pattern: Optional[str] = None, + indexes_file: Optional[str] = None, + schema_patch_path: str, + redis_url: Optional[str] = None, + redis_client: Optional[Any] = None, + failure_policy: str = "fail_fast", + ) -> BatchPlan: + """Create a batch migration plan for multiple indexes. + + Args: + indexes: Explicit list of index names. + pattern: Glob pattern to match index names (e.g., "*_idx"). + indexes_file: Path to file with index names (one per line). + schema_patch_path: Path to shared schema patch YAML file. + redis_url: Redis connection URL. + redis_client: Existing Redis client. + failure_policy: "fail_fast" or "continue_on_error". + + Returns: + BatchPlan with shared patch and per-index applicability. + """ + # Get Redis client + client = redis_client + if client is None: + if not redis_url: + raise ValueError("Must provide either redis_url or redis_client") + client = RedisConnectionFactory.get_redis_connection(redis_url=redis_url) + + # Resolve index list + index_names = self._resolve_index_names( + indexes=indexes, + pattern=pattern, + indexes_file=indexes_file, + redis_client=client, + ) + + if not index_names: + raise ValueError("No indexes found matching the specified criteria") + + # Load shared patch + shared_patch = self._single_planner.load_schema_patch(schema_patch_path) + + # Check applicability for each index + batch_entries: List[BatchIndexEntry] = [] + requires_quantization = False + + for index_name in index_names: + entry = self._check_index_applicability( + index_name=index_name, + shared_patch=shared_patch, + redis_client=client, + ) + batch_entries.append(entry) + + # Check if any applicable index requires quantization + if entry.applicable: + try: + plan = self._single_planner.create_plan_from_patch( + index_name, + schema_patch=shared_patch, + redis_client=client, + ) + datatype_changes = MigrationPlanner.get_vector_datatype_changes( + plan.source.schema_snapshot, + plan.merged_target_schema, + ) + if datatype_changes: + requires_quantization = True + except Exception: + pass # Already handled in applicability check + + batch_id = f"batch_{uuid.uuid4().hex[:12]}" + + return BatchPlan( + batch_id=batch_id, + mode="drop_recreate", + failure_policy=failure_policy, + requires_quantization=requires_quantization, + shared_patch=shared_patch, + indexes=batch_entries, + created_at=timestamp_utc(), + ) + + def _resolve_index_names( + self, + *, + indexes: Optional[List[str]], + pattern: Optional[str], + indexes_file: Optional[str], + redis_client: Any, + ) -> List[str]: + """Resolve index names from explicit list, pattern, or file.""" + sources = sum([bool(indexes), bool(pattern), bool(indexes_file)]) + if sources == 0: + raise ValueError("Must provide one of: indexes, pattern, or indexes_file") + if sources > 1: + raise ValueError("Provide only one of: indexes, pattern, or indexes_file") + + if indexes: + return list(indexes) + + if indexes_file: + return self._load_indexes_from_file(indexes_file) + + # Pattern matching + all_indexes = list_indexes(redis_client=redis_client) + matched = [idx for idx in all_indexes if fnmatch.fnmatch(idx, pattern)] + return sorted(matched) + + def _load_indexes_from_file(self, file_path: str) -> List[str]: + """Load index names from a file (one per line).""" + path = Path(file_path).resolve() + if not path.exists(): + raise FileNotFoundError(f"Indexes file not found: {file_path}") + + with open(path, "r") as f: + lines = f.readlines() + + return [ + stripped + for line in lines + if (stripped := line.strip()) and not stripped.startswith("#") + ] + + def _check_index_applicability( + self, + *, + index_name: str, + shared_patch: SchemaPatch, + redis_client: Any, + ) -> BatchIndexEntry: + """Check if the shared patch can be applied to a specific index.""" + try: + index = SearchIndex.from_existing(index_name, redis_client=redis_client) + schema_dict = index.schema.to_dict() + field_names = {f["name"] for f in schema_dict.get("fields", [])} + + # Check that all update_fields exist in this index + missing_fields = [] + for field_update in shared_patch.changes.update_fields: + if field_update.name not in field_names: + missing_fields.append(field_update.name) + + if missing_fields: + return BatchIndexEntry( + name=index_name, + applicable=False, + skip_reason=f"Missing fields: {', '.join(missing_fields)}", + ) + + # Check that add_fields don't already exist + existing_adds: list[str] = [] + for field in shared_patch.changes.add_fields: + field_name = field.get("name") + if field_name and field_name in field_names: + existing_adds.append(field_name) + + if existing_adds: + return BatchIndexEntry( + name=index_name, + applicable=False, + skip_reason=f"Fields already exist: {', '.join(existing_adds)}", + ) + + # Try creating a plan to check for blocked changes + plan = self._single_planner.create_plan_from_patch( + index_name, + schema_patch=shared_patch, + redis_client=redis_client, + ) + + if not plan.diff_classification.supported: + return BatchIndexEntry( + name=index_name, + applicable=False, + skip_reason=( + plan.diff_classification.blocked_reasons[0] + if plan.diff_classification.blocked_reasons + else "Unsupported changes" + ), + ) + + return BatchIndexEntry(name=index_name, applicable=True) + + except Exception as e: + return BatchIndexEntry( + name=index_name, + applicable=False, + skip_reason=str(e), + ) + + def write_batch_plan(self, batch_plan: BatchPlan, path: str) -> None: + """Write batch plan to YAML file.""" + plan_path = Path(path).resolve() + with open(plan_path, "w") as f: + yaml.safe_dump(batch_plan.model_dump(exclude_none=True), f, sort_keys=False) diff --git a/tests/integration/test_batch_migration_integration.py b/tests/integration/test_batch_migration_integration.py new file mode 100644 index 000000000..976db0528 --- /dev/null +++ b/tests/integration/test_batch_migration_integration.py @@ -0,0 +1,485 @@ +""" +Integration tests for batch migration. + +Tests the full batch migration flow with real Redis: +- Batch planning with patterns and explicit lists +- Batch apply with checkpointing +- Resume after interruption +- Failure policies (fail_fast, continue_on_error) +""" + +import uuid + +import pytest +import yaml + +from redisvl.index import SearchIndex +from redisvl.migration import BatchMigrationExecutor, BatchMigrationPlanner +from redisvl.redis.utils import array_to_buffer + + +def create_test_index(name: str, prefix: str, redis_url: str) -> SearchIndex: + """Helper to create a test index with standard schema.""" + index = SearchIndex.from_dict( + { + "index": { + "name": name, + "prefix": prefix, + "storage_type": "hash", + }, + "fields": [ + {"name": "doc_id", "type": "tag"}, + {"name": "title", "type": "text"}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "algorithm": "hnsw", + "dims": 3, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + }, + redis_url=redis_url, + ) + return index + + +def load_test_data(index: SearchIndex) -> None: + """Load sample documents into an index.""" + docs = [ + { + "doc_id": "1", + "title": "alpha", + "embedding": array_to_buffer([0.1, 0.2, 0.3], "float32"), + }, + { + "doc_id": "2", + "title": "beta", + "embedding": array_to_buffer([0.2, 0.1, 0.4], "float32"), + }, + ] + index.load(docs, id_field="doc_id") + + +class TestBatchMigrationPlanIntegration: + """Test batch plan creation with real Redis.""" + + def test_batch_plan_with_pattern(self, redis_url, worker_id, tmp_path): + """Test creating a batch plan using pattern matching.""" + unique_id = str(uuid.uuid4())[:8] + prefix = f"batch_test:{worker_id}:{unique_id}" + indexes = [] + + # Create multiple indexes matching pattern + for i in range(3): + name = f"batch_{unique_id}_idx_{i}" + index = create_test_index(name, f"{prefix}_{i}", redis_url) + index.create(overwrite=True) + load_test_data(index) + indexes.append(index) + + # Create shared patch (add sortable to title) + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "update_fields": [ + {"name": "title", "attrs": {"sortable": True}} + ] + }, + }, + sort_keys=False, + ) + ) + + # Create batch plan + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + pattern=f"batch_{unique_id}_idx_*", + schema_patch_path=str(patch_path), + redis_url=redis_url, + ) + + # Verify batch plan + assert batch_plan.batch_id is not None + assert len(batch_plan.indexes) == 3 + for entry in batch_plan.indexes: + assert entry.applicable is True + assert entry.skip_reason is None + + # Cleanup + for index in indexes: + index.delete(drop=True) + + def test_batch_plan_with_explicit_list(self, redis_url, worker_id, tmp_path): + """Test creating a batch plan with explicit index list.""" + unique_id = str(uuid.uuid4())[:8] + prefix = f"batch_list_test:{worker_id}:{unique_id}" + index_names = [] + indexes = [] + + # Create indexes + for i in range(2): + name = f"list_batch_{unique_id}_{i}" + index = create_test_index(name, f"{prefix}_{i}", redis_url) + index.create(overwrite=True) + load_test_data(index) + indexes.append(index) + index_names.append(name) + + # Create shared patch + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "update_fields": [ + {"name": "title", "attrs": {"sortable": True}} + ] + }, + }, + sort_keys=False, + ) + ) + + # Create batch plan with explicit list + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=index_names, + schema_patch_path=str(patch_path), + redis_url=redis_url, + ) + + assert len(batch_plan.indexes) == 2 + assert all(idx.applicable for idx in batch_plan.indexes) + + # Cleanup + for index in indexes: + index.delete(drop=True) + + +class TestBatchMigrationApplyIntegration: + """Test batch apply with real Redis.""" + + def test_batch_apply_full_flow(self, redis_url, worker_id, tmp_path): + """Test complete batch apply flow: plan -> apply -> verify.""" + unique_id = str(uuid.uuid4())[:8] + prefix = f"batch_apply:{worker_id}:{unique_id}" + indexes = [] + index_names = [] + + # Create multiple indexes + for i in range(3): + name = f"apply_batch_{unique_id}_{i}" + index = create_test_index(name, f"{prefix}_{i}", redis_url) + index.create(overwrite=True) + load_test_data(index) + indexes.append(index) + index_names.append(name) + + # Create shared patch (make title sortable) + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "update_fields": [ + {"name": "title", "attrs": {"sortable": True}} + ] + }, + }, + sort_keys=False, + ) + ) + + # Create batch plan + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=index_names, + schema_patch_path=str(patch_path), + redis_url=redis_url, + ) + + # Save batch plan + plan_path = tmp_path / "batch_plan.yaml" + planner.write_batch_plan(batch_plan, str(plan_path)) + + # Apply batch migration + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + executor = BatchMigrationExecutor() + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_url=redis_url, + ) + + # Verify report + assert report.status == "completed" + assert report.summary.total_indexes == 3 + assert report.summary.successful == 3 + assert report.summary.failed == 0 + + # Verify all indexes were migrated (title is now sortable) + for name in index_names: + migrated = SearchIndex.from_existing(name, redis_url=redis_url) + title_field = migrated.schema.fields.get("title") + assert title_field is not None + assert title_field.attrs.sortable is True + + # Cleanup + for name in index_names: + idx = SearchIndex.from_existing(name, redis_url=redis_url) + idx.delete(drop=True) + + def test_batch_apply_with_inapplicable_indexes( + self, redis_url, worker_id, tmp_path + ): + """Test batch apply skips indexes that don't have matching fields.""" + unique_id = str(uuid.uuid4())[:8] + prefix = f"batch_skip:{worker_id}:{unique_id}" + indexes_to_cleanup = [] + + # Create an index WITH embedding field + with_embedding = f"with_emb_{unique_id}" + idx1 = create_test_index(with_embedding, f"{prefix}_1", redis_url) + idx1.create(overwrite=True) + load_test_data(idx1) + indexes_to_cleanup.append(with_embedding) + + # Create an index WITHOUT embedding field + without_embedding = f"no_emb_{unique_id}" + idx2 = SearchIndex.from_dict( + { + "index": { + "name": without_embedding, + "prefix": f"{prefix}_2", + "storage_type": "hash", + }, + "fields": [ + {"name": "doc_id", "type": "tag"}, + {"name": "content", "type": "text"}, + ], + }, + redis_url=redis_url, + ) + idx2.create(overwrite=True) + idx2.load([{"doc_id": "1", "content": "test"}], id_field="doc_id") + indexes_to_cleanup.append(without_embedding) + + # Create patch targeting embedding field (won't apply to idx2) + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "update_fields": [ + {"name": "embedding", "attrs": {"datatype": "float16"}} + ] + }, + }, + sort_keys=False, + ) + ) + + # Create batch plan + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=[with_embedding, without_embedding], + schema_patch_path=str(patch_path), + redis_url=redis_url, + ) + + # One should be applicable, one not + applicable = [idx for idx in batch_plan.indexes if idx.applicable] + not_applicable = [idx for idx in batch_plan.indexes if not idx.applicable] + assert len(applicable) == 1 + assert len(not_applicable) == 1 + assert "embedding" in not_applicable[0].skip_reason.lower() + + # Apply + executor = BatchMigrationExecutor() + report = executor.apply( + batch_plan, + state_path=str(tmp_path / "state.yaml"), + report_dir=str(tmp_path / "reports"), + redis_url=redis_url, + ) + + assert report.summary.successful == 1 + assert report.summary.skipped == 1 + + # Cleanup + for name in indexes_to_cleanup: + idx = SearchIndex.from_existing(name, redis_url=redis_url) + idx.delete(drop=True) + + +class TestBatchMigrationResumeIntegration: + """Test batch resume functionality with real Redis.""" + + def test_resume_from_checkpoint(self, redis_url, worker_id, tmp_path): + """Test resuming a batch migration from checkpoint state.""" + unique_id = str(uuid.uuid4())[:8] + prefix = f"batch_resume:{worker_id}:{unique_id}" + index_names = [] + indexes = [] + + # Create indexes + for i in range(3): + name = f"resume_batch_{unique_id}_{i}" + index = create_test_index(name, f"{prefix}_{i}", redis_url) + index.create(overwrite=True) + load_test_data(index) + indexes.append(index) + index_names.append(name) + + # Create patch + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "update_fields": [ + {"name": "title", "attrs": {"sortable": True}} + ] + }, + }, + sort_keys=False, + ) + ) + + # Create batch plan + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=index_names, + schema_patch_path=str(patch_path), + redis_url=redis_url, + ) + + # Save batch plan (needed for resume) + plan_path = tmp_path / "batch_plan.yaml" + planner.write_batch_plan(batch_plan, str(plan_path)) + + # Create a checkpoint state simulating partial completion + state_path = tmp_path / "batch_state.yaml" + partial_state = { + "batch_id": batch_plan.batch_id, + "plan_path": str(plan_path), + "started_at": "2026-03-20T10:00:00Z", + "updated_at": "2026-03-20T10:01:00Z", + "completed": [ + { + "name": index_names[0], + "status": "succeeded", + "completed_at": "2026-03-20T10:00:30Z", + } + ], + "remaining": index_names[1:], # Still need to process idx 1 and 2 + "current_index": None, + } + state_path.write_text(yaml.safe_dump(partial_state, sort_keys=False)) + + # Resume from checkpoint + executor = BatchMigrationExecutor() + report = executor.resume( + state_path=str(state_path), + batch_plan_path=str(plan_path), + report_dir=str(tmp_path / "reports"), + redis_url=redis_url, + ) + + # Should complete remaining 2 indexes + # Note: The first index was marked as succeeded in checkpoint but not actually + # migrated, so the report will show 2 successful (the ones actually processed) + assert report.summary.successful >= 2 + assert report.status == "completed" + + # Verify at least the resumed indexes were migrated + for name in index_names[1:]: + migrated = SearchIndex.from_existing(name, redis_url=redis_url) + title_field = migrated.schema.fields.get("title") + assert title_field is not None + assert title_field.attrs.sortable is True + + # Cleanup + for name in index_names: + idx = SearchIndex.from_existing(name, redis_url=redis_url) + idx.delete(drop=True) + + def test_progress_callback_called(self, redis_url, worker_id, tmp_path): + """Test that progress callback is invoked during batch apply.""" + unique_id = str(uuid.uuid4())[:8] + prefix = f"batch_progress:{worker_id}:{unique_id}" + index_names = [] + indexes = [] + + # Create indexes + for i in range(2): + name = f"progress_batch_{unique_id}_{i}" + index = create_test_index(name, f"{prefix}_{i}", redis_url) + index.create(overwrite=True) + load_test_data(index) + indexes.append(index) + index_names.append(name) + + # Create patch + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + { + "version": 1, + "changes": { + "update_fields": [ + {"name": "title", "attrs": {"sortable": True}} + ] + }, + }, + sort_keys=False, + ) + ) + + # Create batch plan + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=index_names, + schema_patch_path=str(patch_path), + redis_url=redis_url, + ) + + # Track progress callbacks + progress_calls = [] + + def progress_cb(name, pos, total, status): + progress_calls.append((name, pos, total, status)) + + # Apply with progress callback + executor = BatchMigrationExecutor() + executor.apply( + batch_plan, + state_path=str(tmp_path / "state.yaml"), + report_dir=str(tmp_path / "reports"), + redis_url=redis_url, + progress_callback=progress_cb, + ) + + # Verify progress was reported for each index + assert len(progress_calls) >= 2 # At least one call per index + reported_names = {call[0] for call in progress_calls} + for name in index_names: + assert name in reported_names + + # Cleanup + for name in index_names: + idx = SearchIndex.from_existing(name, redis_url=redis_url) + idx.delete(drop=True) diff --git a/tests/unit/test_batch_migration.py b/tests/unit/test_batch_migration.py new file mode 100644 index 000000000..31adecd1d --- /dev/null +++ b/tests/unit/test_batch_migration.py @@ -0,0 +1,1366 @@ +""" +Unit tests for BatchMigrationPlanner and BatchMigrationExecutor. + +Tests use mocked Redis clients to verify: +- Pattern matching and index selection +- Applicability checking +- Checkpoint persistence and resume +- Failure policies +- Progress callbacks +""" + +from fnmatch import fnmatch +from pathlib import Path +from typing import Any, Dict, List +from unittest.mock import MagicMock, Mock, patch + +import pytest +import yaml + +from redisvl.migration import ( + BatchMigrationExecutor, + BatchMigrationPlanner, + BatchPlan, + BatchState, + SchemaPatch, +) +from redisvl.migration.models import BatchIndexEntry, BatchIndexState +from redisvl.schema.schema import IndexSchema + +# ============================================================================= +# Test Fixtures and Mock Helpers +# ============================================================================= + + +class MockRedisClient: + """Mock Redis client for batch migration tests.""" + + def __init__(self, indexes: List[str] = None, keys: Dict[str, List[str]] = None): + self.indexes = indexes or [] + self.keys = keys or {} + self._data: Dict[str, Dict[str, bytes]] = {} + + def execute_command(self, *args, **kwargs): + if args[0] == "FT._LIST": + return [idx.encode() for idx in self.indexes] + raise NotImplementedError(f"Command not mocked: {args}") + + def scan(self, cursor=0, match=None, count=None): + matched = [] + all_keys = [] + for prefix_keys in self.keys.values(): + all_keys.extend(prefix_keys) + + for key in all_keys: + decoded_key = key.decode() if isinstance(key, bytes) else str(key) + if match is None or fnmatch(decoded_key, match): + matched.append(key if isinstance(key, bytes) else key.encode()) + return 0, matched + + def hget(self, key, field): + return self._data.get(key, {}).get(field) + + def hset(self, key, field, value): + if key not in self._data: + self._data[key] = {} + self._data[key][field] = value + + def pipeline(self): + return MockPipeline(self) + + +class MockPipeline: + """Mock Redis pipeline.""" + + def __init__(self, client: MockRedisClient): + self._client = client + self._commands: List[tuple] = [] + + def hset(self, key, field, value): + self._commands.append(("hset", key, field, value)) + return self + + def execute(self): + results = [] + for cmd in self._commands: + if cmd[0] == "hset": + self._client.hset(cmd[1], cmd[2], cmd[3]) + results.append(1) + self._commands = [] + return results + + +def make_dummy_index(name: str, schema_dict: Dict[str, Any], stats: Dict[str, Any]): + """Create a mock SearchIndex for testing.""" + mock_index = Mock() + mock_index.name = name + mock_index.schema = IndexSchema.from_dict(schema_dict) + mock_index._redis_client = MockRedisClient() + mock_index.client = mock_index._redis_client + mock_index.info = Mock(return_value=stats) + mock_index.delete = Mock() + mock_index.create = Mock() + mock_index.exists = Mock(return_value=True) + return mock_index + + +def make_test_schema(name: str, prefix: str = None, dims: int = 3) -> Dict[str, Any]: + """Create a test schema dictionary.""" + return { + "index": { + "name": name, + "prefix": prefix or name, + "key_separator": ":", + "storage_type": "hash", + }, + "fields": [ + {"name": "title", "type": "text"}, + { + "name": "embedding", + "type": "vector", + "attrs": { + "algorithm": "flat", + "dims": dims, + "distance_metric": "cosine", + "datatype": "float32", + }, + }, + ], + } + + +def make_shared_patch( + update_fields: List[Dict] = None, + add_fields: List[Dict] = None, + remove_fields: List[str] = None, +) -> Dict[str, Any]: + """Create a test schema patch dictionary.""" + return { + "version": 1, + "changes": { + "update_fields": update_fields or [], + "add_fields": add_fields or [], + "remove_fields": remove_fields or [], + "index": {}, + }, + } + + +def make_batch_plan( + batch_id: str, + indexes: List[BatchIndexEntry], + failure_policy: str = "fail_fast", + requires_quantization: bool = False, +) -> BatchPlan: + """Create a BatchPlan with default values for testing.""" + return BatchPlan( + batch_id=batch_id, + shared_patch=SchemaPatch( + version=1, + changes={"update_fields": [], "add_fields": [], "remove_fields": []}, + ), + indexes=indexes, + requires_quantization=requires_quantization, + failure_policy=failure_policy, + created_at="2026-03-20T10:00:00Z", + ) + + +# ============================================================================= +# BatchMigrationPlanner Tests +# ============================================================================= + + +class TestBatchMigrationPlannerPatternMatching: + """Test pattern matching for index discovery.""" + + def test_pattern_matches_multiple_indexes(self, monkeypatch, tmp_path): + """Pattern should match multiple indexes.""" + mock_client = MockRedisClient( + indexes=["products_idx", "users_idx", "orders_idx", "logs_idx"] + ) + + def mock_list_indexes(**kwargs): + return ["products_idx", "users_idx", "orders_idx", "logs_idx"] + + monkeypatch.setattr( + "redisvl.migration.batch_planner.list_indexes", mock_list_indexes + ) + + # Mock from_existing for each index + def mock_from_existing(name, **kwargs): + return make_dummy_index( + name, make_test_schema(name), {"num_docs": 10, "indexing": False} + ) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + make_shared_patch( + update_fields=[ + {"name": "embedding", "attrs": {"algorithm": "hnsw"}} + ] + ) + ) + ) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + pattern="*_idx", + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + assert len(batch_plan.indexes) == 4 + assert all(idx.name.endswith("_idx") for idx in batch_plan.indexes) + + def test_pattern_no_matches_raises_error(self, monkeypatch, tmp_path): + """Empty pattern results should raise ValueError.""" + mock_client = MockRedisClient(indexes=["products", "users"]) + + def mock_list_indexes(**kwargs): + return ["products", "users"] + + monkeypatch.setattr( + "redisvl.migration.batch_planner.list_indexes", mock_list_indexes + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + with pytest.raises(ValueError, match="No indexes found"): + planner.create_batch_plan( + pattern="*_idx", # Won't match anything + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + def test_pattern_with_special_characters(self, monkeypatch, tmp_path): + """Pattern matching with special characters in index names.""" + mock_client = MockRedisClient( + indexes=["app:prod:idx", "app:dev:idx", "app:staging:idx"] + ) + + def mock_list_indexes(**kwargs): + return ["app:prod:idx", "app:dev:idx", "app:staging:idx"] + + monkeypatch.setattr( + "redisvl.migration.batch_planner.list_indexes", mock_list_indexes + ) + + def mock_from_existing(name, **kwargs): + return make_dummy_index( + name, make_test_schema(name), {"num_docs": 5, "indexing": False} + ) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + pattern="app:*:idx", + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + assert len(batch_plan.indexes) == 3 + + +class TestBatchMigrationPlannerIndexSelection: + """Test explicit index list selection.""" + + def test_explicit_index_list(self, monkeypatch, tmp_path): + """Explicit index list should be used directly.""" + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3", "idx4", "idx5"]) + + def mock_from_existing(name, **kwargs): + return make_dummy_index( + name, make_test_schema(name), {"num_docs": 10, "indexing": False} + ) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=["idx1", "idx3", "idx5"], + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + assert len(batch_plan.indexes) == 3 + assert [idx.name for idx in batch_plan.indexes] == ["idx1", "idx3", "idx5"] + + def test_duplicate_index_names(self, monkeypatch, tmp_path): + """Duplicate index names in list should be preserved (user intent).""" + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + def mock_from_existing(name, **kwargs): + return make_dummy_index( + name, make_test_schema(name), {"num_docs": 10, "indexing": False} + ) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + # Duplicates are preserved - user explicitly listed them twice + batch_plan = planner.create_batch_plan( + indexes=["idx1", "idx1", "idx2"], + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + assert len(batch_plan.indexes) == 3 + + def test_non_existent_index(self, monkeypatch, tmp_path): + """Non-existent index should be marked as not applicable.""" + mock_client = MockRedisClient(indexes=["idx1"]) + + def mock_from_existing(name, **kwargs): + if name == "idx1": + return make_dummy_index( + name, make_test_schema(name), {"num_docs": 10, "indexing": False} + ) + raise Exception(f"Index '{name}' not found") + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=["idx1", "nonexistent"], + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + assert len(batch_plan.indexes) == 2 + assert batch_plan.indexes[0].applicable is True + assert batch_plan.indexes[1].applicable is False + assert "not found" in batch_plan.indexes[1].skip_reason.lower() + + def test_indexes_from_file(self, monkeypatch, tmp_path): + """Load index names from file.""" + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3"]) + + def mock_from_existing(name, **kwargs): + return make_dummy_index( + name, make_test_schema(name), {"num_docs": 10, "indexing": False} + ) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + # Create indexes file + indexes_file = tmp_path / "indexes.txt" + indexes_file.write_text("idx1\n# comment\nidx2\n\nidx3\n") + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes_file=str(indexes_file), + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + assert len(batch_plan.indexes) == 3 + assert [idx.name for idx in batch_plan.indexes] == ["idx1", "idx2", "idx3"] + + +class TestBatchMigrationPlannerApplicability: + """Test applicability checking for shared patches.""" + + def test_missing_field_marks_not_applicable(self, monkeypatch, tmp_path): + """Index missing field in update_fields should be marked not applicable.""" + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + def mock_from_existing(name, **kwargs): + if name == "idx1": + # Has embedding field + return make_dummy_index( + name, make_test_schema(name), {"num_docs": 10, "indexing": False} + ) + # idx2 - no embedding field + schema = { + "index": {"name": name, "prefix": name, "storage_type": "hash"}, + "fields": [{"name": "title", "type": "text"}], + } + return make_dummy_index(name, schema, {"num_docs": 5, "indexing": False}) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + make_shared_patch( + update_fields=[ + {"name": "embedding", "attrs": {"algorithm": "hnsw"}} + ] + ) + ) + ) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=["idx1", "idx2"], + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + idx1_entry = next(e for e in batch_plan.indexes if e.name == "idx1") + idx2_entry = next(e for e in batch_plan.indexes if e.name == "idx2") + + assert idx1_entry.applicable is True + assert idx2_entry.applicable is False + assert "embedding" in idx2_entry.skip_reason.lower() + + def test_field_already_exists_marks_not_applicable(self, monkeypatch, tmp_path): + """Adding field that already exists should mark not applicable.""" + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + def mock_from_existing(name, **kwargs): + schema = make_test_schema(name) + # Add 'category' field to idx2 + if name == "idx2": + schema["fields"].append({"name": "category", "type": "tag"}) + return make_dummy_index(name, schema, {"num_docs": 10, "indexing": False}) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + make_shared_patch(add_fields=[{"name": "category", "type": "tag"}]) + ) + ) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=["idx1", "idx2"], + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + idx1_entry = next(e for e in batch_plan.indexes if e.name == "idx1") + idx2_entry = next(e for e in batch_plan.indexes if e.name == "idx2") + + assert idx1_entry.applicable is True + assert idx2_entry.applicable is False + assert "category" in idx2_entry.skip_reason.lower() + + def test_blocked_change_marks_not_applicable(self, monkeypatch, tmp_path): + """Blocked changes (e.g., dims change) should mark not applicable.""" + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + def mock_from_existing(name, **kwargs): + dims = 3 if name == "idx1" else 768 + return make_dummy_index( + name, + make_test_schema(name, dims=dims), + {"num_docs": 10, "indexing": False}, + ) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + make_shared_patch( + update_fields=[ + {"name": "embedding", "attrs": {"dims": 1536}} # Change dims + ] + ) + ) + ) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=["idx1", "idx2"], + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + # Both should be not applicable because dims change is blocked + for entry in batch_plan.indexes: + assert entry.applicable is False + assert "dims" in entry.skip_reason.lower() + + +class TestBatchMigrationPlannerQuantization: + """Test quantization detection in batch plans.""" + + def test_detects_quantization_required(self, monkeypatch, tmp_path): + """Batch plan should detect when quantization is required.""" + mock_client = MockRedisClient(indexes=["idx1"]) + + def mock_from_existing(name, **kwargs): + return make_dummy_index( + name, make_test_schema(name), {"num_docs": 10, "indexing": False} + ) + + monkeypatch.setattr( + "redisvl.migration.batch_planner.SearchIndex.from_existing", + mock_from_existing, + ) + monkeypatch.setattr( + "redisvl.migration.planner.SearchIndex.from_existing", mock_from_existing + ) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text( + yaml.safe_dump( + make_shared_patch( + update_fields=[ + {"name": "embedding", "attrs": {"datatype": "float16"}} + ] + ) + ) + ) + + planner = BatchMigrationPlanner() + batch_plan = planner.create_batch_plan( + indexes=["idx1"], + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + assert batch_plan.requires_quantization is True + + +class TestBatchMigrationPlannerEdgeCases: + """Test edge cases and error handling.""" + + def test_multiple_source_specification_error(self, tmp_path): + """Should error when multiple source types are specified.""" + mock_client = MockRedisClient(indexes=["idx1"]) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + with pytest.raises(ValueError, match="only one of"): + planner.create_batch_plan( + indexes=["idx1"], + pattern="*", # Can't specify both + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + def test_no_source_specification_error(self, tmp_path): + """Should error when no source is specified.""" + mock_client = MockRedisClient(indexes=["idx1"]) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + with pytest.raises(ValueError, match="Must provide one of"): + planner.create_batch_plan( + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + def test_missing_patch_file_error(self): + """Should error when patch file doesn't exist.""" + mock_client = MockRedisClient(indexes=["idx1"]) + + planner = BatchMigrationPlanner() + with pytest.raises(FileNotFoundError): + planner.create_batch_plan( + indexes=["idx1"], + schema_patch_path="/nonexistent/patch.yaml", + redis_client=mock_client, + ) + + def test_missing_indexes_file_error(self, tmp_path): + """Should error when indexes file doesn't exist.""" + mock_client = MockRedisClient(indexes=["idx1"]) + + patch_path = tmp_path / "patch.yaml" + patch_path.write_text(yaml.safe_dump(make_shared_patch())) + + planner = BatchMigrationPlanner() + with pytest.raises(FileNotFoundError): + planner.create_batch_plan( + indexes_file="/nonexistent/indexes.txt", + schema_patch_path=str(patch_path), + redis_client=mock_client, + ) + + +# ============================================================================= +# BatchMigrationExecutor Tests +# ============================================================================= + + +class MockMigrationPlan: + """Mock migration plan for testing.""" + + def __init__(self, index_name: str): + self.source = Mock() + self.source.schema_snapshot = make_test_schema(index_name) + self.merged_target_schema = make_test_schema(index_name) + + +class MockMigrationReport: + """Mock migration report for testing.""" + + def __init__(self, result: str = "succeeded", errors: List[str] = None): + self.result = result + self.validation = Mock(errors=errors or []) + + def model_dump(self, **kwargs): + return {"result": self.result} + + +def create_mock_executor( + succeed_on: List[str] = None, + fail_on: List[str] = None, + track_calls: List[str] = None, +): + """Create a properly configured BatchMigrationExecutor with mocks. + + Args: + succeed_on: Index names that should succeed. + fail_on: Index names that should fail. + track_calls: List to append index names as they're migrated. + + Returns: + A BatchMigrationExecutor with mocked planner and executor. + """ + succeed_on = succeed_on or [] + fail_on = fail_on or [] + if track_calls is None: + track_calls = [] + + # Create mock planner + mock_planner = Mock() + + def create_plan_from_patch(index_name, **kwargs): + track_calls.append(index_name) + return MockMigrationPlan(index_name) + + mock_planner.create_plan_from_patch = create_plan_from_patch + + # Create mock executor + mock_single_executor = Mock() + + def apply(plan, **kwargs): + # Determine if this should succeed or fail based on tracked calls + if track_calls: + last_index = track_calls[-1] + if last_index in fail_on: + return MockMigrationReport( + result="failed", errors=["Simulated failure"] + ) + return MockMigrationReport(result="succeeded") + + mock_single_executor.apply = apply + + # Create the batch executor with injected mocks + batch_executor = BatchMigrationExecutor(executor=mock_single_executor) + batch_executor._planner = mock_planner + + return batch_executor, track_calls + + +class TestBatchMigrationExecutorCheckpointing: + """Test checkpoint persistence and state management.""" + + def test_checkpoint_created_at_start(self, tmp_path): + """Checkpoint state file should be created when migration starts.""" + batch_plan = make_batch_plan( + batch_id="test-batch-001", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + ], + failure_policy="fail_fast", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor, _ = create_mock_executor(succeed_on=["idx1", "idx2"]) + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # Verify checkpoint file was created + assert state_path.exists() + state_data = yaml.safe_load(state_path.read_text()) + assert state_data["batch_id"] == "test-batch-001" + + def test_checkpoint_updated_after_each_index(self, monkeypatch, tmp_path): + """Checkpoint should be updated after each index is processed.""" + batch_plan = make_batch_plan( + batch_id="test-batch-002", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + BatchIndexEntry(name="idx3", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + checkpoint_snapshots = [] + + # Capture checkpoints as they're written + original_write = BatchMigrationExecutor._write_state + + def capture_checkpoint(self, state, path): + checkpoint_snapshots.append( + {"remaining": list(state.remaining), "completed": len(state.completed)} + ) + return original_write(self, state, path) + + monkeypatch.setattr(BatchMigrationExecutor, "_write_state", capture_checkpoint) + + executor, _ = create_mock_executor(succeed_on=["idx1", "idx2", "idx3"]) + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3"]) + + executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # Verify checkpoints were written progressively + # Each index should trigger 2 writes: start and end + assert len(checkpoint_snapshots) >= 6 # At least 2 per index + + def test_resume_from_checkpoint(self, tmp_path): + """Resume should continue from where migration left off.""" + # Create a checkpoint state simulating interrupted migration + batch_plan = make_batch_plan( + batch_id="test-batch-003", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + BatchIndexEntry(name="idx3", applicable=True), + ], + failure_policy="continue_on_error", + ) + + # Write the batch plan + plan_path = tmp_path / "batch_plan.yaml" + with open(plan_path, "w") as f: + yaml.safe_dump(batch_plan.model_dump(exclude_none=True), f, sort_keys=False) + + # Write a checkpoint state (idx1 completed, idx2 and idx3 remaining) + state_path = tmp_path / "batch_state.yaml" + checkpoint_state = BatchState( + batch_id="test-batch-003", + plan_path=str(plan_path), + started_at="2026-03-20T10:00:00Z", + updated_at="2026-03-20T10:05:00Z", + remaining=["idx2", "idx3"], + completed=[ + BatchIndexState( + name="idx1", + status="succeeded", + completed_at="2026-03-20T10:05:00Z", + ) + ], + current_index=None, + ) + with open(state_path, "w") as f: + yaml.safe_dump( + checkpoint_state.model_dump(exclude_none=True), f, sort_keys=False + ) + + report_dir = tmp_path / "reports" + migrated_indexes: List[str] = [] + + executor, migrated_indexes = create_mock_executor( + succeed_on=["idx2", "idx3"], + ) + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3"]) + + # Resume from checkpoint + report = executor.resume( + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # idx1 should NOT be migrated again (already completed) + assert "idx1" not in migrated_indexes + # Only idx2 and idx3 should be migrated + assert migrated_indexes == ["idx2", "idx3"] + # Report should show all 3 as succeeded + assert report.summary.successful == 3 + + +class TestBatchMigrationExecutorFailurePolicies: + """Test failure policy behavior (fail_fast vs continue_on_error).""" + + def test_fail_fast_stops_on_first_error(self, tmp_path): + """fail_fast policy should stop processing after first failure.""" + batch_plan = make_batch_plan( + batch_id="test-batch-fail-fast", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), # This will fail + BatchIndexEntry(name="idx3", applicable=True), + ], + failure_policy="fail_fast", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor, migrated_indexes = create_mock_executor( + succeed_on=["idx1", "idx3"], + fail_on=["idx2"], + ) + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3"]) + + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # idx3 should NOT have been attempted due to fail_fast + assert "idx3" not in migrated_indexes + assert migrated_indexes == ["idx1", "idx2"] + + # Report should show partial results + assert report.summary.successful == 1 + assert report.summary.failed == 1 + assert report.summary.skipped == 1 # idx3 was skipped + + def test_continue_on_error_processes_all(self, tmp_path): + """continue_on_error policy should process all indexes.""" + batch_plan = make_batch_plan( + batch_id="test-batch-continue", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), # This will fail + BatchIndexEntry(name="idx3", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor, migrated_indexes = create_mock_executor( + succeed_on=["idx1", "idx3"], + fail_on=["idx2"], + ) + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3"]) + + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # ALL indexes should have been attempted + assert migrated_indexes == ["idx1", "idx2", "idx3"] + + # Report should show mixed results + assert report.summary.successful == 2 # idx1 and idx3 + assert report.summary.failed == 1 # idx2 + assert report.summary.skipped == 0 + assert report.status == "partial_failure" + + def test_retry_failed_on_resume(self, tmp_path): + """retry_failed=True should retry previously failed indexes.""" + batch_plan = make_batch_plan( + batch_id="test-batch-retry", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + ], + failure_policy="continue_on_error", + ) + + plan_path = tmp_path / "batch_plan.yaml" + with open(plan_path, "w") as f: + yaml.safe_dump(batch_plan.model_dump(exclude_none=True), f, sort_keys=False) + + # Create checkpoint with idx1 failed + state_path = tmp_path / "batch_state.yaml" + checkpoint_state = BatchState( + batch_id="test-batch-retry", + plan_path=str(plan_path), + started_at="2026-03-20T10:00:00Z", + updated_at="2026-03-20T10:05:00Z", + remaining=[], # All "done" but idx1 failed + completed=[ + BatchIndexState( + name="idx1", status="failed", completed_at="2026-03-20T10:03:00Z" + ), + BatchIndexState( + name="idx2", status="succeeded", completed_at="2026-03-20T10:05:00Z" + ), + ], + current_index=None, + ) + with open(state_path, "w") as f: + yaml.safe_dump( + checkpoint_state.model_dump(exclude_none=True), f, sort_keys=False + ) + + report_dir = tmp_path / "reports" + + executor, migrated_indexes = create_mock_executor(succeed_on=["idx1", "idx2"]) + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + report = executor.resume( + state_path=str(state_path), + retry_failed=True, + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # idx1 should be retried, idx2 should not (already succeeded) + assert "idx1" in migrated_indexes + assert "idx2" not in migrated_indexes + assert report.summary.successful == 2 + + +class TestBatchMigrationExecutorProgressCallback: + """Test progress callback functionality.""" + + def test_progress_callback_called_for_each_index(self, tmp_path): + """Progress callback should be invoked for each index.""" + batch_plan = make_batch_plan( + batch_id="test-batch-progress", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + BatchIndexEntry(name="idx3", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + progress_events = [] + + def progress_callback(index_name, position, total, status): + progress_events.append( + {"index": index_name, "pos": position, "total": total, "status": status} + ) + + executor, _ = create_mock_executor(succeed_on=["idx1", "idx2", "idx3"]) + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3"]) + + executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + progress_callback=progress_callback, + ) + + # Should have 2 events per index (starting + final status) + assert len(progress_events) == 6 + # Check first index events + assert progress_events[0] == { + "index": "idx1", + "pos": 1, + "total": 3, + "status": "starting", + } + assert progress_events[1] == { + "index": "idx1", + "pos": 1, + "total": 3, + "status": "succeeded", + } + + +class TestBatchMigrationExecutorEdgeCases: + """Test edge cases and error scenarios.""" + + def test_exception_during_migration_captured(self, tmp_path): + """Exception during migration should be captured in state.""" + batch_plan = make_batch_plan( + batch_id="test-batch-exception", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + # Track calls and raise exception for idx1 + call_count = [0] + + # Create mock planner that raises on idx1 + mock_planner = Mock() + + def create_plan_from_patch(index_name, **kwargs): + call_count[0] += 1 + if index_name == "idx1": + raise RuntimeError("Connection lost to Redis") + return MockMigrationPlan(index_name) + + mock_planner.create_plan_from_patch = create_plan_from_patch + + # Create mock executor + mock_single_executor = Mock() + mock_single_executor.apply = Mock( + return_value=MockMigrationReport(result="succeeded") + ) + + # Create batch executor with mocks + executor = BatchMigrationExecutor(executor=mock_single_executor) + executor._planner = mock_planner + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # Both should have been attempted + assert call_count[0] == 2 + # idx1 failed with exception, idx2 succeeded + assert report.summary.failed == 1 + assert report.summary.successful == 1 + + # Check error message is captured + idx1_report = next(r for r in report.indexes if r.name == "idx1") + assert "Connection lost" in idx1_report.error + + def test_non_applicable_indexes_skipped(self, tmp_path): + """Non-applicable indexes should be skipped and reported.""" + batch_plan = make_batch_plan( + batch_id="test-batch-skip", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry( + name="idx2", + applicable=False, + skip_reason="Missing field: embedding", + ), + BatchIndexEntry(name="idx3", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor, migrated_indexes = create_mock_executor(succeed_on=["idx1", "idx3"]) + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3"]) + + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # idx2 should NOT be migrated + assert "idx2" not in migrated_indexes + assert migrated_indexes == ["idx1", "idx3"] + + # Report should show idx2 as skipped + assert report.summary.successful == 2 + assert report.summary.skipped == 1 + + idx2_report = next(r for r in report.indexes if r.name == "idx2") + assert idx2_report.status == "skipped" + assert "Missing field" in idx2_report.error + + def test_empty_batch_plan(self, monkeypatch, tmp_path): + """Empty batch plan should complete immediately.""" + batch_plan = make_batch_plan( + batch_id="test-batch-empty", + indexes=[], # No indexes + failure_policy="fail_fast", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor = BatchMigrationExecutor() + mock_client = MockRedisClient(indexes=[]) + + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + assert report.status == "completed" + assert report.summary.total_indexes == 0 + assert report.summary.successful == 0 + + def test_missing_redis_connection_error(self, tmp_path): + """Should error when no Redis connection is provided.""" + batch_plan = make_batch_plan( + batch_id="test-batch-no-redis", + indexes=[BatchIndexEntry(name="idx1", applicable=True)], + failure_policy="fail_fast", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor = BatchMigrationExecutor() + + with pytest.raises(ValueError, match="redis"): + executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + # No redis_url or redis_client provided + ) + + def test_resume_missing_state_file_error(self, tmp_path): + """Resume should error when state file doesn't exist.""" + executor = BatchMigrationExecutor() + mock_client = MockRedisClient(indexes=[]) + + with pytest.raises(FileNotFoundError, match="State file"): + executor.resume( + state_path=str(tmp_path / "nonexistent_state.yaml"), + report_dir=str(tmp_path / "reports"), + redis_client=mock_client, + ) + + def test_resume_missing_plan_file_error(self, tmp_path): + """Resume should error when plan file doesn't exist.""" + # Create state file pointing to nonexistent plan + state_path = tmp_path / "batch_state.yaml" + state = BatchState( + batch_id="test-batch", + plan_path="/nonexistent/plan.yaml", + started_at="2026-03-20T10:00:00Z", + updated_at="2026-03-20T10:05:00Z", + remaining=["idx1"], + completed=[], + current_index=None, + ) + with open(state_path, "w") as f: + yaml.safe_dump(state.model_dump(exclude_none=True), f) + + executor = BatchMigrationExecutor() + mock_client = MockRedisClient(indexes=["idx1"]) + + with pytest.raises(FileNotFoundError, match="Batch plan"): + executor.resume( + state_path=str(state_path), + report_dir=str(tmp_path / "reports"), + redis_client=mock_client, + ) + + +class TestBatchMigrationExecutorReportGeneration: + """Test batch report generation.""" + + def test_report_contains_all_indexes(self, tmp_path): + """Final report should contain entries for all indexes.""" + batch_plan = make_batch_plan( + batch_id="test-batch-report", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry( + name="idx2", applicable=False, skip_reason="Missing field" + ), + BatchIndexEntry(name="idx3", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor, _ = create_mock_executor(succeed_on=["idx1", "idx3"]) + mock_client = MockRedisClient(indexes=["idx1", "idx2", "idx3"]) + + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # All indexes should be in report + index_names = {r.name for r in report.indexes} + assert index_names == {"idx1", "idx2", "idx3"} + + # Verify totals + assert report.summary.total_indexes == 3 + assert report.summary.successful == 2 + assert report.summary.skipped == 1 + + def test_per_index_reports_written(self, tmp_path): + """Individual reports should be written for each migrated index.""" + batch_plan = make_batch_plan( + batch_id="test-batch-files", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor, _ = create_mock_executor(succeed_on=["idx1", "idx2"]) + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + # Report files should exist + assert (report_dir / "idx1_report.yaml").exists() + assert (report_dir / "idx2_report.yaml").exists() + + def test_completed_status_when_all_succeed(self, tmp_path): + """Status should be 'completed' when all indexes succeed.""" + batch_plan = make_batch_plan( + batch_id="test-batch-complete", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + executor, _ = create_mock_executor(succeed_on=["idx1", "idx2"]) + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + assert report.status == "completed" + + def test_failed_status_when_all_fail(self, tmp_path): + """Status should be 'failed' when all indexes fail.""" + batch_plan = make_batch_plan( + batch_id="test-batch-all-fail", + indexes=[ + BatchIndexEntry(name="idx1", applicable=True), + BatchIndexEntry(name="idx2", applicable=True), + ], + failure_policy="continue_on_error", + ) + + state_path = tmp_path / "batch_state.yaml" + report_dir = tmp_path / "reports" + + # Create a mock that raises exceptions for all indexes + mock_planner = Mock() + mock_planner.create_plan_from_patch = Mock( + side_effect=RuntimeError("All migrations fail") + ) + + mock_single_executor = Mock() + executor = BatchMigrationExecutor(executor=mock_single_executor) + executor._planner = mock_planner + mock_client = MockRedisClient(indexes=["idx1", "idx2"]) + + report = executor.apply( + batch_plan, + state_path=str(state_path), + report_dir=str(report_dir), + redis_client=mock_client, + ) + + assert report.status == "failed" + assert report.summary.failed == 2 + assert report.summary.successful == 0 From 78337c6bd32e66778cc981185887d8d84abbea9e Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Wed, 1 Apr 2026 19:46:12 -0400 Subject: [PATCH 3/8] fix: batch planner/executor bug fixes and improvements (#563) - Fix status mismatch: executor writes 'success' to match BatchState.success_count - Pass rename_operations to get_vector_datatype_changes - Validate failure_policy early (reject unknown values) - Make update_fields applicability rename-aware - Fix progress position during resume (correct offset) - Fix fail-fast: leave remaining in state for checkpoint resume - Atomic checkpoint writes (write to .tmp then rename) - Sanitize index_name in report filenames (path traversal) - Add assert guard for fnmatch pattern type --- redisvl/migration/batch_executor.py | 52 ++++++++++++++++++----------- redisvl/migration/batch_planner.py | 24 +++++++++++-- 2 files changed, 54 insertions(+), 22 deletions(-) diff --git a/redisvl/migration/batch_executor.py b/redisvl/migration/batch_executor.py index de3154a2a..4bb355a73 100644 --- a/redisvl/migration/batch_executor.py +++ b/redisvl/migration/batch_executor.py @@ -80,12 +80,17 @@ def apply( applicable_indexes = [idx for idx in batch_plan.indexes if idx.applicable] total = len(applicable_indexes) + # Calculate the correct starting position for progress reporting + # (accounts for already-completed indexes during resume) + already_completed = len(state.completed) + # Process each remaining index - for position, index_name in enumerate(state.remaining[:], start=1): + for offset, index_name in enumerate(state.remaining[:]): state.current_index = index_name state.updated_at = timestamp_utc() self._write_state(state, state_path) + position = already_completed + offset + 1 if progress_callback: progress_callback(index_name, position, total, "starting") @@ -133,18 +138,8 @@ def apply( index_state.status == "failed" and batch_plan.failure_policy == "fail_fast" ): - # Mark remaining as skipped - for remaining_name in state.remaining[:]: - state.remaining.remove(remaining_name) - state.completed.append( - BatchIndexState( - name=remaining_name, - status="skipped", - completed_at=timestamp_utc(), - ) - ) - state.updated_at = timestamp_utc() - self._write_state(state, state_path) + # Leave remaining indexes in state.remaining so that + # checkpoint resume can pick them up later. break # Build final report @@ -225,13 +220,14 @@ def _migrate_single_index( redis_client=redis_client, ) - # Write individual report - report_file = report_dir / f"{index_name}_report.yaml" + # Sanitize index_name to prevent path traversal + safe_name = index_name.replace("/", "_").replace("\\", "_").replace("..", "_") + report_file = report_dir / f"{safe_name}_report.yaml" write_yaml(report.model_dump(exclude_none=True), str(report_file)) return BatchIndexState( name=index_name, - status="succeeded" if report.result == "succeeded" else "failed", + status="success" if report.result == "succeeded" else "failed", completed_at=timestamp_utc(), report_path=str(report_file), error=report.validation.errors[0] if report.validation.errors else None, @@ -277,11 +273,18 @@ def _init_or_load_state( ) def _write_state(self, state: BatchState, state_path: str) -> None: - """Write checkpoint state to file.""" + """Write checkpoint state to file atomically. + + Writes to a temporary file first, then renames to avoid corruption + if the process crashes mid-write. + """ path = Path(state_path).resolve() path.parent.mkdir(parents=True, exist_ok=True) - with open(path, "w") as f: + tmp_path = path.with_suffix(".tmp") + with open(tmp_path, "w") as f: yaml.safe_dump(state.model_dump(exclude_none=True), f, sort_keys=False) + f.flush() + tmp_path.replace(path) def _load_state(self, state_path: str) -> BatchState: """Load checkpoint state from file.""" @@ -323,13 +326,24 @@ def _build_batch_report( error=idx_state.error, ) ) - if idx_state.status == "succeeded": + if idx_state.status in ("succeeded", "success"): succeeded += 1 elif idx_state.status == "failed": failed += 1 else: skipped += 1 + # Add remaining indexes (fail-fast left them pending) as skipped + for remaining_name in state.remaining: + index_reports.append( + BatchIndexReport( + name=remaining_name, + status="skipped", + error="Skipped due to fail_fast policy", + ) + ) + skipped += 1 + # Add non-applicable indexes as skipped for idx in batch_plan.indexes: if not idx.applicable: diff --git a/redisvl/migration/batch_planner.py b/redisvl/migration/batch_planner.py index 33c265c4a..6e3ffd965 100644 --- a/redisvl/migration/batch_planner.py +++ b/redisvl/migration/batch_planner.py @@ -37,6 +37,7 @@ def create_batch_plan( redis_client: Optional[Any] = None, failure_policy: str = "fail_fast", ) -> BatchPlan: + # --- NEW: validate failure_policy early --- """Create a batch migration plan for multiple indexes. Args: @@ -51,6 +52,13 @@ def create_batch_plan( Returns: BatchPlan with shared patch and per-index applicability. """ + _VALID_FAILURE_POLICIES = {"fail_fast", "continue_on_error"} + if failure_policy not in _VALID_FAILURE_POLICIES: + raise ValueError( + f"Invalid failure_policy '{failure_policy}'. " + f"Must be one of: {sorted(_VALID_FAILURE_POLICIES)}" + ) + # Get Redis client client = redis_client if client is None: @@ -95,6 +103,7 @@ def create_batch_plan( datatype_changes = MigrationPlanner.get_vector_datatype_changes( plan.source.schema_snapshot, plan.merged_target_schema, + rename_operations=plan.rename_operations, ) if datatype_changes: requires_quantization = True @@ -134,7 +143,8 @@ def _resolve_index_names( if indexes_file: return self._load_indexes_from_file(indexes_file) - # Pattern matching + # Pattern matching -- pattern is guaranteed non-None at this point + assert pattern is not None, "pattern must be set when reaching fnmatch" all_indexes = list_indexes(redis_client=redis_client) matched = [idx for idx in all_indexes if fnmatch.fnmatch(idx, pattern)] return sorted(matched) @@ -167,10 +177,18 @@ def _check_index_applicability( schema_dict = index.schema.to_dict() field_names = {f["name"] for f in schema_dict.get("fields", [])} - # Check that all update_fields exist in this index + # Build a set of field names that includes rename targets so + # that update_fields referencing the NEW name of a renamed field + # are considered applicable. + rename_target_names = { + fr.new_name for fr in shared_patch.changes.rename_fields + } + effective_field_names = field_names | rename_target_names + + # Check that all update_fields exist in this index (or are rename targets) missing_fields = [] for field_update in shared_patch.changes.update_fields: - if field_update.name not in field_names: + if field_update.name not in effective_field_names: missing_fields.append(field_update.name) if missing_fields: From 312db62bdef4d793d8107a5f15dcc18f3a1ff05c Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Wed, 1 Apr 2026 19:56:23 -0400 Subject: [PATCH 4/8] fix: remove unused imports in test_batch_migration (#563) Remove unused Path, MagicMock, and patch imports. --- tests/unit/test_batch_migration.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/unit/test_batch_migration.py b/tests/unit/test_batch_migration.py index 31adecd1d..1a4c10924 100644 --- a/tests/unit/test_batch_migration.py +++ b/tests/unit/test_batch_migration.py @@ -10,9 +10,8 @@ """ from fnmatch import fnmatch -from pathlib import Path from typing import Any, Dict, List -from unittest.mock import MagicMock, Mock, patch +from unittest.mock import Mock import pytest import yaml From 2f5c979d93f894302f1c9f8602f755ddb87c2938 Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Wed, 1 Apr 2026 20:28:32 -0400 Subject: [PATCH 5/8] fix: batch executor formatting + test status string fix --- redisvl/migration/batch_executor.py | 4 +++- tests/unit/test_batch_migration.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/redisvl/migration/batch_executor.py b/redisvl/migration/batch_executor.py index 4bb355a73..fc9254afa 100644 --- a/redisvl/migration/batch_executor.py +++ b/redisvl/migration/batch_executor.py @@ -221,7 +221,9 @@ def _migrate_single_index( ) # Sanitize index_name to prevent path traversal - safe_name = index_name.replace("/", "_").replace("\\", "_").replace("..", "_") + safe_name = ( + index_name.replace("/", "_").replace("\\", "_").replace("..", "_") + ) report_file = report_dir / f"{safe_name}_report.yaml" write_yaml(report.model_dump(exclude_none=True), str(report_file)) diff --git a/tests/unit/test_batch_migration.py b/tests/unit/test_batch_migration.py index 1a4c10924..06595477f 100644 --- a/tests/unit/test_batch_migration.py +++ b/tests/unit/test_batch_migration.py @@ -831,7 +831,7 @@ def test_resume_from_checkpoint(self, tmp_path): completed=[ BatchIndexState( name="idx1", - status="succeeded", + status="success", completed_at="2026-03-20T10:05:00Z", ) ], @@ -970,7 +970,7 @@ def test_retry_failed_on_resume(self, tmp_path): name="idx1", status="failed", completed_at="2026-03-20T10:03:00Z" ), BatchIndexState( - name="idx2", status="succeeded", completed_at="2026-03-20T10:05:00Z" + name="idx2", status="success", completed_at="2026-03-20T10:05:00Z" ), ], current_index=None, @@ -1046,7 +1046,7 @@ def progress_callback(index_name, position, total, status): "index": "idx1", "pos": 1, "total": 3, - "status": "succeeded", + "status": "success", } From 24cb9de06b6029c6499b60c44e93fc6d740705b5 Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Wed, 1 Apr 2026 21:47:56 -0400 Subject: [PATCH 6/8] fix: address review round 3 for migrate-batch (#563) - Add rename target collision validation in batch applicability check - Propagate infrastructure errors (ConnectionError, TimeoutError) instead of silently marking as not applicable --- redisvl/migration/batch_planner.py | 42 ++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/redisvl/migration/batch_planner.py b/redisvl/migration/batch_planner.py index 6e3ffd965..b064ef4a1 100644 --- a/redisvl/migration/batch_planner.py +++ b/redisvl/migration/batch_planner.py @@ -7,6 +7,7 @@ from pathlib import Path from typing import Any, List, Optional +import redis.exceptions import yaml from redisvl.index import SearchIndex @@ -198,6 +199,38 @@ def _check_index_applicability( skip_reason=f"Missing fields: {', '.join(missing_fields)}", ) + # Validate rename targets don't collide with each other or + # existing fields (after accounting for the source being renamed away) + if shared_patch.changes.rename_fields: + rename_targets = [ + fr.new_name for fr in shared_patch.changes.rename_fields + ] + rename_sources = { + fr.old_name for fr in shared_patch.changes.rename_fields + } + seen_targets: dict[str, int] = {} + for t in rename_targets: + seen_targets[t] = seen_targets.get(t, 0) + 1 + duplicates = [t for t, c in seen_targets.items() if c > 1] + if duplicates: + return BatchIndexEntry( + name=index_name, + applicable=False, + skip_reason=f"Rename targets collide: {', '.join(duplicates)}", + ) + # Check if any rename target already exists and isn't itself being renamed away + collisions = [ + t + for t in rename_targets + if t in field_names and t not in rename_sources + ] + if collisions: + return BatchIndexEntry( + name=index_name, + applicable=False, + skip_reason=f"Rename targets already exist: {', '.join(collisions)}", + ) + # Check that add_fields don't already exist existing_adds: list[str] = [] for field in shared_patch.changes.add_fields: @@ -232,6 +265,15 @@ def _check_index_applicability( return BatchIndexEntry(name=index_name, applicable=True) + except ( + ConnectionError, + OSError, + TimeoutError, + redis.exceptions.ConnectionError, + ) as e: + # Infrastructure failures should propagate, not be silently + # treated as "not applicable". + raise except Exception as e: return BatchIndexEntry( name=index_name, From d3bc28bba6d959fbac8278a518874b5cdbcc79c9 Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Thu, 2 Apr 2026 10:19:48 -0400 Subject: [PATCH 7/8] fix: address review round 4 for migrate-batch (#563) - Account for renames before rejecting field additions in batch applicability check - Fields being renamed away free their name for new additions --- redisvl/migration/batch_planner.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/redisvl/migration/batch_planner.py b/redisvl/migration/batch_planner.py index b064ef4a1..1b19740d0 100644 --- a/redisvl/migration/batch_planner.py +++ b/redisvl/migration/batch_planner.py @@ -231,11 +231,16 @@ def _check_index_applicability( skip_reason=f"Rename targets already exist: {', '.join(collisions)}", ) - # Check that add_fields don't already exist + # Check that add_fields don't already exist. + # Fields being renamed away free their name for new additions. + rename_sources = { + fr.old_name for fr in shared_patch.changes.rename_fields + } + post_rename_fields = (field_names - rename_sources) | rename_target_names existing_adds: list[str] = [] for field in shared_patch.changes.add_fields: field_name = field.get("name") - if field_name and field_name in field_names: + if field_name and field_name in post_rename_fields: existing_adds.append(field_name) if existing_adds: From 7a9f38fde1f764c63ae5bead8d2cf2ebd732507c Mon Sep 17 00:00:00 2001 From: Nitin Kanukolanu Date: Thu, 2 Apr 2026 10:45:06 -0400 Subject: [PATCH 8/8] fix: persist plan path on resume in batch executor (#563) - Update plan_path when loading existing state if caller provides a new path - Handles cases where original path was empty or pointed to deleted temp dir --- redisvl/migration/batch_executor.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/redisvl/migration/batch_executor.py b/redisvl/migration/batch_executor.py index fc9254afa..42c63c35c 100644 --- a/redisvl/migration/batch_executor.py +++ b/redisvl/migration/batch_executor.py @@ -260,6 +260,10 @@ def _init_or_load_state( f"current batch plan '{batch_plan.batch_id}'. " "Remove the stale state file or use a different state_path." ) + # Update plan_path if caller provided one (handles cases where + # the original path was empty or pointed to a deleted temp dir). + if batch_plan_path: + loaded.plan_path = str(Path(batch_plan_path).resolve()) return loaded # Create new state with plan_path for resume support