Skip to content

Commit 8fc8be7

Browse files
committed
fix: executor bug fixes and improvements (#561)
- Fix partial key renames: fail fast on collision instead of continuing - Fix completed checkpoint: resume from post-drop state for index recreation - Pass rename_operations to get_vector_datatype_changes for renamed vectors - Add warning when SCAN falls back to '*' (full keyspace scan) - Warn when field rename overwrites existing destination field - Fix has_prefix_change falsy check for empty string prefixes - Add logger to migration utils
1 parent 7cd35a6 commit 8fc8be7

2 files changed

Lines changed: 48 additions & 13 deletions

File tree

redisvl/migration/executor.py

Lines changed: 35 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -271,16 +271,20 @@ def _rename_keys(
271271
logger.warning(f"Error in rename batch: {e}")
272272
raise
273273

274+
# Fail fast on collisions to avoid partial renames across batches.
275+
# Keys already renamed in THIS batch are not rolled back -- caller
276+
# can inspect the error to understand which keys moved.
277+
if collisions:
278+
raise RuntimeError(
279+
f"Prefix rename aborted after {renamed} successful rename(s): "
280+
f"{len(collisions)} destination key(s) already exist "
281+
f"(first 5: {collisions[:5]}). This would overwrite existing data. "
282+
f"Remove conflicting keys or choose a different prefix."
283+
)
284+
274285
if progress_callback:
275286
progress_callback(min(i + pipeline_size, total), total)
276287

277-
if collisions:
278-
raise RuntimeError(
279-
f"Prefix rename aborted: {len(collisions)} destination key(s) already exist "
280-
f"(first 5: {collisions[:5]}). This would overwrite existing data. "
281-
f"Remove conflicting keys or choose a different prefix."
282-
)
283-
284288
return renamed
285289

286290
def _rename_field_in_hash(
@@ -305,17 +309,29 @@ def _rename_field_in_hash(
305309
for i in range(0, total, pipeline_size):
306310
batch = keys[i : i + pipeline_size]
307311

308-
# First, get all old field values
312+
# First, get old field values AND check if destination exists
309313
pipe = client.pipeline(transaction=False)
310314
for key in batch:
311315
pipe.hget(key, old_name)
312-
values = pipe.execute()
316+
pipe.hexists(key, new_name)
317+
raw_results = pipe.execute()
318+
# Interleaved: [hget_0, hexists_0, hget_1, hexists_1, ...]
319+
values = raw_results[0::2]
320+
dest_exists = raw_results[1::2]
313321

314322
# Now set new field and delete old
315323
pipe = client.pipeline(transaction=False)
316324
batch_ops = 0
317-
for key, value in zip(batch, values):
325+
for key, value, exists in zip(batch, values, dest_exists):
318326
if value is not None:
327+
if exists:
328+
logger.warning(
329+
"Field '%s' already exists in key '%s'; "
330+
"overwriting with value from '%s'",
331+
new_name,
332+
key,
333+
old_name,
334+
)
319335
pipe.hset(key, new_name, value)
320336
pipe.hdel(key, old_name)
321337
batch_ops += 1
@@ -448,8 +464,12 @@ def apply(
448464
plan.source.index_name,
449465
)
450466
elif existing_checkpoint.status == "completed":
467+
# Quantization completed before the crash -- still need
468+
# to resume from post-drop state (index recreation).
469+
resuming_from_checkpoint = True
451470
logger.info(
452-
"Checkpoint at %s is already completed, ignoring",
471+
"Checkpoint at %s is already completed; resuming "
472+
"index recreation from post-drop state",
453473
checkpoint_path,
454474
)
455475
else:
@@ -510,12 +530,14 @@ def apply(
510530

511531
# Check if we need to re-encode vectors for datatype changes
512532
datatype_changes = MigrationPlanner.get_vector_datatype_changes(
513-
plan.source.schema_snapshot, plan.merged_target_schema
533+
plan.source.schema_snapshot,
534+
plan.merged_target_schema,
535+
rename_operations=plan.rename_operations,
514536
)
515537

516538
# Check for rename operations
517539
rename_ops = plan.rename_operations
518-
has_prefix_change = bool(rename_ops.change_prefix)
540+
has_prefix_change = rename_ops.change_prefix is not None
519541
has_field_renames = bool(rename_ops.rename_fields)
520542
needs_quantization = bool(datatype_changes) and storage_type != "json"
521543
needs_enumeration = needs_quantization or has_prefix_change or has_field_renames

redisvl/migration/utils.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,17 @@
11
from __future__ import annotations
22

33
import json
4+
import logging
45
import time
56
from pathlib import Path
67
from typing import Any, Callable, Dict, List, Optional, Tuple
78

89
import yaml
910

11+
from redisvl.utils.log import get_logger
12+
13+
logger = get_logger(__name__)
14+
1015
from redisvl.index import SearchIndex
1116
from redisvl.migration.models import (
1217
AOF_HSET_OVERHEAD_BYTES,
@@ -83,11 +88,19 @@ def normalize_keys(keys: List[str]) -> List[str]:
8388
def build_scan_match_patterns(prefixes: List[str], key_separator: str) -> List[str]:
8489
"""Build SCAN patterns for all configured prefixes."""
8590
if not prefixes:
91+
logger.warning(
92+
"No prefixes provided for SCAN pattern. "
93+
"Using '*' which will scan the entire keyspace."
94+
)
8695
return ["*"]
8796

8897
patterns = set()
8998
for prefix in prefixes:
9099
if not prefix:
100+
logger.warning(
101+
"Empty prefix in prefix list. "
102+
"Using '*' which will scan the entire keyspace."
103+
)
91104
return ["*"]
92105
if key_separator and not prefix.endswith(key_separator):
93106
patterns.add(f"{prefix}{key_separator}*")

0 commit comments

Comments (0)