From 8f218062cbc07e35ebc757504b88925b941a379b Mon Sep 17 00:00:00 2001
From: Kay Robbins <robbins.kay@gmail.com>
Date: Tue, 16 Jun 2026 13:43:06 -0500
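Subject: [PATCH 1/2] Fixed KeyMap._remap Series construction for pandas 3.0+

Build map_series from explicit data/index lists instead of passing
map_dict directly to pd.Series, add remap regression tests, and update
the spec_tests/hed-schemas and spec_tests/hed-tests submodule pointers.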
---
 hed/tools/analysis/key_map.py        |   6 +-
 spec_tests/hed-schemas               |   2 +-
 spec_tests/hed-tests                 |   2 +-
 tests/tools/analysis/test_key_map.py | 132 +++++++++++++++++++++++++++
 4 files changed, 139 insertions(+), 3 deletions(-)

diff --git a/hed/tools/analysis/key_map.py b/hed/tools/analysis/key_map.py
index 0ca57c14..675dffac 100644
--- a/hed/tools/analysis/key_map.py
+++ b/hed/tools/analysis/key_map.py
@@ -146,7 +146,11 @@ def _remap(self, df):
         # Key series now contains row_number: hash for each row in the dataframe
 
         # Add a column containing the mapped index for each row
-        map_series = pd.Series(self.map_dict)  # map_series is hash:row_index for each entry in the map_dict index
+        # Use explicit index/data to ensure pandas 3.0+ compatibility
+        map_series = pd.Series(
+            data=list(self.map_dict.values()),
+            index=list(self.map_dict.keys())
+        )  # map_series is hash:row_index for each entry in the map_dict index
         key_values = key_series.map(map_series)  # key_values is df_row_number:map_dict_index
         # e.g. a key_value entry of 0:79 means row 0 maps to row 79 in the map_dict
 
diff --git a/spec_tests/hed-schemas b/spec_tests/hed-schemas
index 73ecb358..0a4a7b61 160000
--- a/spec_tests/hed-schemas
+++ b/spec_tests/hed-schemas
@@ -1 +1 @@
-Subproject commit 73ecb358f61d7a470e1da5a73a10661db588071a
+Subproject commit 0a4a7b613b82a9b09955bba9a22a36791d1de035
diff --git a/spec_tests/hed-tests b/spec_tests/hed-tests
index 8c35600d..ea4237ca 160000
--- a/spec_tests/hed-tests
+++ b/spec_tests/hed-tests
@@ -1 +1 @@
-Subproject commit 8c35600d13ee65c65bd634490f2fb1f0ff0514f1
+Subproject commit ea4237cacb3bf2cad784e50808d370a9d0251c76
diff --git a/tests/tools/analysis/test_key_map.py b/tests/tools/analysis/test_key_map.py
index 0beb4425..09e7e621 100644
--- a/tests/tools/analysis/test_key_map.py
+++ b/tests/tools/analysis/test_key_map.py
@@ -163,6 +163,138 @@ def test_update_map_not_unique(self):
         self.assertEqual(len(t_map.col_map.columns), 4, "update should produce correct number of columns")
         self.assertEqual(len(t_map.col_map), len(t_map.count_dict), "update should produce the correct number of rows")
 
+    def test_remap_numeric_keys_simple(self):
+        """Test remap with simple numeric keys (pandas 3.0 compatibility)."""
+        # Create a simple KeyMap with numeric keys
+        key_map = KeyMap(['col1'], ['result'])
+        
+        # Create a mapping DataFrame with numeric keys
+        map_df = pd.DataFrame({
+            'col1': [1, 2, 3],
+            'result': ['one', 'two', 'three']
+        })
+        key_map.update(map_df)
+        
+        # Create test data with numeric values
+        test_df = pd.DataFrame({
+            'col1': [1, 2, 1, 3, 2]
+        })
+        
+        # This should not raise ValueError on pandas 3.0.3
+        df_result, missing = key_map.remap(test_df)
+        
+        self.assertEqual(len(df_result), 5, "remap should preserve number of rows")
+        self.assertEqual(df_result.iloc[0]['result'], 'one', "remap should map 1 to 'one'")
+        self.assertEqual(df_result.iloc[1]['result'], 'two', "remap should map 2 to 'two'")
+        self.assertEqual(df_result.iloc[2]['result'], 'one', "remap should map 1 to 'one'")
+        self.assertEqual(df_result.iloc[3]['result'], 'three', "remap should map 3 to 'three'")
+        self.assertFalse(missing, "remap should not have missing keys")
+
+    def test_remap_numeric_keys_as_strings(self):
+        """Test remap with numeric keys stored as strings (common case)."""
+        key_map = KeyMap(['test_code'], ['test_label'])
+        
+        # Create a mapping where numeric keys are stored as strings
+        map_df = pd.DataFrame({
+            'test_code': ['1', '2', '3', '4'],
+            'test_label': ['low', 'medium', 'high', 'critical']
+        })
+        key_map.update(map_df)
+        
+        # Create test data with numeric values as strings
+        test_df = pd.DataFrame({
+            'test_code': ['1', '2', '3', '1', '4', '2']
+        })
+        
+        df_result, missing = key_map.remap(test_df)
+        
+        self.assertEqual(len(df_result), 6, "remap should preserve number of rows")
+        self.assertEqual(df_result.iloc[0]['test_label'], 'low')
+        self.assertEqual(df_result.iloc[1]['test_label'], 'medium')
+        self.assertEqual(df_result.iloc[2]['test_label'], 'high')
+        self.assertEqual(df_result.iloc[4]['test_label'], 'critical')
+        self.assertFalse(missing, "remap should not have missing keys")
+
+    def test_remap_numeric_keys_with_na(self):
+        """Test remap with numeric keys including n/a values."""
+        key_map = KeyMap(['value'], ['category'])
+        
+        # Create mapping with numeric and string keys
+        map_df = pd.DataFrame({
+            'value': ['1', '2', '3'],
+            'category': ['cat_a', 'cat_b', 'cat_c']
+        })
+        key_map.update(map_df)
+        
+        # Create test data with n/a values
+        test_df = pd.DataFrame({
+            'value': ['1', '2', 'n/a', '3', 'n/a']
+        })
+        
+        df_result, missing = key_map.remap(test_df)
+        
+        self.assertEqual(len(df_result), 5, "remap should preserve number of rows")
+        self.assertEqual(df_result.iloc[0]['category'], 'cat_a')
+        self.assertEqual(df_result.iloc[2]['category'], 'n/a', "remap should map n/a to n/a")
+        self.assertEqual(df_result.iloc[3]['category'], 'cat_c')
+
+    def test_remap_multiple_numeric_keys_cascade(self):
+        """Test remap with multiple numeric keys cascading (the pandas 3.0.3 failing case)."""
+        # This is the exact scenario from pandas_fail.md that was failing
+        key_map = KeyMap(['test', 'response_accuracy'], ['result'])
+        
+        # Create mapping for multiple key combination
+        map_df = pd.DataFrame({
+            'test': ['1', '2'],
+            'response_accuracy': ['correct', 'correct'],
+            'result': ['correct_left', 'correct_right']
+        })
+        key_map.update(map_df)
+        
+        # Create test data matching the failure scenario
+        test_df = pd.DataFrame({
+            'test': ['1', '2', 'n/a', '3', '4', '5'],
+            'response_accuracy': ['correct', 'correct', 'correct', 'n/a', 'correct', 'correct']
+        })
+        
+        # This was the failing line: map_series = pd.Series(self.map_dict)
+        # Should work now with explicit index/data parameters
+        df_result, missing = key_map.remap(test_df)
+        
+        self.assertEqual(len(df_result), 6, "remap should preserve number of rows")
+        self.assertEqual(df_result.iloc[0]['result'], 'correct_left')
+        self.assertEqual(df_result.iloc[1]['result'], 'correct_right')
+        # Rows with missing key combinations should get 'n/a'
+        self.assertEqual(df_result.iloc[2]['result'], 'n/a')
+        self.assertEqual(df_result.iloc[3]['result'], 'n/a')
+
+    def test_remap_large_numeric_key_dict(self):
+        """Test remap with a large dictionary of numeric keys to ensure Series construction works."""
+        key_map = KeyMap(['event_id'], ['event_name'])
+        
+        # Create a large mapping with numeric event IDs
+        size = 100
+        map_data = {
+            'event_id': [str(i) for i in range(size)],
+            'event_name': [f'event_{i}' for i in range(size)]
+        }
+        map_df = pd.DataFrame(map_data)
+        key_map.update(map_df)
+        
+        # Create test data with random event IDs
+        test_data = {
+            'event_id': [str(i % 50) for i in range(200)]  # Use first 50 event IDs
+        }
+        test_df = pd.DataFrame(test_data)
+        
+        df_result, missing = key_map.remap(test_df)
+        
+        self.assertEqual(len(df_result), 200, "remap should preserve number of rows")
+        # Verify some mappings
+        self.assertEqual(df_result.iloc[0]['event_name'], 'event_0')
+        self.assertEqual(df_result.iloc[50]['event_name'], 'event_0')  # 50 % 50 = 0
+        self.assertEqual(df_result.iloc[99]['event_name'], 'event_49')  # 99 % 50 = 49
+
 
 if __name__ == "__main__":
     unittest.main()

From ce981b4753e3ed405b98a7ee6ccdf65cff2f593e Mon Sep 17 00:00:00 2001
From: Kay Robbins <robbins.kay@gmail.com>
Date: Tue, 16 Jun 2026 14:37:44 -0500
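Subject: [PATCH 2/2] Addressed Copilot review comments on KeyMap remap tests

Reformatted the new remap tests and the map_series construction, added
assertions on the returned missing-row lists, and replaced the
pandas_fail.md reference with issue #1329.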
---
 hed/tools/analysis/key_map.py        |   3 +-
 tests/tools/analysis/test_key_map.py | 150 ++++++++++++---------------
 2 files changed, 70 insertions(+), 83 deletions(-)

diff --git a/hed/tools/analysis/key_map.py b/hed/tools/analysis/key_map.py
index 675dffac..51b925a9 100644
--- a/hed/tools/analysis/key_map.py
+++ b/hed/tools/analysis/key_map.py
@@ -148,8 +148,7 @@ def _remap(self, df):
         # Add a column containing the mapped index for each row
         # Use explicit index/data to ensure pandas 3.0+ compatibility
         map_series = pd.Series(
-            data=list(self.map_dict.values()),
-            index=list(self.map_dict.keys())
+            data=list(self.map_dict.values()), index=list(self.map_dict.keys())
         )  # map_series is hash:row_index for each entry in the map_dict index
         key_values = key_series.map(map_series)  # key_values is df_row_number:map_dict_index
         # e.g. a key_value entry of 0:79 means row 0 maps to row 79 in the map_dict
diff --git a/tests/tools/analysis/test_key_map.py b/tests/tools/analysis/test_key_map.py
index 09e7e621..6f8a52f1 100644
--- a/tests/tools/analysis/test_key_map.py
+++ b/tests/tools/analysis/test_key_map.py
@@ -166,134 +166,122 @@ def test_update_map_not_unique(self):
     def test_remap_numeric_keys_simple(self):
         """Test remap with simple numeric keys (pandas 3.0 compatibility)."""
         # Create a simple KeyMap with numeric keys
-        key_map = KeyMap(['col1'], ['result'])
-        
+        key_map = KeyMap(["col1"], ["result"])
+
         # Create a mapping DataFrame with numeric keys
-        map_df = pd.DataFrame({
-            'col1': [1, 2, 3],
-            'result': ['one', 'two', 'three']
-        })
+        map_df = pd.DataFrame({"col1": [1, 2, 3], "result": ["one", "two", "three"]})
         key_map.update(map_df)
-        
+
         # Create test data with numeric values
-        test_df = pd.DataFrame({
-            'col1': [1, 2, 1, 3, 2]
-        })
-        
+        test_df = pd.DataFrame({"col1": [1, 2, 1, 3, 2]})
+
         # This should not raise ValueError on pandas 3.0.3
         df_result, missing = key_map.remap(test_df)
-        
+
         self.assertEqual(len(df_result), 5, "remap should preserve number of rows")
-        self.assertEqual(df_result.iloc[0]['result'], 'one', "remap should map 1 to 'one'")
-        self.assertEqual(df_result.iloc[1]['result'], 'two', "remap should map 2 to 'two'")
-        self.assertEqual(df_result.iloc[2]['result'], 'one', "remap should map 1 to 'one'")
-        self.assertEqual(df_result.iloc[3]['result'], 'three', "remap should map 3 to 'three'")
+        self.assertEqual(df_result.iloc[0]["result"], "one", "remap should map 1 to 'one'")
+        self.assertEqual(df_result.iloc[1]["result"], "two", "remap should map 2 to 'two'")
+        self.assertEqual(df_result.iloc[2]["result"], "one", "remap should map 1 to 'one'")
+        self.assertEqual(df_result.iloc[3]["result"], "three", "remap should map 3 to 'three'")
         self.assertFalse(missing, "remap should not have missing keys")
 
     def test_remap_numeric_keys_as_strings(self):
         """Test remap with numeric keys stored as strings (common case)."""
-        key_map = KeyMap(['test_code'], ['test_label'])
-        
+        key_map = KeyMap(["test_code"], ["test_label"])
+
         # Create a mapping where numeric keys are stored as strings
-        map_df = pd.DataFrame({
-            'test_code': ['1', '2', '3', '4'],
-            'test_label': ['low', 'medium', 'high', 'critical']
-        })
+        map_df = pd.DataFrame({"test_code": ["1", "2", "3", "4"], "test_label": ["low", "medium", "high", "critical"]})
         key_map.update(map_df)
-        
+
         # Create test data with numeric values as strings
-        test_df = pd.DataFrame({
-            'test_code': ['1', '2', '3', '1', '4', '2']
-        })
-        
+        test_df = pd.DataFrame({"test_code": ["1", "2", "3", "1", "4", "2"]})
+
         df_result, missing = key_map.remap(test_df)
-        
+
         self.assertEqual(len(df_result), 6, "remap should preserve number of rows")
-        self.assertEqual(df_result.iloc[0]['test_label'], 'low')
-        self.assertEqual(df_result.iloc[1]['test_label'], 'medium')
-        self.assertEqual(df_result.iloc[2]['test_label'], 'high')
-        self.assertEqual(df_result.iloc[4]['test_label'], 'critical')
+        self.assertEqual(df_result.iloc[0]["test_label"], "low")
+        self.assertEqual(df_result.iloc[1]["test_label"], "medium")
+        self.assertEqual(df_result.iloc[2]["test_label"], "high")
+        self.assertEqual(df_result.iloc[4]["test_label"], "critical")
         self.assertFalse(missing, "remap should not have missing keys")
 
     def test_remap_numeric_keys_with_na(self):
         """Test remap with numeric keys including n/a values."""
-        key_map = KeyMap(['value'], ['category'])
-        
+        key_map = KeyMap(["value"], ["category"])
+
         # Create mapping with numeric and string keys
-        map_df = pd.DataFrame({
-            'value': ['1', '2', '3'],
-            'category': ['cat_a', 'cat_b', 'cat_c']
-        })
+        map_df = pd.DataFrame({"value": ["1", "2", "3"], "category": ["cat_a", "cat_b", "cat_c"]})
         key_map.update(map_df)
-        
+
         # Create test data with n/a values
-        test_df = pd.DataFrame({
-            'value': ['1', '2', 'n/a', '3', 'n/a']
-        })
-        
+        test_df = pd.DataFrame({"value": ["1", "2", "n/a", "3", "n/a"]})
+
         df_result, missing = key_map.remap(test_df)
-        
+
         self.assertEqual(len(df_result), 5, "remap should preserve number of rows")
-        self.assertEqual(df_result.iloc[0]['category'], 'cat_a')
-        self.assertEqual(df_result.iloc[2]['category'], 'n/a', "remap should map n/a to n/a")
-        self.assertEqual(df_result.iloc[3]['category'], 'cat_c')
+        self.assertEqual(df_result.iloc[0]["category"], "cat_a")
+        self.assertEqual(df_result.iloc[2]["category"], "n/a", "remap should map n/a to n/a")
+        self.assertEqual(df_result.iloc[3]["category"], "cat_c")
+        self.assertEqual(missing, [2, 4], "remap should report rows with unmapped n/a key")
 
     def test_remap_multiple_numeric_keys_cascade(self):
         """Test remap with multiple numeric keys cascading (the pandas 3.0.3 failing case)."""
-        # This is the exact scenario from pandas_fail.md that was failing
-        key_map = KeyMap(['test', 'response_accuracy'], ['result'])
-        
+        # This is the exact scenario from issue #1329 that was failing
+        key_map = KeyMap(["test", "response_accuracy"], ["result"])
+
         # Create mapping for multiple key combination
-        map_df = pd.DataFrame({
-            'test': ['1', '2'],
-            'response_accuracy': ['correct', 'correct'],
-            'result': ['correct_left', 'correct_right']
-        })
+        map_df = pd.DataFrame(
+            {
+                "test": ["1", "2"],
+                "response_accuracy": ["correct", "correct"],
+                "result": ["correct_left", "correct_right"],
+            }
+        )
         key_map.update(map_df)
-        
+
         # Create test data matching the failure scenario
-        test_df = pd.DataFrame({
-            'test': ['1', '2', 'n/a', '3', '4', '5'],
-            'response_accuracy': ['correct', 'correct', 'correct', 'n/a', 'correct', 'correct']
-        })
-        
+        test_df = pd.DataFrame(
+            {
+                "test": ["1", "2", "n/a", "3", "4", "5"],
+                "response_accuracy": ["correct", "correct", "correct", "n/a", "correct", "correct"],
+            }
+        )
+
         # This was the failing line: map_series = pd.Series(self.map_dict)
         # Should work now with explicit index/data parameters
         df_result, missing = key_map.remap(test_df)
-        
+
         self.assertEqual(len(df_result), 6, "remap should preserve number of rows")
-        self.assertEqual(df_result.iloc[0]['result'], 'correct_left')
-        self.assertEqual(df_result.iloc[1]['result'], 'correct_right')
-        # Rows with missing key combinations should get 'n/a'
-        self.assertEqual(df_result.iloc[2]['result'], 'n/a')
-        self.assertEqual(df_result.iloc[3]['result'], 'n/a')
+        self.assertEqual(df_result.iloc[0]["result"], "correct_left")
+        self.assertEqual(df_result.iloc[1]["result"], "correct_right")
+        # Rows with missing key combinations should get n/a
+        self.assertEqual(df_result.iloc[2]["result"], "n/a")
+        self.assertEqual(df_result.iloc[3]["result"], "n/a")
+        self.assertEqual(missing, [2, 3, 4, 5], "remap should report rows with unmapped key combinations")
 
     def test_remap_large_numeric_key_dict(self):
         """Test remap with a large dictionary of numeric keys to ensure Series construction works."""
-        key_map = KeyMap(['event_id'], ['event_name'])
-        
+        key_map = KeyMap(["event_id"], ["event_name"])
+
         # Create a large mapping with numeric event IDs
         size = 100
-        map_data = {
-            'event_id': [str(i) for i in range(size)],
-            'event_name': [f'event_{i}' for i in range(size)]
-        }
+        map_data = {"event_id": [str(i) for i in range(size)], "event_name": [f"event_{i}" for i in range(size)]}
         map_df = pd.DataFrame(map_data)
         key_map.update(map_df)
-        
+
         # Create test data with random event IDs
         test_data = {
-            'event_id': [str(i % 50) for i in range(200)]  # Use first 50 event IDs
+            "event_id": [str(i % 50) for i in range(200)]  # Use first 50 event IDs
         }
         test_df = pd.DataFrame(test_data)
-        
-        df_result, missing = key_map.remap(test_df)
-        
+
+        df_result, _missing = key_map.remap(test_df)
+
         self.assertEqual(len(df_result), 200, "remap should preserve number of rows")
         # Verify some mappings
-        self.assertEqual(df_result.iloc[0]['event_name'], 'event_0')
-        self.assertEqual(df_result.iloc[50]['event_name'], 'event_0')  # 50 % 50 = 0
-        self.assertEqual(df_result.iloc[99]['event_name'], 'event_49')  # 99 % 50 = 49
+        self.assertEqual(df_result.iloc[0]["event_name"], "event_0")
+        self.assertEqual(df_result.iloc[50]["event_name"], "event_0")  # 50 % 50 = 0
+        self.assertEqual(df_result.iloc[99]["event_name"], "event_49")  # 99 % 50 = 49
 
 
 if __name__ == "__main__":