Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion hed/tools/analysis/key_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,10 @@ def _remap(self, df):
# Key series now contains row_number: hash for each row in the dataframe

# Add a column containing the mapped index for each row
map_series = pd.Series(self.map_dict) # map_series is hash:row_index for each entry in the map_dict index
# Use explicit index/data to ensure pandas 3.0+ compatibility
map_series = pd.Series(
data=list(self.map_dict.values()), index=list(self.map_dict.keys())
) # map_series is hash:row_index for each entry in the map_dict index
key_values = key_series.map(map_series) # key_values is df_row_number:map_dict_index
# e.g. a key_value entry of 0:79 means row 0 maps to row 79 in the map_dict

Expand Down
2 changes: 1 addition & 1 deletion spec_tests/hed-schemas
Submodule hed-schemas updated 85 files
+1 −1 .github/workflows/docs.yaml
+2 −2 .github/workflows/links.yaml
+2 −2 .github/workflows/mdformat.yaml
+2 −2 .github/workflows/ruff.yaml
+2 −2 .github/workflows/typos.yaml
+1 −1 .github/workflows/validate_schemas.yaml
+1 −1 .github/workflows/verify_source_branch.yaml
+0 −14,407 library_schemas/testlib/hedjson/HED_testlib_1.0.2.json
+0 −265 library_schemas/testlib/hedjson/HED_testlib_2.0.0.json
+0 −225 library_schemas/testlib/hedjson/HED_testlib_2.1.0.json
+0 −196 library_schemas/testlib/hedjson/HED_testlib_3.0.0.json
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_1.0.2/HED_testlib_1.0.2_AnnotationProperty.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_1.0.2/HED_testlib_1.0.2_AnnotationPropertyExternal.tsv
+0 −6 library_schemas/testlib/hedtsv/HED_testlib_1.0.2/HED_testlib_1.0.2_AttributeProperty.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_1.0.2/HED_testlib_1.0.2_DataProperty.tsv
+0 −20 library_schemas/testlib/hedtsv/HED_testlib_1.0.2/HED_testlib_1.0.2_ObjectProperty.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_1.0.2/HED_testlib_1.0.2_Prefixes.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_1.0.2/HED_testlib_1.0.2_Sources.tsv
+0 −4 library_schemas/testlib/hedtsv/HED_testlib_1.0.2/HED_testlib_1.0.2_Structure.tsv
+0 −1,111 library_schemas/testlib/hedtsv/HED_testlib_1.0.2/HED_testlib_1.0.2_Tag.tsv
+0 −35 library_schemas/testlib/hedtsv/HED_testlib_1.0.2/HED_testlib_1.0.2_Unit.tsv
+0 −14 library_schemas/testlib/hedtsv/HED_testlib_1.0.2/HED_testlib_1.0.2_UnitClass.tsv
+0 −41 library_schemas/testlib/hedtsv/HED_testlib_1.0.2/HED_testlib_1.0.2_UnitModifier.tsv
+0 −6 library_schemas/testlib/hedtsv/HED_testlib_1.0.2/HED_testlib_1.0.2_ValueClass.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.0.0/HED_testlib_2.0.0_AnnotationProperty.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.0.0/HED_testlib_2.0.0_AnnotationPropertyExternal.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.0.0/HED_testlib_2.0.0_AttributeProperty.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.0.0/HED_testlib_2.0.0_DataProperty.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.0.0/HED_testlib_2.0.0_ObjectProperty.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.0.0/HED_testlib_2.0.0_Prefixes.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.0.0/HED_testlib_2.0.0_Sources.tsv
+0 −4 library_schemas/testlib/hedtsv/HED_testlib_2.0.0/HED_testlib_2.0.0_Structure.tsv
+0 −22 library_schemas/testlib/hedtsv/HED_testlib_2.0.0/HED_testlib_2.0.0_Tag.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.0.0/HED_testlib_2.0.0_Unit.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.0.0/HED_testlib_2.0.0_UnitClass.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.0.0/HED_testlib_2.0.0_UnitModifier.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.0.0/HED_testlib_2.0.0_ValueClass.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.1.0/HED_testlib_2.1.0_AnnotationProperty.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.1.0/HED_testlib_2.1.0_AnnotationPropertyExternal.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.1.0/HED_testlib_2.1.0_AttributeProperty.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.1.0/HED_testlib_2.1.0_DataProperty.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.1.0/HED_testlib_2.1.0_ObjectProperty.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.1.0/HED_testlib_2.1.0_Prefixes.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.1.0/HED_testlib_2.1.0_Sources.tsv
+0 −4 library_schemas/testlib/hedtsv/HED_testlib_2.1.0/HED_testlib_2.1.0_Structure.tsv
+0 −20 library_schemas/testlib/hedtsv/HED_testlib_2.1.0/HED_testlib_2.1.0_Tag.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.1.0/HED_testlib_2.1.0_Unit.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.1.0/HED_testlib_2.1.0_UnitClass.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.1.0/HED_testlib_2.1.0_UnitModifier.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_2.1.0/HED_testlib_2.1.0_ValueClass.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_3.0.0/HED_testlib_3.0.0_AnnotationProperty.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_3.0.0/HED_testlib_3.0.0_AnnotationPropertyExternal.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_3.0.0/HED_testlib_3.0.0_AttributeProperty.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_3.0.0/HED_testlib_3.0.0_DataProperty.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_3.0.0/HED_testlib_3.0.0_ObjectProperty.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_3.0.0/HED_testlib_3.0.0_Prefixes.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_3.0.0/HED_testlib_3.0.0_Sources.tsv
+0 −4 library_schemas/testlib/hedtsv/HED_testlib_3.0.0/HED_testlib_3.0.0_Structure.tsv
+0 −16 library_schemas/testlib/hedtsv/HED_testlib_3.0.0/HED_testlib_3.0.0_Tag.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_3.0.0/HED_testlib_3.0.0_Unit.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_3.0.0/HED_testlib_3.0.0_UnitClass.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_3.0.0/HED_testlib_3.0.0_UnitModifier.tsv
+0 −1 library_schemas/testlib/hedtsv/HED_testlib_3.0.0/HED_testlib_3.0.0_ValueClass.tsv
+21 −22 library_schemas/testlib/hedwiki/HED_testlib_2.0.0.mediawiki
+19 −21 library_schemas/testlib/hedwiki/HED_testlib_2.1.0.mediawiki
+1 −1 library_schemas/testlib/hedwiki/HED_testlib_3.0.0.mediawiki
+84 −84 library_schemas/testlib/hedxml/HED_testlib_2.0.0.xml
+53 −67 library_schemas/testlib/hedxml/HED_testlib_2.1.0.xml
+60 −60 library_schemas/testlib/hedxml/HED_testlib_3.0.0.xml
+0 −6,538 schemas_xml_unmerged/HED8.0.0.xml
+0 −7,072 schemas_xml_unmerged/HED8.1.0.xml
+0 −7,297 schemas_xml_unmerged/HED8.2.0.xml
+0 −13,381 schemas_xml_unmerged/HED8.3.0.xml
+0 −13,607 schemas_xml_unmerged/HED8.4.0.xml
+0 −2,145 schemas_xml_unmerged/HED_lang_1.0.0.xml
+0 −3,344 schemas_xml_unmerged/HED_lang_1.1.0.xml
+0 −7,945 schemas_xml_unmerged/HED_score_1.0.0.xml
+0 −6,825 schemas_xml_unmerged/HED_score_1.1.0.xml
+0 −6,836 schemas_xml_unmerged/HED_score_1.2.0.xml
+0 −5,856 schemas_xml_unmerged/HED_score_2.0.0.xml
+0 −7,649 schemas_xml_unmerged/HED_score_2.1.0.xml
+0 −6,538 schemas_xml_unmerged/HED_testlib_1.0.2.xml
+0 −96 schemas_xml_unmerged/HED_testlib_2.0.0.xml
+0 −85 schemas_xml_unmerged/HED_testlib_2.1.0.xml
+0 −68 schemas_xml_unmerged/HED_testlib_3.0.0.xml
120 changes: 120 additions & 0 deletions tests/tools/analysis/test_key_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,126 @@ def test_update_map_not_unique(self):
self.assertEqual(len(t_map.col_map.columns), 4, "update should produce correct number of columns")
self.assertEqual(len(t_map.col_map), len(t_map.count_dict), "update should produce the correct number of rows")

def test_remap_numeric_keys_simple(self):
    """Remap a single integer-valued key column (pandas 3.0 compatibility check)."""
    # Build the map: one integer key column and one target column
    mapper = KeyMap(["col1"], ["result"])
    mapper.update(pd.DataFrame({"col1": [1, 2, 3], "result": ["one", "two", "three"]}))

    # Data to remap: the integer keys repeat and appear out of order
    data = pd.DataFrame({"col1": [1, 2, 1, 3, 2]})

    # The call itself is part of the check: it must not raise ValueError on pandas 3.0.3
    remapped, unmapped = mapper.remap(data)

    self.assertEqual(len(remapped), 5, "remap should preserve number of rows")

    # (row position, expected target value, failure message)
    expected_by_row = (
        (0, "one", "remap should map 1 to 'one'"),
        (1, "two", "remap should map 2 to 'two'"),
        (2, "one", "remap should map 1 to 'one'"),
        (3, "three", "remap should map 3 to 'three'"),
    )
    for row, label, message in expected_by_row:
        self.assertEqual(remapped.iloc[row]["result"], label, message)

    self.assertFalse(unmapped, "remap should not have missing keys")

def test_remap_numeric_keys_as_strings(self):
    """Remap keys that are digits stored as strings (the common case)."""
    mapper = KeyMap(["test_code"], ["test_label"])

    # The map pairs each digit-string code with a label
    codes = ["1", "2", "3", "4"]
    labels = ["low", "medium", "high", "critical"]
    mapper.update(pd.DataFrame({"test_code": codes, "test_label": labels}))

    # Data uses the same digit-string codes, with repeats
    data = pd.DataFrame({"test_code": ["1", "2", "3", "1", "4", "2"]})

    remapped, unmapped = mapper.remap(data)

    self.assertEqual(len(remapped), 6, "remap should preserve number of rows")

    # Spot-check one row for each distinct code
    for row, label in ((0, "low"), (1, "medium"), (2, "high"), (4, "critical")):
        self.assertEqual(remapped.iloc[row]["test_label"], label)

    self.assertFalse(unmapped, "remap should not have missing keys")

def test_remap_numeric_keys_with_na(self):
    """Remap digit-string keys when some data rows hold the "n/a" placeholder."""
    mapper = KeyMap(["value"], ["category"])

    # The map only knows the keys "1", "2" and "3"
    mapper.update(pd.DataFrame({"value": ["1", "2", "3"], "category": ["cat_a", "cat_b", "cat_c"]}))

    # Rows 2 and 4 carry "n/a", which has no entry in the map
    data = pd.DataFrame({"value": ["1", "2", "n/a", "3", "n/a"]})

    remapped, unmapped = mapper.remap(data)

    self.assertEqual(len(remapped), 5, "remap should preserve number of rows")

    first_row = remapped.iloc[0]
    na_row = remapped.iloc[2]
    last_mapped_row = remapped.iloc[3]
    self.assertEqual(first_row["category"], "cat_a")
    self.assertEqual(na_row["category"], "n/a", "remap should map n/a to n/a")
    self.assertEqual(last_mapped_row["category"], "cat_c")

    # The unmapped rows are reported by their row positions
    self.assertEqual(unmapped, [2, 4], "remap should report rows with unmapped n/a key")

def test_remap_multiple_numeric_keys_cascade(self):
    """Remap on a two-column key (the case that failed on pandas 3.0.3)."""
    # Scenario taken from issue #1329
    mapper = KeyMap(["test", "response_accuracy"], ["result"])

    # Only two (test, response_accuracy) combinations are known to the map
    known_combinations = {
        "test": ["1", "2"],
        "response_accuracy": ["correct", "correct"],
        "result": ["correct_left", "correct_right"],
    }
    mapper.update(pd.DataFrame(known_combinations))

    # Rows 0 and 1 match the map; rows 2-5 use combinations the map does not contain
    observed = {
        "test": ["1", "2", "n/a", "3", "4", "5"],
        "response_accuracy": ["correct", "correct", "correct", "n/a", "correct", "correct"],
    }
    data = pd.DataFrame(observed)

    # Previously failed inside remap at: map_series = pd.Series(self.map_dict)
    # The Series is now built with explicit index/data arguments
    remapped, unmapped = mapper.remap(data)

    self.assertEqual(len(remapped), 6, "remap should preserve number of rows")

    # Rows 0-1 are mapped; rows 2-3 have unknown combinations and should get n/a
    for row, outcome in ((0, "correct_left"), (1, "correct_right"), (2, "n/a"), (3, "n/a")):
        self.assertEqual(remapped.iloc[row]["result"], outcome)

    self.assertEqual(unmapped, [2, 3, 4, 5], "remap should report rows with unmapped key combinations")

def test_remap_large_numeric_key_dict(self):
    """Remap against a 100-entry map to ensure Series construction works at size.

    The data cycles deterministically through the first 50 event IDs, so every
    row has a key that is present in the map and nothing should be reported
    as missing.
    """
    key_map = KeyMap(["event_id"], ["event_name"])

    # Build a mapping with 100 digit-string event IDs: "0" -> "event_0", ...
    size = 100
    map_df = pd.DataFrame(
        {
            "event_id": [str(i) for i in range(size)],
            "event_name": [f"event_{i}" for i in range(size)],
        }
    )
    key_map.update(map_df)

    # 200 rows cycling through IDs "0".."49" (row r holds ID r % 50); not random
    test_df = pd.DataFrame({"event_id": [str(i % 50) for i in range(200)]})

    df_result, missing = key_map.remap(test_df)

    self.assertEqual(len(df_result), 200, "remap should preserve number of rows")
    # Verify some mappings
    self.assertEqual(df_result.iloc[0]["event_name"], "event_0")
    self.assertEqual(df_result.iloc[50]["event_name"], "event_0")  # 50 % 50 = 0
    self.assertEqual(df_result.iloc[99]["event_name"], "event_49")  # 99 % 50 = 49
    # Every ID used is in the map, matching the check made by the other all-mapped tests
    self.assertFalse(missing, "remap should not have missing keys")


# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()