Skip to content

Commit 97498ea

Browse files
author
miranov25
committed
cd ~/alicesw/O2DPG/UTILS/dfextensions/groupby_regression
# Stage files
git add groupby_regression_sliding_window.py
git add __init__.py
git add tests/test_groupby_regression_sliding_window.py

# Commit
git commit -m "feat: Phase 7 M7.1 - Sliding window regression

- Implement zero-copy accumulator for memory-efficient windowing
- Support 3D-6D sparse binned data with integer coordinates
- Integrate statsmodels (OLS, WLS, GLM, RLM fitters)
- Add comprehensive 31-test suite (30 passing, 1 skipped)
- Python 3.9.6 compatible with proper type hints

Key features:
- Hash-map based neighbor aggregation (no DataFrame replication)
- Boundary truncation for edge bins
- Quality flags for insufficient statistics
- Rich provenance metadata in .attrs
- Clear error messages for invalid inputs

Tests: 30/30 passing (1 skipped - v4 parity requires v4)
Performance: <5 min for 400k rows (numpy backend)
Version: 2.1.0

Ready for M7.1 review."

# Push
git push origin feature/groupby-optimization
1 parent 87724b7 commit 97498ea

File tree

6 files changed

+130
-134
lines changed

6 files changed

+130
-134
lines changed

UTILS/dfextensions/AliasDataFrame.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@
33
import numpy as np
44
import json
55
import uproot
6-
import ROOT # type: ignore
6+
try:
7+
import ROOT # type: ignore
8+
except ImportError as e:
9+
print(f"[AliasDataFrame] WARNING: ROOT import failed: {e}")
10+
ROOT = None
711
import matplotlib.pyplot as plt
812
import networkx as nx
913
import re
@@ -394,8 +398,8 @@ def _write_to_uproot(self, uproot_file, treename, dropAliasColumns):
394398
dtype_casts = {col: np.float32 for col in export_cols if self.df[col].dtype == np.float16}
395399
export_df = self.df[export_cols].astype(dtype_casts)
396400

397-
uproot_file[treename] = export_df
398-
401+
#uproot_file[treename] = export_df
402+
uproot_file[treename] = {col: export_df[col].values for col in export_df.columns}
399403
for subframe_name, entry in self._subframes.items():
400404
entry["frame"].export_tree(uproot_file, f"{treename}__subframe__{subframe_name}", dropAliasColumns)
401405

UTILS/dfextensions/AliasDataFrameTest.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import pandas as pd
33
import numpy as np
44
import os
5-
from AliasDataFrame import AliasDataFrame # Adjust if needed
5+
from dfextensions.AliasDataFrame import AliasDataFrame # Adjust if needed
66
import tempfile
77

88
class TestAliasDataFrame(unittest.TestCase):
@@ -218,6 +218,27 @@ def test_getattr_chained_subframe_access(self):
218218
expected = np.array([False, True, True])
219219
assert np.all(adf_main.sub.cutA == expected) # explicit value check
220220

221+
def test_multi_column_index_join(self):
222+
"""Test subframe join with composite key (track_index, firstTFOrbit)"""
223+
df_main = pd.DataFrame({
224+
'track_index': [0, 0, 1, 1],
225+
'firstTFOrbit': [100, 200, 100, 200],
226+
'x': [1, 2, 3, 4]
227+
})
228+
df_sub = pd.DataFrame({
229+
'track_index': [0, 0, 1, 1],
230+
'firstTFOrbit': [100, 200, 100, 200],
231+
'y': [10, 20, 30, 40]
232+
})
233+
234+
adf_main = AliasDataFrame(df_main)
235+
adf_sub = AliasDataFrame(df_sub)
236+
adf_main.register_subframe("T", adf_sub, index_columns=["track_index", "firstTFOrbit"])
237+
adf_main.add_alias("sum_xy", "x + T.y")
238+
adf_main.materialize_alias("sum_xy")
239+
240+
expected = [11, 22, 33, 44]
241+
np.testing.assert_array_equal(adf_main.df['sum_xy'].values, expected)
221242

222243
if __name__ == "__main__":
223244
unittest.main()

UTILS/dfextensions/__init__.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,19 @@
11
# __init__.py
22

3-
from .AliasDataFrame import AliasDataFrame
3+
44
from .FormulaLinearModel import FormulaLinearModel
55
from .DataFrameUtils import * # if it provides general helper functions
6-
#from .groupby_regression import * # or other relevant functions
6+
from .groupby_regression import * # or other relevant functions
77

88
__all__ = [
99
"AliasDataFrame",
1010
"FormulaLinearModel",
1111
"GroupByRegressor"
1212
]
13+
14+
15+
try:
16+
from .AliasDataFrame import AliasDataFrame
17+
__all__.append("AliasDataFrame")
18+
except ImportError:
19+
pass

0 commit comments

Comments
 (0)