Skip to content

Commit d9e1f6f

Browse files
committed
initial pre commit
1 parent 56b2532 commit d9e1f6f

37 files changed

Lines changed: 437 additions & 457 deletions

.aiexclude

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,4 @@ __pycache__/
1515
*.csv
1616
*.json
1717
*.tsv
18-
*.log
18+
*.log

.githooks/pre-push

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,4 @@ if [ $? -ne 0 ]; then
2424
fi
2525

2626
echo "All checks passed. Proceeding with push."
27-
exit 0
27+
exit 0

.githooks/pre-push.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,4 @@ if [ $? -ne 0 ]; then
4040
fi
4141

4242
echo "All checks passed. Proceeding with push."
43-
exit 0
43+
exit 0

assets/data_feature_importance_methods.mmd

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,41 @@
11
flowchart TD
22
A[ml_grid_object] --> B[feature_importance_methods]
3-
3+
44
B --> C{Check feature_selection_method}
5-
5+
66
C -->|"anova or None"| D[ANOVA Method]
77
C -->|"markov_blanket"| E[Markov Blanket Method]
8-
8+
99
D --> F[feature_methods.getNfeaturesANOVAF]
1010
E --> G[feature_methods.getNFeaturesMarkovBlanket]
11-
11+
1212
F --> H[Calculate F-values using f_classif]
1313
F --> I[Sort features by F-value]
1414
F --> J[Return top n feature names]
15-
15+
1616
G --> K[Initialize PPIMBC with SVC]
1717
G --> L[Fit and transform training data]
1818
G --> M[Extract top n features from MB]
19-
19+
2020
J --> N[Apply feature selection]
2121
M --> N
22-
22+
2323
N --> O[Filter X_train with selected features]
2424
N --> P[Filter X_test with selected features]
2525
N --> Q[Filter X_test_orig with selected features]
26-
26+
2727
O --> R[Return filtered datasets]
2828
P --> R
2929
Q --> R
30-
30+
3131
subgraph "Input Data"
3232
S[target_n_features]
3333
T[X_train]
3434
U[X_test]
3535
V[y_train]
3636
W[X_test_orig]
3737
end
38-
38+
3939
subgraph "Feature Methods Class"
4040
F
4141
G
@@ -46,25 +46,25 @@ flowchart TD
4646
L
4747
M
4848
end
49-
49+
5050
subgraph "Output"
5151
X[Filtered X_train]
5252
Y[Filtered X_test]
5353
Z[Filtered X_test_orig]
5454
end
55-
55+
5656
S --> B
5757
T --> B
5858
U --> B
5959
V --> B
6060
W --> B
61-
61+
6262
R --> X
6363
R --> Y
6464
R --> Z
65-
65+
6666
style B fill:#e1f5fe
6767
style C fill:#fff3e0
6868
style D fill:#f3e5f5
6969
style E fill:#e8f5e8
70-
style N fill:#fff8e1
70+
style N fill:#fff8e1

assets/data_feature_importance_methods.svg

Lines changed: 1 addition & 1 deletion
Loading

assets/data_pipeline.mmd

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,68 +3,68 @@ flowchart TD
33
B --> C{Read Data}
44
C -->|Sample Mode| D[Read Sample Data]
55
C -->|Full Mode| E[Read Full Data]
6-
6+
77
D --> F[Apply Test/Column Sampling]
88
E --> F
9-
9+
1010
F --> G[Get Perturbation Columns]
1111
G --> H[Set Outcome Variable]
1212
H --> I[Handle Correlation Matrix]
13-
13+
1414
I --> J[Handle Percent Missing]
1515
J --> K[Handle Outcome List]
1616
K --> L[Remove Constant Columns]
17-
17+
1818
L --> M{Final Column List Empty?}
1919
M -->|Yes| N[Safety Retention Mechanism]
2020
M -->|No| O[Proceed with Final Columns]
21-
21+
2222
N --> P[Retain Protected Columns]
2323
P --> Q{Still Empty?}
2424
Q -->|Yes| R[Select Random Features]
2525
Q -->|No| O
2626
R --> O
27-
27+
2828
O --> S[Clean Up Data]
2929
S --> T[Screen Non-Float Types]
3030
T --> U[Handle Column Names]
31-
31+
3232
U --> V{Scale Data?}
3333
V -->|Yes| W[Apply Standard Scaling]
3434
V -->|No| X[Skip Scaling]
3535
W --> X
36-
36+
3737
X --> Y{Time Series Mode?}
3838
Y -->|Yes| Z[Convert to Time Series]
3939
Y -->|No| AA[Regular Processing]
40-
40+
4141
Z --> BB[Get Max Sequence Length]
4242
BB --> CC[Convert X,y to Time Series]
4343
CC --> DD[Train/Test Split]
44-
44+
4545
AA --> DD
4646
DD --> EE[Remove Constant Columns After Split]
47-
47+
4848
EE --> FF{Feature Reduction Needed?}
4949
FF -->|Yes| GG[Apply Feature Importance Methods]
5050
FF -->|No| HH[Skip Feature Reduction]
51-
51+
5252
GG --> II{Features Remain?}
5353
II -->|No| JJ[Error: All Features Removed]
5454
II -->|Yes| HH
55-
55+
5656
HH --> KK{Time Series Mode?}
5757
KK -->|Yes| LL[Get Time Series Model List]
5858
KK -->|No| MM[Get Regular Model List]
59-
59+
6060
LL --> NN[Pipeline Complete]
6161
MM --> NN
62-
62+
6363
JJ --> OO[Pipeline Failed]
64-
64+
6565
style A fill:#e1f5fe
6666
style NN fill:#c8e6c9
6767
style OO fill:#ffcdd2
6868
style N fill:#fff3e0
6969
style P fill:#fff3e0
70-
style R fill:#fff3e0
70+
style R fill:#fff3e0

assets/data_pipeline.svg

Lines changed: 1 addition & 1 deletion
Loading

assets/grid_param_space.mmd

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ graph TD
22
A[Grid Class Initialization] --> B[Set Global Parameters]
33
B --> C[Set sample_n default=1000]
44
C --> D[Initialize Grid Dictionary]
5-
5+
66
D --> E[Grid Parameters]
77
E --> E1[resample: undersample/oversample/None]
88
E --> E2[scale: True/False]
@@ -14,7 +14,7 @@ graph TD
1414
E --> E8[corr: 0.98,0.85,0.5,0.25]
1515
E --> E9[feature_selection_method: anova/markov_blanket]
1616
E --> E10[data: nested dictionary]
17-
17+
1818
E10 --> F[Data Features]
1919
F --> F1[age: True/False]
2020
F --> F2[sex: True/False]
@@ -30,29 +30,29 @@ graph TD
3030
F --> F12[vte_status: True]
3131
F --> F13[hosp_site: True]
3232
F --> F14[Other features: False]
33-
33+
3434
D --> G[c_prod Function Definition]
3535
G --> H[Generate Cartesian Product]
3636
H --> I[Create settings_list]
3737
I --> J[Print Full Size]
3838
J --> K[Shuffle List]
3939
K --> L[Sample n Items]
4040
L --> M[Create Iterator]
41-
41+
4242
subgraph "c_prod Function Logic"
4343
N[Input: Dictionary or List]
4444
N --> O{Is List?}
4545
O -->|Yes| P[Yield Items Recursively]
4646
O -->|No| Q[Generate Product of Values]
4747
Q --> R[Yield Dictionary with Keys]
4848
end
49-
49+
5050
subgraph "Key Dependencies"
5151
S[itertools as it]
5252
T[random]
5353
U[ml_grid.util.global_params]
5454
end
55-
55+
5656
subgraph "Class Attributes"
5757
V[self.global_params]
5858
W[self.verbose]
@@ -61,8 +61,8 @@ graph TD
6161
Z[self.settings_list]
6262
AA[self.settings_list_iterator]
6363
end
64-
64+
6565
style A fill:#e1f5fe
6666
style D fill:#f3e5f5
6767
style G fill:#fff3e0
68-
style M fill:#e8f5e8
68+
style M fill:#e8f5e8

assets/grid_param_space.svg

Lines changed: 1 addition & 1 deletion
Loading

0 commit comments

Comments
 (0)