Skip to content

Commit 3cea0c1

Browse files
authored
Merge pull request #5 from OpenTabular/main
v0.0.2
2 parents d05c88f + 8af3764 commit 3cea0c1

3 files changed

Lines changed: 31 additions & 17 deletions

File tree

README.md

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -54,34 +54,43 @@ pip install -e .
5454

5555
```python
5656
import pandas as pd
57-
from pretab import Preprocessor
57+
import numpy as np
58+
from pretab.preprocessor import Preprocessor
5859

60+
# Simulated tabular dataset
5961
df = pd.DataFrame({
60-
"age": [22, 35, 46, 59],
61-
"income": [40000, 52000, 98000, 87000],
62-
"job": ["nurse", "engineer", "scientist", "teacher"]
62+
"age": np.random.randint(18, 65, size=100),
63+
"income": np.random.normal(60000, 15000, size=100).astype(int),
64+
"job": np.random.choice(["nurse", "engineer", "scientist", "teacher", "artist", "manager"], size=100),
65+
"city": np.random.choice(["Berlin", "Munich", "Hamburg", "Cologne"], size=100),
66+
"experience": np.random.randint(0, 40, size=100)
6367
})
6468

65-
# Optional feature-specific config
69+
y = np.random.randn(100, 1)
70+
71+
# Optional feature-specific preprocessing config
6672
config = {
6773
"age": "ple",
6874
"income": "rbf",
69-
"job": "one-hot"
75+
"experience": "quantile",
76+
"job": "one-hot",
77+
"city": "none"
7078
}
7179

80+
# Initialize Preprocessor
7281
preprocessor = Preprocessor(
7382
feature_preprocessing=config,
7483
task="regression"
7584
)
7685

77-
# Fit and transform
78-
X_dict = preprocessor.fit_transform(df)
86+
# Fit and transform the data into a dictionary of feature arrays
87+
X_dict = preprocessor.fit_transform(df, y)
7988

80-
# Optionally get stacked array
81-
X_array = preprocessor.transform(df, return_dict=False)
89+
# Optionally get a stacked array instead of a dictionary
90+
X_array = preprocessor.transform(df, return_array=True)
8291

83-
# Get feature info
84-
preprocessor.get_feature_info()
92+
# Get feature metadata
93+
preprocessor.get_feature_info(verbose=True)
8594
```
8695

8796
---

pretab/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
"""Version information."""
22

33
# The following line *must* be the last in the module, exactly as formatted:
4-
__version__ = "0.0.1"
4+
__version__ = "0.0.2"

pretab/utils/get_numerical.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,11 @@ def get_numerical_transformer_steps(
4545
("imputer", SimpleImputer(strategy=imputer_strategy, **imputer_kwargs))
4646
)
4747

48-
if scaling == "standardization":
49-
steps.append(("scaler", StandardScaler()))
50-
elif scaling == "minmax":
51-
steps.append(("minmax", MinMaxScaler(feature_range=(-1, 1))))
48+
# Scalers that can be appended as an optional extra step, independent of the chosen method
49+
scalers = {
50+
"standardization": ("scaler", StandardScaler()),
51+
"minmax": ("minmax", MinMaxScaler(feature_range=(-1, 1))),
52+
}
5253

5354
method_map = {
5455
"standardization": (StandardScaler, []),
@@ -93,6 +94,10 @@ def get_numerical_transformer_steps(
9394
"none": (NoTransformer, []),
9495
}
9596

97+
# Add the optional scaling step only when it differs from the selected method
98+
if scaling in scalers and scaling != method:
99+
steps.append(scalers[scaling])
100+
96101
if method not in method_map:
97102
raise ValueError(f"Unknown numerical transformer method: {method}")
98103

0 commit comments

Comments
 (0)