@@ -54,34 +54,43 @@ pip install -e .
 
 ```python
 import pandas as pd
-from pretab import Preprocessor
+import numpy as np
+from pretab.preprocessor import Preprocessor
 
+# Simulated tabular dataset
 df = pd.DataFrame({
-    "age": [22, 35, 46, 59],
-    "income": [40000, 52000, 98000, 87000],
-    "job": ["nurse", "engineer", "scientist", "teacher"]
+    "age": np.random.randint(18, 65, size=100),
+    "income": np.random.normal(60000, 15000, size=100).astype(int),
+    "job": np.random.choice(["nurse", "engineer", "scientist", "teacher", "artist", "manager"], size=100),
+    "city": np.random.choice(["Berlin", "Munich", "Hamburg", "Cologne"], size=100),
+    "experience": np.random.randint(0, 40, size=100)
 })
 
-# Optional feature-specific config
+y = np.random.randn(100, 1)
+
+# Optional feature-specific preprocessing config
 config = {
     "age": "ple",
     "income": "rbf",
-    "job": "one-hot"
+    "experience": "quantile",
+    "job": "one-hot",
+    "city": "none"
 }
 
+# Initialize Preprocessor
 preprocessor = Preprocessor(
     feature_preprocessing=config,
     task="regression"
 )
 
-# Fit and transform
-X_dict = preprocessor.fit_transform(df)
+# Fit and transform the data into a dictionary of feature arrays
+X_dict = preprocessor.fit_transform(df, y)
 
-# Optionally get stacked array
-X_array = preprocessor.transform(df, return_dict=False)
+# Optionally get a stacked array instead of a dictionary
+X_array = preprocessor.transform(df, return_array=True)
 
-# Get feature info
-preprocessor.get_feature_info()
+# Get feature metadata
+preprocessor.get_feature_info(verbose=True)
 ```
 
 ---
0 commit comments