Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 23 additions & 17 deletions ISLP/models/generic_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,10 @@
import scipy as sp

from sklearn.metrics import get_scorer
from sklearn.base import (clone, MetaEstimatorMixin)
from sklearn.base import (clone,
MetaEstimatorMixin,
is_classifier,
is_regressor)
from sklearn.model_selection import cross_val_score
from joblib import Parallel, delayed

Expand Down Expand Up @@ -149,13 +152,13 @@ def __init__(self,
self.scoring = scoring

if scoring is None:
if self.est_._estimator_type == 'classifier':
if is_classifier(self.est_):
scoring = 'accuracy'
elif self.est_._estimator_type == 'regressor':
elif is_regressor(self.est_):
scoring = 'r2'
else:
raise AttributeError('Estimator must '
'be a Classifier or Regressor.')
scoring = None

if isinstance(scoring, str):
self.scorer = get_scorer(scoring)
else:
Expand All @@ -166,7 +169,7 @@ def __init__(self,
# don't mess with this unless testing
self._TESTING_INTERRUPT_MODE = False

def fit(self, X, y, groups=None, **params):
def fit(self, X, y, groups=None, **fit_params):
"""Perform feature selection and learn model from training data.

Parameters
Expand All @@ -183,7 +186,7 @@ def fit(self, X, y, groups=None, **params):
groups: array-like, with shape (n_samples,), optional
Group labels for the samples used while splitting the dataset into
train/test set. Passed to the fit method of the cross-validator.
params: various, optional
fit_params: various, optional
Additional parameters that are being passed to the estimator.
For example, `sample_weights=weights`.

Expand Down Expand Up @@ -218,7 +221,7 @@ def fit(self, X, y, groups=None, **params):
groups=groups,
cv=self.cv,
pre_dispatch=self.pre_dispatch,
**params)
**fit_params)

# keep a running track of the best state

Expand All @@ -242,7 +245,7 @@ def fit(self, X, y, groups=None, **params):
X,
y,
groups=groups,
**params)
**fit_params)
iteration += 1
cur, best_, self.finished_ = self.update_results_check(results_,
self.path_,
Expand Down Expand Up @@ -287,7 +290,7 @@ def fit_transform(self,
X,
y,
groups=None,
**params):
**fit_params):
"""Fit to training data then reduce X to its most important features.

Parameters
Expand All @@ -304,7 +307,7 @@ def fit_transform(self,
groups: array-like, with shape (n_samples,), optional
Group labels for the samples used while splitting the dataset into
train/test set. Passed to the fit method of the cross-validator.
params: various, optional
fit_params: various, optional
Additional parameters that are being passed to the estimator.
For example, `sample_weights=weights`.

Expand All @@ -313,7 +316,7 @@ def fit_transform(self,
Reduced feature subset of X, shape={n_samples, k_features}

"""
self.fit(X, y, groups=groups, **params)
self.fit(X, y, groups=groups, **fit_params)
return self.transform(X)

def get_metric_dict(self, confidence_interval=0.95):
Expand Down Expand Up @@ -368,7 +371,7 @@ def _batch(self,
X,
y,
groups=None,
**params):
**fit_params):

results = []

Expand All @@ -388,7 +391,7 @@ def _batch(self,
groups=groups,
cv=self.cv,
pre_dispatch=self.pre_dispatch,
**params)
**fit_params)
for state in candidates)

for state, scores in work:
Expand Down Expand Up @@ -484,8 +487,11 @@ def _calc_score(estimator,
groups=None,
cv=None,
pre_dispatch='2*n_jobs',
**params):
**fit_params):

if scorer is None:
scorer = lambda estimator, X, y: estimator.score(X, y)

X_state = build_submodel(X, state)

if cv:
Expand All @@ -497,11 +503,11 @@ def _calc_score(estimator,
scoring=scorer,
n_jobs=1,
pre_dispatch=pre_dispatch,
params=params)
fit_params=fit_params)
else:
estimator.fit(X_state,
y,
**params)
**fit_params)
scores = np.array([scorer(estimator,
X_state,
y)])
Expand Down
15 changes: 14 additions & 1 deletion ISLP/models/sklearn_wrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,17 @@ def __init__(self,
self.model_type = model_type
self.model_spec = model_spec
self.model_args = model_args


def __sklearn_tags__(self):
    """Report estimator-type tags for the wrapped statsmodels model.

    Marks the wrapper as a 'regressor' for ``sm.OLS`` and as a
    'classifier' for a ``sm.GLM`` constructed with a Binomial family,
    so sklearn's ``is_regressor`` / ``is_classifier`` recognize it.
    """
    tags = super().__sklearn_tags__()
    if self.model_type == sm.OLS:
        # Ordinary least squares is always a regressor.
        tags.estimator_type = 'regressor'
    elif issubclass(self.model_type, sm.GLM):
        # A GLM with a Binomial family models a binary response,
        # i.e. behaves as a classifier; other families leave the
        # default tags untouched.
        family = self.model_args.get('family', None)
        if isinstance(family, sm.families.Binomial):
            tags.estimator_type = 'classifier'
    return tags

def fit(self, X, y):
"""
Fit a statsmodel model
Expand Down Expand Up @@ -171,6 +181,9 @@ def __init__(self,
self.cv = cv
self.scoring = scoring

def __sklearn_tags__(self):
    # Delegate to the parent implementation unchanged; kept as an
    # explicit override point for future tag customization.
    return super().__sklearn_tags__()

def fit(self, X, y):
"""
Expand Down
1 change: 0 additions & 1 deletion ISLP/torch/imdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import torch
from torch.utils.data import TensorDataset
from scipy.sparse import load_npz
from pkg_resources import resource_filename
from pickle import load as load_pickle
import urllib

Expand Down
31 changes: 27 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
name = "ISLP"
dependencies = ["numpy>=1.7.1",
"scipy>=0.9",
"pandas>=0.20",
"pandas>=1.5",
"lxml", # pandas needs this for html
"scikit-learn>=1.2",
"joblib",
Expand All @@ -15,7 +15,7 @@ dependencies = ["numpy>=1.7.1",
]
description = "Library for ISLP labs"
readme = "README.md"
requires-python = ">=3.9"
requires-python = ">=3.10"
license = {file = "LICENSE"}
keywords = []
authors = [
Expand All @@ -38,6 +38,23 @@ classifiers = ["Development Status :: 3 - Alpha",
]
dynamic = ["version"]

[tool.setuptools]
packages = [
"ISLP",
"ISLP.models",
"ISLP.bart",
"ISLP.torch",
"ISLP.data"
]
include-package-data = true

[tool.setuptools.package-data]
ISLP = ["data/*.csv", "data/*.npy", "data/*.data"]

[tool.setuptools.dynamic]
version = {attr = "ISLP.__version__"} # package version is read from ISLP.__version__


[project.urls] # Optional
"Homepage" = "https://github.com/intro-stat-learning/ISLP"
"Bug Reports" = "https://github.com/intro-stat-learning/ISLP/issues"
Expand All @@ -51,8 +68,14 @@ doc = ['Sphinx>=3.0']
[build-system]
requires = ["setuptools>=42",
"wheel",
"versioneer[toml]",
"Sphinx>=1.0"
"Sphinx>=1.0",
"numpy",
"pandas",
"scipy",
"scikit-learn",
"joblib",
"statsmodels",
"versioneer[toml]"
]
build-backend = "setuptools.build_meta"

Expand Down
50 changes: 0 additions & 50 deletions setup.py

This file was deleted.

46 changes: 46 additions & 0 deletions tests/models/test_sklearn_wrap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@

import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.base import is_classifier, is_regressor
import pytest

from ISLP.models.sklearn_wrap import sklearn_sm, sklearn_selected
from ISLP.models.model_spec import ModelSpec
from ISLP.models.strategy import min_max

@pytest.fixture
def model_setup():
    """Build a small random dataset plus a fitted ModelSpec and a
    min_max selection strategy shared by the selector tests."""
    columns = {name: np.random.rand(10) for name in ('X1', 'X2', 'X3')}
    X = pd.DataFrame(columns)
    # Binary 0/1 labels so the data also works for classifier checks.
    y = pd.Series(np.random.randint(0, 2, 10))
    spec = ModelSpec(['X1', 'X2', 'X3']).fit(X)
    strategy = min_max(spec, min_terms=1, max_terms=2)
    return X, y, spec, strategy

def test_OLS_is_regressor():
    """sklearn_sm wrapping sm.OLS must advertise itself as a regressor."""
    est = sklearn_sm(sm.OLS)
    tags = est.__sklearn_tags__()
    assert tags.estimator_type == 'regressor'
    assert is_regressor(est)

def test_GLM_binomial_is_classifier():
    """A GLM with a Binomial family must be tagged as a classifier."""
    est = sklearn_sm(sm.GLM,
                     model_args={'family': sm.families.Binomial()})
    tags = est.__sklearn_tags__()
    assert tags.estimator_type == 'classifier'
    assert is_classifier(est)

def test_GLM_binomial_probit_is_classifier():
    """The classifier tag must not depend on the Binomial link function."""
    family = sm.families.Binomial(link=sm.families.links.Probit())
    est = sklearn_sm(sm.GLM, model_args={'family': family})
    tags = est.__sklearn_tags__()
    assert tags.estimator_type == 'classifier'
    assert is_classifier(est)


def test_selected_OLS_is_regressor(model_setup):
    """sklearn_selected over sm.OLS must also be tagged as a regressor."""
    _, _, _, strategy = model_setup
    est = sklearn_selected(sm.OLS, strategy=strategy)
    tags = est.__sklearn_tags__()
    assert tags.estimator_type == 'regressor'
    assert is_regressor(est)

def test_selected_GLM_binomial_is_classifier(model_setup):
    """sklearn_selected over a Binomial GLM must be tagged as a classifier."""
    _, _, _, strategy = model_setup
    est = sklearn_selected(sm.GLM,
                           strategy=strategy,
                           model_args={'family': sm.families.Binomial()})
    tags = est.__sklearn_tags__()
    assert tags.estimator_type == 'classifier'
    assert is_classifier(est)
Loading