Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .gitignore
Binary file not shown.
1 change: 1 addition & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ numpy
pandas
scikit-learn
matplotlib
lightgbm

# For plot generation
plotly>=5.0
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ viz = [
# Machine learning test functions
ml = [
"scikit-learn",
"lightgbm>=4.0.0",
]
# CEC benchmark data files
cec = [
Expand All @@ -70,6 +71,7 @@ xgboost = [
"surfaces[ml]",
"xgboost>=1.7.0",
]

# Pre-trained ONNX surrogate models
surrogates = [
"surfaces-onnx-files==0.0.1",
Expand Down
7 changes: 7 additions & 0 deletions src/surfaces/test_functions/machine_learning/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ def _check_sklearn():
KNeighborsClassifierFunction,
KNeighborsRegressorFunction,
KNNTSClassifierFunction,
# LightGBM
LightGBMClassifierFunction,
LightGBMRegressorFunction,
RandomForestClassifierFunction,
RandomForestForecasterFunction,
# Image
Expand All @@ -51,12 +54,14 @@ def _check_sklearn():
"KNeighborsClassifierFunction",
"RandomForestClassifierFunction",
"SVMClassifierFunction",
"LightGBMClassifierFunction",
# Tabular - Regression
"DecisionTreeRegressorFunction",
"GradientBoostingRegressorFunction",
"KNeighborsRegressorFunction",
"RandomForestRegressorFunction",
"SVMRegressorFunction",
"LightGBMRegressorFunction",
# Time-series - Forecasting
"GradientBoostingForecasterFunction",
"RandomForestForecasterFunction",
Expand All @@ -75,12 +80,14 @@ def _check_sklearn():
KNeighborsClassifierFunction,
RandomForestClassifierFunction,
SVMClassifierFunction,
LightGBMClassifierFunction,
# Tabular - Regression
DecisionTreeRegressorFunction,
GradientBoostingRegressorFunction,
KNeighborsRegressorFunction,
RandomForestRegressorFunction,
SVMRegressorFunction,
LightGBMRegressorFunction,
# Time-series - Forecasting
GradientBoostingForecasterFunction,
RandomForestForecasterFunction,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
GradientBoostingRegressorFunction,
KNeighborsClassifierFunction,
KNeighborsRegressorFunction,
LightGBMClassifierFunction,
LightGBMRegressorFunction,
RandomForestClassifierFunction,
RandomForestRegressorFunction,
SVMClassifierFunction,
Expand All @@ -57,12 +59,14 @@
"KNeighborsClassifierFunction",
"RandomForestClassifierFunction",
"SVMClassifierFunction",
"LightGBMClassifierFunction",
# Tabular - Regression
"DecisionTreeRegressorFunction",
"GradientBoostingRegressorFunction",
"KNeighborsRegressorFunction",
"RandomForestRegressorFunction",
"SVMRegressorFunction",
"LightGBMRegressorFunction",
# Time-series - Forecasting
"GradientBoostingForecasterFunction",
"RandomForestForecasterFunction",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
DecisionTreeClassifierFunction,
GradientBoostingClassifierFunction,
KNeighborsClassifierFunction,
LightGBMClassifierFunction,
RandomForestClassifierFunction,
SVMClassifierFunction,
)
from .regression import (
DecisionTreeRegressorFunction,
GradientBoostingRegressorFunction,
KNeighborsRegressorFunction,
LightGBMRegressorFunction,
RandomForestRegressorFunction,
SVMRegressorFunction,
)
Expand All @@ -24,10 +26,12 @@
"KNeighborsClassifierFunction",
"RandomForestClassifierFunction",
"SVMClassifierFunction",
"LightGBMClassifierFunction",
# Regression
"DecisionTreeRegressorFunction",
"GradientBoostingRegressorFunction",
"KNeighborsRegressorFunction",
"RandomForestRegressorFunction",
"SVMRegressorFunction",
"LightGBMRegressorFunction",
]
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
DecisionTreeClassifierFunction,
GradientBoostingClassifierFunction,
KNeighborsClassifierFunction,
LightGBMClassifierFunction,
RandomForestClassifierFunction,
SVMClassifierFunction,
)
Expand All @@ -16,4 +17,5 @@
"KNeighborsClassifierFunction",
"RandomForestClassifierFunction",
"SVMClassifierFunction",
"LightGBMClassifierFunction",
]
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .decision_tree_classifier import DecisionTreeClassifierFunction
from .gradient_boosting_classifier import GradientBoostingClassifierFunction
from .k_neighbors_classifier import KNeighborsClassifierFunction
from .lightgbm_classifier import LightGBMClassifierFunction
from .random_forest_classifier import RandomForestClassifierFunction
from .svm_classifier import SVMClassifierFunction

Expand All @@ -15,4 +16,5 @@
"KNeighborsClassifierFunction",
"RandomForestClassifierFunction",
"SVMClassifierFunction",
"LightGBMClassifierFunction",
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
"""LightGBM Classifier test function with surrogate support."""

from typing import Any, Dict, List, Optional

import numpy as np
from lightgbm import LGBMClassifier
from sklearn.model_selection import cross_val_score

from surfaces.modifiers import BaseModifier

from .._base_classification import BaseClassification
from ..datasets import DATASETS


class LightGBMClassifierFunction(BaseClassification):
    """LightGBM Classifier test function.

    Scores a hyperparameter configuration by the mean cross-validated
    accuracy of a ``lightgbm.LGBMClassifier`` on a fixed dataset. The
    nine boosting hyperparameters listed in ``para_names`` span the
    search space.

    Parameters
    ----------
    dataset : str, default="digits"
        Dataset to use. One of: "digits", "iris", "wine", "breast_cancer", "covtype".
    cv : int, default=5
        Number of cross-validation folds. Must be one of ``available_cv``.
    objective : str, default="maximize"
        Optimization direction passed through to the base class.
    modifiers : list of BaseModifier, optional
        Score modifiers applied by the base class.
    memory : bool, default=False
        Enable evaluation memoization in the base class.
    collect_data : bool, default=True
        Record evaluated parameters/scores in the base class.
    callbacks, catch_errors
        Forwarded unchanged to the base class.
    use_surrogate : bool, default=False
        If True, use pre-trained surrogate for fast evaluation.

    Raises
    ------
    ValueError
        If ``dataset`` is not a known dataset or ``cv`` is not an
        allowed fold count.
    """

    name = "LightGBM Classifier Function"
    _name_ = "lightgbm_classifier"
    __name__ = "LightGBMClassifierFunction"

    available_datasets = list(DATASETS.keys())
    available_cv = [2, 3, 5, 10]

    # Hyperparameters forwarded to LGBMClassifier, in search-space order.
    para_names = [
        "n_estimators",
        "learning_rate",
        "num_leaves",
        "max_depth",
        "min_child_samples",
        "subsample",
        "colsample_bytree",
        "reg_alpha",
        "reg_lambda",
    ]

    # Hp search space defaults

    n_estimators_default = list(np.arange(10, 300, 10))
    learning_rate_default = [1e-3, 1e-1, 0.5, 1.0]
    num_leaves_default = list(range(10, 100, 5))
    max_depth_default = list(range(2, 20, 1))
    min_child_samples_default = list(range(5, 100, 5))
    subsample_default = list(np.arange(0.1, 1.01, 0.1))
    colsample_bytree_default = list(np.arange(0.1, 1.01, 0.1))
    reg_alpha_default = [0, 0.001, 0.01, 0.1, 1, 10]
    # Fixed: grid previously omitted 1, unlike the parallel reg_alpha grid.
    reg_lambda_default = [0, 0.001, 0.01, 0.1, 1, 10]

    # Function sheet for doc
    latex_formula = r"\text{CV-Accuracy} = f(\text{n\_estimators}, \text{learning\_rate}, \dots)"
    tagline = (
        "Cross-validated accuracy of a LightGBM classifier. "
        "Gradient boosting with tree-based learning."
    )

    def __init__(
        self,
        dataset: str = "digits",
        cv: int = 5,
        objective: str = "maximize",
        modifiers: Optional[List[BaseModifier]] = None,
        memory: bool = False,
        collect_data: bool = True,
        callbacks=None,
        catch_errors=None,
        use_surrogate: bool = False,
    ):
        # Validate user-facing knobs before touching the base class.
        if dataset not in DATASETS:
            raise ValueError(f"Unknown dataset '{dataset}'. Available: {self.available_datasets}")
        if cv not in self.available_cv:
            raise ValueError(f"Invalid cv={cv}. Available: {self.available_cv}")

        self.dataset = dataset
        self.cv = cv
        # Loader is resolved once here; data is materialized lazily in
        # _create_objective_function.
        self._dataset_loader = DATASETS[dataset]

        super().__init__(
            objective=objective,
            modifiers=modifiers,
            memory=memory,
            collect_data=collect_data,
            callbacks=callbacks,
            catch_errors=catch_errors,
            use_surrogate=use_surrogate,
        )

    @property
    def search_space(self) -> Dict[str, Any]:
        """Return the default hyperparameter grid, keyed by ``para_names``."""
        return {
            "n_estimators": self.n_estimators_default,
            "learning_rate": self.learning_rate_default,
            "num_leaves": self.num_leaves_default,
            "max_depth": self.max_depth_default,
            "min_child_samples": self.min_child_samples_default,
            "subsample": self.subsample_default,
            "colsample_bytree": self.colsample_bytree_default,
            "reg_alpha": self.reg_alpha_default,
            "reg_lambda": self.reg_lambda_default,
        }

    def _create_objective_function(self) -> None:
        """
        Creates the objective function closure with fixed data.

        Loads the dataset once and binds it (plus the fold count) into a
        closure, so repeated evaluations do not reload data. The closure
        returns the mean CV accuracy for a given parameter dict.
        """
        X, y = self._dataset_loader()
        cv = self.cv

        def objective(params: Dict[str, Any]) -> float:
            # random_state pinned and verbose silenced so evaluations are
            # deterministic and quiet across the search.
            clf = LGBMClassifier(
                n_estimators=params["n_estimators"],
                learning_rate=params["learning_rate"],
                num_leaves=params["num_leaves"],
                max_depth=params["max_depth"],
                min_child_samples=params["min_child_samples"],
                subsample=params["subsample"],
                colsample_bytree=params["colsample_bytree"],
                reg_alpha=params["reg_alpha"],
                reg_lambda=params["reg_lambda"],
                random_state=42,
                n_jobs=-1,
                verbose=-1,
            )
            scores = cross_val_score(clf, X, y, cv=cv, scoring="accuracy")
            return scores.mean()

        self.pure_objective_function = objective

    def _get_surrogate_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
        """Augment *params* with the fixed dataset/cv keys for surrogate lookup."""
        return {**params, "dataset": self.dataset, "cv": self.cv}
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
DecisionTreeRegressorFunction,
GradientBoostingRegressorFunction,
KNeighborsRegressorFunction,
LightGBMRegressorFunction,
RandomForestRegressorFunction,
SVMRegressorFunction,
)
Expand All @@ -16,4 +17,5 @@
"KNeighborsRegressorFunction",
"RandomForestRegressorFunction",
"SVMRegressorFunction",
"LightGBMRegressorFunction",
]
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .decision_tree_regressor import DecisionTreeRegressorFunction
from .gradient_boosting_regressor import GradientBoostingRegressorFunction
from .k_neighbors_regressor import KNeighborsRegressorFunction
from .lightgbm_regressor import LightGBMRegressorFunction
from .random_forest_regressor import RandomForestRegressorFunction
from .svm_regressor import SVMRegressorFunction

Expand All @@ -15,4 +16,5 @@
"KNeighborsRegressorFunction",
"RandomForestRegressorFunction",
"SVMRegressorFunction",
"LightGBMRegressorFunction",
]
Loading