"""Explainable Boosting Machine (EBM) Integration.
This module provides enhanced wrappers for InterpretML's Explainable Boosting
Machines (EBMs), adding additional functionality for integration with the
endgame library.
EBMs are interpretable machine learning models that combine modern ML techniques
(bagging, gradient boosting, automatic interaction detection) with the
transparency of Generalized Additive Models (GAMs).
Key features:
- Glass-box interpretability with competitive accuracy
- Automatic pairwise interaction detection
- Global and local explanations
- Editable by domain experts
- Support for mixed feature types (continuous, categorical)
- Missing value handling
Example usage:
>>> from endgame.models import EBMClassifier, EBMRegressor
>>> clf = EBMClassifier(interactions=10)
>>> clf.fit(X_train, y_train)
>>> clf.explain_global()
>>> clf.explain_local(X_test[:5])
"""
from __future__ import annotations
from typing import Any
import numpy as np
from numpy.typing import ArrayLike, NDArray
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
try:
from interpret.glassbox import (
ExplainableBoostingClassifier,
ExplainableBoostingRegressor,
)
HAS_INTERPRET = True
except ImportError:
HAS_INTERPRET = False
ExplainableBoostingClassifier = None
ExplainableBoostingRegressor = None
def _check_interpret_installed() -> None:
    """Verify that the optional ``interpret`` dependency was imported.

    Raises
    ------
    ImportError
        If the module-level ``HAS_INTERPRET`` flag is false.
    """
    if HAS_INTERPRET:
        return
    message = (
        "The 'interpret' package is required for EBM models. "
        "Install it with: pip install interpret"
    )
    raise ImportError(message)
class EBMBase(BaseEstimator):
    """Base class for EBM wrappers with common functionality.

    This class provides shared functionality for both classification and
    regression EBM models, including explanation methods and model
    inspection. Every constructor argument is stored unchanged on the
    instance and forwarded to the underlying InterpretML estimator by
    ``_get_ebm_params``. The fitted InterpretML model is expected to be
    stored by subclasses on ``self._ebm``.
    """

    # Constructor arguments that are forwarded verbatim to the wrapped model.
    _EBM_PARAM_NAMES = (
        "feature_names",
        "feature_types",
        "max_bins",
        "max_interaction_bins",
        "interactions",
        "exclude",
        "validation_size",
        "outer_bags",
        "inner_bags",
        "learning_rate",
        "greedy_ratio",
        "cyclic_progress",
        "smoothing_rounds",
        "interaction_smoothing_rounds",
        "max_rounds",
        "early_stopping_rounds",
        "early_stopping_tolerance",
        "min_samples_leaf",
        "min_hessian",
        "reg_alpha",
        "reg_lambda",
        "max_delta_step",
        "gain_scale",
        "min_cat_samples",
        "cat_smooth",
        "missing",
        "max_leaves",
        "monotone_constraints",
        "n_jobs",
        "random_state",
    )

    def __init__(
        self,
        # Feature configuration
        feature_names: list[str] | None = None,
        feature_types: list[str] | None = None,
        # Binning
        max_bins: int = 1024,
        max_interaction_bins: int = 64,
        # Interactions
        interactions: int | float | str | list = 10,
        exclude: list | None = None,
        # Training
        validation_size: float = 0.15,
        outer_bags: int = 14,
        inner_bags: int = 0,
        learning_rate: float = 0.015,
        greedy_ratio: float = 10.0,
        cyclic_progress: bool = False,
        smoothing_rounds: int = 75,
        interaction_smoothing_rounds: int = 75,
        max_rounds: int = 50000,
        early_stopping_rounds: int = 100,
        early_stopping_tolerance: float = 1e-5,
        # Regularization
        min_samples_leaf: int = 4,
        min_hessian: float = 0.0001,
        reg_alpha: float = 0.0,
        reg_lambda: float = 0.0,
        max_delta_step: float = 0.0,
        gain_scale: float = 5.0,
        # Categorical handling
        min_cat_samples: int = 10,
        cat_smooth: float = 10.0,
        missing: str = "separate",
        # Tree structure
        max_leaves: int = 2,
        monotone_constraints: list | None = None,
        # Parallelism
        n_jobs: int = -2,
        random_state: int | None = 42,
    ):
        _check_interpret_installed()
        self.feature_names = feature_names
        self.feature_types = feature_types
        self.max_bins = max_bins
        self.max_interaction_bins = max_interaction_bins
        self.interactions = interactions
        self.exclude = exclude
        self.validation_size = validation_size
        self.outer_bags = outer_bags
        self.inner_bags = inner_bags
        self.learning_rate = learning_rate
        self.greedy_ratio = greedy_ratio
        self.cyclic_progress = cyclic_progress
        self.smoothing_rounds = smoothing_rounds
        self.interaction_smoothing_rounds = interaction_smoothing_rounds
        self.max_rounds = max_rounds
        self.early_stopping_rounds = early_stopping_rounds
        self.early_stopping_tolerance = early_stopping_tolerance
        self.min_samples_leaf = min_samples_leaf
        self.min_hessian = min_hessian
        self.reg_alpha = reg_alpha
        self.reg_lambda = reg_lambda
        self.max_delta_step = max_delta_step
        self.gain_scale = gain_scale
        self.min_cat_samples = min_cat_samples
        self.cat_smooth = cat_smooth
        self.missing = missing
        self.max_leaves = max_leaves
        self.monotone_constraints = monotone_constraints
        self.n_jobs = n_jobs
        self.random_state = random_state

    def _get_ebm_params(self) -> dict[str, Any]:
        """Return a fresh dict of keyword arguments for the underlying EBM.

        Returns
        -------
        params : dict
            Mapping from constructor-argument name to the value currently
            stored on this instance. A new dictionary is built on every
            call, so callers may modify it freely.
        """
        return {name: getattr(self, name) for name in self._EBM_PARAM_NAMES}

    @property
    def intercept_(self) -> NDArray:
        """Model intercept, as exposed by the fitted underlying model."""
        check_is_fitted(self, "_ebm")
        return self._ebm.intercept_

    @property
    def term_features_(self) -> list[tuple[int, ...]]:
        """Feature indices for each term."""
        check_is_fitted(self, "_ebm")
        return self._ebm.term_features_

    @property
    def term_scores_(self) -> list[NDArray]:
        """Scores for each term (lookup tables)."""
        check_is_fitted(self, "_ebm")
        return self._ebm.term_scores_

    @property
    def feature_names_in_(self) -> list[str]:
        """Feature names seen during fit.

        A value assigned through the setter takes precedence over the names
        stored on the underlying model, and is returned even before fitting.
        """
        if hasattr(self, "_feature_names_override"):
            return self._feature_names_override
        check_is_fitted(self, "_ebm")
        return self._ebm.feature_names_in_

    @feature_names_in_.setter
    def feature_names_in_(self, value):
        # Stored separately so the underlying model is never modified.
        self._feature_names_override = value

    @property
    def feature_types_in_(self) -> list[str]:
        """Feature types detected during fit."""
        check_is_fitted(self, "_ebm")
        return self._ebm.feature_types_in_

    @property
    def n_features_in_(self) -> int:
        """Number of features seen during fit."""
        check_is_fitted(self, "_ebm")
        return self._ebm.n_features_in_

    def explain_global(self, name: str | None = None):
        """Generate global explanations for the model.

        Delegates to the underlying model's ``explain_global``.

        Parameters
        ----------
        name : str, optional
            Name for the explanation.

        Returns
        -------
        explanation : EBMExplanation
            Global explanation object produced by the underlying model.
        """
        check_is_fitted(self, "_ebm")
        return self._ebm.explain_global(name=name)

    def explain_local(
        self,
        X: ArrayLike,
        y: ArrayLike | None = None,
        name: str | None = None,
    ):
        """Generate local explanations for specific predictions.

        Delegates to the underlying model's ``explain_local``; ``X`` and
        ``y`` are passed through without validation.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to explain.
        y : array-like of shape (n_samples,), optional
            True labels (for comparison in visualization).
        name : str, optional
            Name for the explanation.

        Returns
        -------
        explanation : EBMExplanation
            Local explanation object produced by the underlying model.
        """
        check_is_fitted(self, "_ebm")
        return self._ebm.explain_local(X, y, name=name)

    def term_importances(
        self,
        importance_type: str = "avg_weight",
    ) -> NDArray:
        """Get importance scores for each term.

        Parameters
        ----------
        importance_type : str, default="avg_weight"
            Type of importance to compute. Options:
            - "avg_weight": Average absolute contribution
            - "min_max": Range of contributions

        Returns
        -------
        importances : ndarray of shape (n_terms,)
            Importance score for each term.
        """
        check_is_fitted(self, "_ebm")
        return self._ebm.term_importances(importance_type=importance_type)

    def get_term_names(self) -> list[str]:
        """Get human-readable names for each term.

        Returns
        -------
        names : list of str
            The feature name for a single-feature term, or the participating
            feature names joined with " x " for any other term.
        """
        check_is_fitted(self, "_ebm")
        # Read the property once; each access re-runs the fitted check.
        feature_names = self.feature_names_in_
        names = []
        for term in self.term_features_:
            if len(term) == 1:
                names.append(feature_names[term[0]])
            else:
                names.append(" x ".join(feature_names[i] for i in term))
        return names

    def get_feature_importances(self) -> dict[str, float]:
        """Get feature importances aggregated across all terms.

        Each term's importance is split equally among the features that
        participate in it, and the result is normalised to sum to one when
        the total is positive.

        Returns
        -------
        importances : dict
            Mapping from feature name to importance score.
        """
        check_is_fitted(self, "_ebm")
        # Read the property once; each access re-runs the fitted check.
        feature_names = self.feature_names_in_
        importances = {name: 0.0 for name in feature_names}
        for term, imp in zip(self.term_features_, self.term_importances()):
            share = imp / len(term)
            for idx in term:
                importances[feature_names[idx]] += share

        # Normalize only when there is something to normalize by.
        total = sum(importances.values())
        if total > 0:
            importances = {k: v / total for k, v in importances.items()}
        return importances

    @property
    def feature_importances_(self) -> NDArray:
        """Feature importances as numpy array (sklearn compatible)."""
        check_is_fitted(self, "_ebm")
        imp_dict = self.get_feature_importances()
        feature_names = self.feature_names_in_
        return np.array([imp_dict[name] for name in feature_names])

    def monotonize(
        self,
        term: int | str,
        increasing: bool = True,
        passthrough: float = 0.0,
    ) -> EBMBase:
        """Enforce monotonicity on a univariate term.

        Delegates to the underlying model's ``monotonize``, which modifies
        that model in place.

        Parameters
        ----------
        term : int or str
            Index or name of the term to monotonize.
        increasing : bool, default=True
            If True, enforce increasing monotonicity; if False, decreasing.
        passthrough : float, default=0.0
            Forwarded unchanged to the underlying model.

        Returns
        -------
        self : EBMBase
            The model with monotonicity enforced.
        """
        check_is_fitted(self, "_ebm")
        self._ebm.monotonize(term, increasing=increasing, passthrough=passthrough)
        return self

    def remove_terms(self, terms: list[int | str]) -> EBMBase:
        """Remove terms from the model.

        Delegates to the underlying model's ``remove_terms``.

        Parameters
        ----------
        terms : list of int or str
            Indices or names of terms to remove.

        Returns
        -------
        self : EBMBase
            The model with terms removed.
        """
        check_is_fitted(self, "_ebm")
        self._ebm.remove_terms(terms)
        return self

    def to_json(self, file: str, detail: str = "all", indent: int = 2) -> None:
        """Export model to JSON format.

        Parameters
        ----------
        file : str
            Path to save JSON.
        detail : str, default="all"
            Level of detail, forwarded to the underlying model.
        indent : int, default=2
            JSON indentation.

        Returns
        -------
        None
        """
        check_is_fitted(self, "_ebm")
        self._ebm.to_json(file, detail=detail, indent=indent)

    def get_histogram(self, term: int | str) -> tuple[NDArray, NDArray]:
        """Get the bin definition and scores for a univariate term.

        Parameters
        ----------
        term : int or str
            Index of the term, or its name as returned by
            ``get_term_names``.

        Returns
        -------
        bin_edges : ndarray
            First entry of the underlying model's ``bins_`` for the term's
            feature (presumably the main-effect bin edges; confirm against
            the InterpretML documentation).
        scores : ndarray
            Score lookup table for the term.

        Raises
        ------
        ValueError
            If ``term`` is a name that matches no term, or if the term
            involves more than one feature.
        """
        check_is_fitted(self, "_ebm")
        if isinstance(term, str):
            term = self.get_term_names().index(term)

        term_features = self.term_features_[term]
        if len(term_features) > 1:
            raise ValueError("get_histogram only works for univariate terms")

        scores = self.term_scores_[term]
        bins = self._ebm.bins_[term_features[0]][0]
        return bins, scores

    def predict_contributions(self, X: ArrayLike) -> NDArray:
        """Get per-sample, per-term contributions to predictions.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to get contributions for. NaN values are allowed.

        Returns
        -------
        contributions : ndarray of shape (n_samples, n_terms + 1)
            Contribution of each term to each sample's prediction.
            Last column is the intercept (its first element when the
            intercept is an array).
        """
        check_is_fitted(self, "_ebm")
        X = check_array(X, dtype=None, ensure_all_finite="allow-nan")

        # Per-term scores are read from the local explanation.
        local_exp = self._ebm.explain_local(X)
        n_samples = X.shape[0]
        n_terms = len(self.term_features_)
        contributions = np.zeros((n_samples, n_terms + 1))

        # The intercept may be a plain float, a 0-d value or a 1-d array.
        # ``atleast_1d`` handles all three, whereas reading ``.shape``
        # directly fails on a plain Python float. It is the same for every
        # sample, so it is written once for the whole column.
        contributions[:, -1] = np.atleast_1d(self.intercept_)[0]

        for i in range(n_samples):
            data = local_exp.data(i)
            if "scores" in data:
                contributions[i, :-1] = data["scores"]
        return contributions
class EBMClassifier(ClassifierMixin, EBMBase):
    """Explainable Boosting Machine for Classification.

    An interpretable classifier that combines the accuracy of gradient boosting
    with the transparency of Generalized Additive Models (GAMs).

    Parameters
    ----------
    feature_names : list of str, optional
        Names for features. If None and ``fit`` receives an object with a
        ``columns`` attribute, those column names are used for that fit.
    feature_types : list of str, optional
        Types for features ("continuous", "nominal", "ordinal").
    max_bins : int, default=512
        Maximum number of bins for continuous features.
    max_interaction_bins : int, default=32
        Maximum bins for interaction terms.
    interactions : int, float, str, or list, default=10
        Number or specification of interaction terms to detect.
        Can be an integer (number of interactions), float (fraction),
        string like "3x" (multiple of features), or explicit list.
    exclude : list, optional
        Features or interactions to exclude.
    validation_size : float, default=0.15
        Fraction of data to use for validation during training.
    outer_bags : int, default=8
        Number of outer bags for ensembling.
    inner_bags : int, default=0
        Number of inner bags (0 means no inner bagging).
    learning_rate : float, default=0.02
        Learning rate for boosting.
    greedy_ratio : float, default=10.0
        Ratio controlling greedy vs cyclic feature selection.
    cyclic_progress : bool, default=False
        If True, use cyclic progress; if False, use greedy.
    smoothing_rounds : int, default=50
        Number of smoothing rounds for main effects.
    interaction_smoothing_rounds : int, default=50
        Number of smoothing rounds for interactions.
    max_rounds : int, default=25000
        Maximum number of boosting rounds.
    early_stopping_rounds : int, default=50
        Stop if no improvement after this many rounds.
    early_stopping_tolerance : float, default=1e-5
        Tolerance for early stopping.
    min_samples_leaf : int, default=4
        Minimum samples in a leaf.
    min_hessian : float, default=0.0001
        Minimum hessian in a leaf.
    reg_alpha : float, default=0.0
        L1 regularization.
    reg_lambda : float, default=0.0
        L2 regularization.
    max_delta_step : float, default=0.0
        Maximum delta step (0 means no limit).
    gain_scale : float, default=5.0
        Scale factor for gain computation.
    min_cat_samples : int, default=10
        Minimum samples for categorical bins.
    cat_smooth : float, default=10.0
        Smoothing for categorical features.
    missing : str, default="separate"
        How to handle missing values ("separate", "min", "max").
    max_leaves : int, default=2
        Maximum leaves per tree (2 = stumps).
    monotone_constraints : list, optional
        Monotonicity constraints per feature (-1, 0, 1).
    n_jobs : int, default=-2
        Number of jobs for parallel processing.
    random_state : int or None, default=42
        Random state for reproducibility.

    Attributes
    ----------
    classes_ : ndarray
        Unique class labels.
    n_features_in_ : int
        Number of features seen during fit.
    feature_names_in_ : list of str
        Feature names.
    feature_types_in_ : list of str
        Detected feature types.
    intercept_ : ndarray
        Model intercept.
    term_features_ : list of tuple
        Feature indices for each term.
    term_scores_ : list of ndarray
        Score lookup tables for each term.

    Examples
    --------
    >>> from endgame.models import EBMClassifier
    >>> from sklearn.datasets import load_iris
    >>> X, y = load_iris(return_X_y=True)
    >>> clf = EBMClassifier(interactions=5)
    >>> clf.fit(X, y)
    >>> clf.score(X, y)
    0.98
    >>> global_exp = clf.explain_global()
    >>> local_exp = clf.explain_local(X[:5])
    """

    _estimator_type = "classifier"

    def __init__(
        self,
        feature_names: list[str] | None = None,
        feature_types: list[str] | None = None,
        max_bins: int = 512,
        max_interaction_bins: int = 32,
        interactions: int | float | str | list = 10,
        exclude: list | None = None,
        validation_size: float = 0.15,
        outer_bags: int = 8,
        inner_bags: int = 0,
        learning_rate: float = 0.02,
        greedy_ratio: float = 10.0,
        cyclic_progress: bool = False,
        smoothing_rounds: int = 50,
        interaction_smoothing_rounds: int = 50,
        max_rounds: int = 25000,
        early_stopping_rounds: int = 50,
        early_stopping_tolerance: float = 1e-5,
        min_samples_leaf: int = 4,
        min_hessian: float = 0.0001,
        reg_alpha: float = 0.0,
        reg_lambda: float = 0.0,
        max_delta_step: float = 0.0,
        gain_scale: float = 5.0,
        min_cat_samples: int = 10,
        cat_smooth: float = 10.0,
        missing: str = "separate",
        max_leaves: int = 2,
        monotone_constraints: list | None = None,
        n_jobs: int = -2,
        random_state: int | None = 42,
    ):
        super().__init__(
            feature_names=feature_names,
            feature_types=feature_types,
            max_bins=max_bins,
            max_interaction_bins=max_interaction_bins,
            interactions=interactions,
            exclude=exclude,
            validation_size=validation_size,
            outer_bags=outer_bags,
            inner_bags=inner_bags,
            learning_rate=learning_rate,
            greedy_ratio=greedy_ratio,
            cyclic_progress=cyclic_progress,
            smoothing_rounds=smoothing_rounds,
            interaction_smoothing_rounds=interaction_smoothing_rounds,
            max_rounds=max_rounds,
            early_stopping_rounds=early_stopping_rounds,
            early_stopping_tolerance=early_stopping_tolerance,
            min_samples_leaf=min_samples_leaf,
            min_hessian=min_hessian,
            reg_alpha=reg_alpha,
            reg_lambda=reg_lambda,
            max_delta_step=max_delta_step,
            gain_scale=gain_scale,
            min_cat_samples=min_cat_samples,
            cat_smooth=cat_smooth,
            missing=missing,
            max_leaves=max_leaves,
            monotone_constraints=monotone_constraints,
            n_jobs=n_jobs,
            random_state=random_state,
        )

    def fit(
        self,
        X: ArrayLike,
        y: ArrayLike,
        sample_weight: ArrayLike | None = None,
    ) -> EBMClassifier:
        """Fit the EBM classifier.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data. NaN values are allowed.
        y : array-like of shape (n_samples,)
            Target labels.
        sample_weight : array-like of shape (n_samples,), optional
            Sample weights, forwarded to the underlying model.

        Returns
        -------
        self : EBMClassifier
            Fitted classifier.
        """
        params = self._get_ebm_params()
        # Take column names from a DataFrame before validation converts X to
        # an array. They go into the per-fit params only: writing them back
        # to ``self.feature_names`` would make a later fit on different
        # columns silently reuse stale names.
        if params["feature_names"] is None and hasattr(X, "columns"):
            params["feature_names"] = list(X.columns)

        X, y = check_X_y(X, y, dtype=None, ensure_all_finite="allow-nan")
        self._ebm = ExplainableBoostingClassifier(**params)
        self._ebm.fit(X, y, sample_weight=sample_weight)
        self.classes_ = self._ebm.classes_
        return self

    def predict(self, X: ArrayLike) -> NDArray:
        """Predict class labels.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to predict. NaN values are allowed.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted class labels.
        """
        check_is_fitted(self, "_ebm")
        X = check_array(X, dtype=None, ensure_all_finite="allow-nan")
        return self._ebm.predict(X)

    def predict_proba(self, X: ArrayLike) -> NDArray:
        """Predict class probabilities.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to predict. NaN values are allowed.

        Returns
        -------
        proba : ndarray of shape (n_samples, n_classes)
            Class probabilities.
        """
        check_is_fitted(self, "_ebm")
        X = check_array(X, dtype=None, ensure_all_finite="allow-nan")
        return self._ebm.predict_proba(X)

    def decision_function(self, X: ArrayLike) -> NDArray:
        """Compute decision function values.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples. NaN values are allowed.

        Returns
        -------
        decision : ndarray
            Decision function values from the underlying model.
        """
        check_is_fitted(self, "_ebm")
        X = check_array(X, dtype=None, ensure_all_finite="allow-nan")
        return self._ebm.decision_function(X)
# RegressorMixin is listed before EBMBase so that it precedes BaseEstimator in
# the MRO; otherwise scikit-learn's tag resolution never sees the regressor
# mixin and utilities such as ``is_regressor`` misclassify the estimator.
class EBMRegressor(RegressorMixin, EBMBase):
    """Explainable Boosting Machine for Regression.

    An interpretable regressor that combines the accuracy of gradient boosting
    with the transparency of Generalized Additive Models (GAMs).

    Parameters
    ----------
    feature_names : list of str, optional
        Names for features. If None and ``fit`` receives an object with a
        ``columns`` attribute, those column names are used for that fit.
    feature_types : list of str, optional
        Types for features ("continuous", "nominal", "ordinal").
    max_bins : int, default=1024
        Maximum number of bins for continuous features.
    max_interaction_bins : int, default=64
        Maximum bins for interaction terms.
    interactions : int, float, str, or list, default=10
        Number or specification of interaction terms to detect.
    exclude : list, optional
        Features or interactions to exclude.
    validation_size : float, default=0.15
        Fraction of data to use for validation during training.
    outer_bags : int, default=8
        Number of outer bags for ensembling.
    inner_bags : int, default=0
        Number of inner bags.
    learning_rate : float, default=0.02
        Learning rate for boosting.
    greedy_ratio : float, default=10.0
        Ratio controlling greedy vs cyclic feature selection.
    cyclic_progress : bool, default=False
        If True, use cyclic progress.
    smoothing_rounds : int, default=50
        Number of smoothing rounds.
    interaction_smoothing_rounds : int, default=50
        Number of smoothing rounds for interactions.
    max_rounds : int, default=25000
        Maximum number of boosting rounds.
    early_stopping_rounds : int, default=50
        Stop if no improvement after this many rounds.
    early_stopping_tolerance : float, default=1e-5
        Tolerance for early stopping.
    min_samples_leaf : int, default=4
        Minimum samples in a leaf.
    min_hessian : float, default=0.0001
        Minimum hessian in a leaf.
    reg_alpha : float, default=0.0
        L1 regularization.
    reg_lambda : float, default=0.0
        L2 regularization.
    max_delta_step : float, default=0.0
        Maximum delta step.
    gain_scale : float, default=5.0
        Scale factor for gain computation.
    min_cat_samples : int, default=10
        Minimum samples for categorical bins.
    cat_smooth : float, default=10.0
        Smoothing for categorical features.
    missing : str, default="separate"
        How to handle missing values.
    max_leaves : int, default=2
        Maximum leaves per tree.
    monotone_constraints : list, optional
        Monotonicity constraints per feature.
    n_jobs : int, default=-2
        Number of jobs for parallel processing.
    random_state : int or None, default=42
        Random state for reproducibility.

    Attributes
    ----------
    n_features_in_ : int
        Number of features seen during fit.
    feature_names_in_ : list of str
        Feature names.
    feature_types_in_ : list of str
        Detected feature types.
    intercept_ : float
        Model intercept.
    term_features_ : list of tuple
        Feature indices for each term.
    term_scores_ : list of ndarray
        Score lookup tables for each term.

    Examples
    --------
    >>> from endgame.models import EBMRegressor
    >>> from sklearn.datasets import load_diabetes
    >>> X, y = load_diabetes(return_X_y=True)
    >>> reg = EBMRegressor(interactions=10)
    >>> reg.fit(X, y)
    >>> reg.score(X, y)
    0.72
    >>> importance = reg.get_feature_importances()
    """

    def __init__(
        self,
        feature_names: list[str] | None = None,
        feature_types: list[str] | None = None,
        max_bins: int = 1024,
        max_interaction_bins: int = 64,
        interactions: int | float | str | list = 10,
        exclude: list | None = None,
        validation_size: float = 0.15,
        outer_bags: int = 8,
        inner_bags: int = 0,
        learning_rate: float = 0.02,
        greedy_ratio: float = 10.0,
        cyclic_progress: bool = False,
        smoothing_rounds: int = 50,
        interaction_smoothing_rounds: int = 50,
        max_rounds: int = 25000,
        early_stopping_rounds: int = 50,
        early_stopping_tolerance: float = 1e-5,
        min_samples_leaf: int = 4,
        min_hessian: float = 0.0001,
        reg_alpha: float = 0.0,
        reg_lambda: float = 0.0,
        max_delta_step: float = 0.0,
        gain_scale: float = 5.0,
        min_cat_samples: int = 10,
        cat_smooth: float = 10.0,
        missing: str = "separate",
        max_leaves: int = 2,
        monotone_constraints: list | None = None,
        n_jobs: int = -2,
        random_state: int | None = 42,
    ):
        super().__init__(
            feature_names=feature_names,
            feature_types=feature_types,
            max_bins=max_bins,
            max_interaction_bins=max_interaction_bins,
            interactions=interactions,
            exclude=exclude,
            validation_size=validation_size,
            outer_bags=outer_bags,
            inner_bags=inner_bags,
            learning_rate=learning_rate,
            greedy_ratio=greedy_ratio,
            cyclic_progress=cyclic_progress,
            smoothing_rounds=smoothing_rounds,
            interaction_smoothing_rounds=interaction_smoothing_rounds,
            max_rounds=max_rounds,
            early_stopping_rounds=early_stopping_rounds,
            early_stopping_tolerance=early_stopping_tolerance,
            min_samples_leaf=min_samples_leaf,
            min_hessian=min_hessian,
            reg_alpha=reg_alpha,
            reg_lambda=reg_lambda,
            max_delta_step=max_delta_step,
            gain_scale=gain_scale,
            min_cat_samples=min_cat_samples,
            cat_smooth=cat_smooth,
            missing=missing,
            max_leaves=max_leaves,
            monotone_constraints=monotone_constraints,
            n_jobs=n_jobs,
            random_state=random_state,
        )

    def fit(
        self,
        X: ArrayLike,
        y: ArrayLike,
        sample_weight: ArrayLike | None = None,
    ) -> EBMRegressor:
        """Fit the EBM regressor.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data. NaN values are allowed.
        y : array-like of shape (n_samples,)
            Target values; validated as numeric.
        sample_weight : array-like of shape (n_samples,), optional
            Sample weights, forwarded to the underlying model.

        Returns
        -------
        self : EBMRegressor
            Fitted regressor.
        """
        params = self._get_ebm_params()
        # Take column names from a DataFrame before validation converts X to
        # an array. They go into the per-fit params only: writing them back
        # to ``self.feature_names`` would make a later fit on different
        # columns silently reuse stale names.
        if params["feature_names"] is None and hasattr(X, "columns"):
            params["feature_names"] = list(X.columns)

        X, y = check_X_y(
            X, y, dtype=None, y_numeric=True, ensure_all_finite="allow-nan"
        )
        self._ebm = ExplainableBoostingRegressor(**params)
        self._ebm.fit(X, y, sample_weight=sample_weight)
        return self

    def predict(self, X: ArrayLike) -> NDArray:
        """Predict target values.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to predict. NaN values are allowed.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted values.
        """
        check_is_fitted(self, "_ebm")
        X = check_array(X, dtype=None, ensure_all_finite="allow-nan")
        return self._ebm.predict(X)
def show_explanation(explanation, share_graphs: bool = False) -> None:
    """Display an EBM explanation in a dashboard.

    This is a convenience function that wraps interpret's show() function.

    Parameters
    ----------
    explanation : EBMExplanation
        Explanation from explain_global() or explain_local().
    share_graphs : bool, default=False
        Forwarded to ``interpret.show`` as the ``share_graphs`` keyword.

    Returns
    -------
    None

    Raises
    ------
    ImportError
        If the ``interpret`` package cannot be imported. The original import
        failure is attached as the exception's cause.
    """
    # Only the import is guarded: an ImportError raised inside ``show`` itself
    # must surface unchanged rather than be reported as a missing package.
    try:
        from interpret import show
    except ImportError as err:
        raise ImportError(
            "The 'interpret' package is required. "
            "Install it with: pip install interpret"
        ) from err
    show(explanation, share_graphs=share_graphs)