from __future__ import annotations
"""Gaussian Process models with competition-tuned defaults.
Gaussian Processes provide Bayesian inference with kernel methods,
offering principled uncertainty quantification and different error
patterns from tree-based and neural network models.
References
----------
- Rasmussen & Williams, "Gaussian Processes for Machine Learning" (2006)
- sklearn.gaussian_process documentation
"""
from typing import Any
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
from sklearn.gaussian_process import GaussianProcessClassifier as _GPClassifier
from sklearn.gaussian_process import GaussianProcessRegressor as _GPRegressor
from sklearn.gaussian_process.kernels import (
RBF,
ConstantKernel,
DotProduct,
Matern,
RationalQuadratic,
WhiteKernel,
)
from sklearn.preprocessing import LabelEncoder, StandardScaler
# Kernel presets for different problem types.
#
# Every entry maps a preset name to a factory that takes ``length_scale`` and
# returns a fresh kernel of the form ``ConstantKernel(1.0) * <base kernel>``.
# Factories are evaluated lazily, so a new kernel object is built per call.


def _scaled(make_base):
    """Wrap a base-kernel factory so its result is multiplied by ``ConstantKernel(1.0)``."""

    def build(length_scale):
        return ConstantKernel(1.0) * make_base(length_scale)

    return build


def _matern(nu):
    """Return a base-kernel factory for a Matern kernel with smoothness ``nu``."""

    def make_base(length_scale):
        return Matern(length_scale=length_scale, nu=nu)

    return make_base


KERNEL_PRESETS = {
    "rbf": _scaled(lambda length_scale: RBF(length_scale=length_scale)),
    # "matern" is an alias for the nu=2.5 variant ("matern52").
    "matern": _scaled(_matern(2.5)),
    "matern12": _scaled(_matern(0.5)),
    "matern32": _scaled(_matern(1.5)),
    "matern52": _scaled(_matern(2.5)),
    "rq": _scaled(lambda length_scale: RationalQuadratic(length_scale=length_scale)),
    # The linear (dot-product) kernel has no length scale; the argument is ignored.
    "linear": _scaled(lambda length_scale: DotProduct(sigma_0=1.0)),
}
class GPClassifier(ClassifierMixin, BaseEstimator):
    """Gaussian Process Classifier with competition-tuned defaults.

    A Bayesian kernel method that provides probabilistic predictions. Its
    inductive bias differs from trees and neural networks, making it
    valuable for ensemble diversity.

    Parameters
    ----------
    kernel : str or sklearn kernel, default='rbf'
        Kernel type. Options: 'rbf', 'matern', 'matern12', 'matern32',
        'matern52', 'rq', 'linear', or a sklearn kernel object.
    length_scale : float, default=1.0
        Length scale parameter for the kernel. Only used when ``kernel``
        is given as a preset name.
    n_restarts_optimizer : int, default=3
        Number of restarts for the optimizer.
    max_iter_predict : int, default=100
        Maximum iterations for prediction.
    warm_start : bool, default=False
        Use previous fit as initialization.
    multi_class : str, default='one_vs_rest'
        Multi-class strategy: 'one_vs_rest' or 'one_vs_one'.
    auto_scale : bool, default=True
        Automatically scale features before fitting.
    random_state : int, optional
        Random seed for reproducibility.

    Attributes
    ----------
    classes_ : ndarray
        Unique class labels (``None`` before ``fit``).
    n_classes_ : int
        Number of classes (``0`` before ``fit``).
    n_features_in_ : int
        Number of features (``0`` before ``fit``).
    model_ : GaussianProcessClassifier
        Fitted sklearn GP classifier (``None`` before ``fit``).

    Examples
    --------
    >>> from endgame.models.kernel import GPClassifier
    >>> clf = GPClassifier(kernel='rbf', random_state=42)
    >>> clf.fit(X_train, y_train)
    >>> proba = clf.predict_proba(X_test)
    >>> # Get an entropy-based uncertainty score alongside the probabilities
    >>> proba, std = clf.predict_proba(X_test, return_std=True)

    Notes
    -----
    Gaussian Processes excel on small-medium datasets where uncertainty
    matters. They scale O(n^3) with training size, so not suitable for
    large datasets (>10k samples) without approximations.
    """

    _estimator_type = "classifier"

    def __init__(
        self,
        kernel: str | Any = "rbf",
        length_scale: float = 1.0,
        n_restarts_optimizer: int = 3,
        max_iter_predict: int = 100,
        warm_start: bool = False,
        multi_class: str = "one_vs_rest",
        auto_scale: bool = True,
        random_state: int | None = None,
    ):
        self.kernel = kernel
        self.length_scale = length_scale
        self.n_restarts_optimizer = n_restarts_optimizer
        self.max_iter_predict = max_iter_predict
        self.warm_start = warm_start
        self.multi_class = multi_class
        self.auto_scale = auto_scale
        self.random_state = random_state

        # Placeholders for fitted state; real values are assigned in ``fit``.
        self.classes_: np.ndarray | None = None
        self.n_classes_: int = 0
        self.n_features_in_: int = 0
        self.model_: _GPClassifier | None = None
        self._scaler: StandardScaler | None = None
        self._label_encoder: LabelEncoder | None = None
        self._is_fitted: bool = False

    def __sklearn_is_fitted__(self) -> bool:
        """Report the fitted state to ``sklearn.utils.validation.check_is_fitted``.

        ``__init__`` already creates trailing-underscore attributes as
        placeholders, so sklearn's default heuristic (presence of such
        attributes) would treat an unfitted instance as fitted. This hook
        makes the check rely on the explicit flag instead.
        """
        return getattr(self, "_is_fitted", False)

    def _get_kernel(self):
        """Get kernel object from string or return as-is.

        Raises
        ------
        ValueError
            If ``kernel`` is a string that is not a key of ``KERNEL_PRESETS``.
        """
        if isinstance(self.kernel, str):
            if self.kernel not in KERNEL_PRESETS:
                raise ValueError(f"Unknown kernel: {self.kernel}. "
                                 f"Options: {list(KERNEL_PRESETS.keys())}")
            return KERNEL_PRESETS[self.kernel](self.length_scale)
        return self.kernel

    def _prepare_features(self, X) -> np.ndarray:
        """Apply the preprocessing used at fit time to inference inputs.

        Raises
        ------
        RuntimeError
            If the estimator has not been fitted.
        """
        if not self._is_fitted:
            raise RuntimeError("GPClassifier has not been fitted.")
        X = np.asarray(X, dtype=np.float64)
        # Key on the scaler that ``fit`` actually created rather than on the
        # current ``auto_scale`` value: the hyperparameter may have been
        # changed after fitting, and inference must mirror training.
        if self._scaler is not None:
            X = self._scaler.transform(X)
        return np.nan_to_num(X, nan=0.0)

    def fit(self, X, y, **fit_params) -> GPClassifier:
        """Fit the Gaussian Process classifier.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,)
            Target labels.
        **fit_params
            Accepted for API compatibility; currently ignored.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not 2-dimensional or ``kernel`` is an unknown preset.
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(
                f"Expected 2D array for X, got array with shape {X.shape}."
            )
        self.n_features_in_ = X.shape[1]

        # Encode labels
        self._label_encoder = LabelEncoder()
        y_encoded = self._label_encoder.fit_transform(y)
        self.classes_ = self._label_encoder.classes_
        self.n_classes_ = len(self.classes_)

        # Scale features
        if self.auto_scale:
            self._scaler = StandardScaler()
            X_scaled = self._scaler.fit_transform(X)
        else:
            # Drop any scaler left over from an earlier fit so inference
            # does not apply a transformation this model never saw.
            self._scaler = None
            X_scaled = X

        # Handle NaN (after scaling, so with auto_scale a NaN becomes the
        # scaled value 0.0)
        X_scaled = np.nan_to_num(X_scaled, nan=0.0)

        # Create and fit model
        self.model_ = _GPClassifier(
            kernel=self._get_kernel(),
            n_restarts_optimizer=self.n_restarts_optimizer,
            max_iter_predict=self.max_iter_predict,
            warm_start=self.warm_start,
            multi_class=self.multi_class,
            random_state=self.random_state,
        )
        self.model_.fit(X_scaled, y_encoded)
        self._is_fitted = True
        return self

    def predict(self, X) -> np.ndarray:
        """Predict class labels.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to predict.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted class labels, in the original label space.

        Raises
        ------
        RuntimeError
            If the estimator has not been fitted.
        """
        X_scaled = self._prepare_features(X)
        y_pred = self.model_.predict(X_scaled)
        return self._label_encoder.inverse_transform(y_pred)

    def predict_proba(self, X, return_std: bool = False) -> np.ndarray | tuple:
        """Predict class probabilities.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to predict.
        return_std : bool, default=False
            If True, also return uncertainty estimates.

        Returns
        -------
        proba : ndarray of shape (n_samples, n_classes)
            Class probabilities.
        std : ndarray of shape (n_samples,), optional
            Uncertainty estimates (if return_std=True). Despite the name,
            this is the Shannon entropy of each probability row divided by
            ``log(n_classes_)``, not a standard deviation.

        Raises
        ------
        RuntimeError
            If the estimator has not been fitted.
        """
        X_scaled = self._prepare_features(X)
        proba = self.model_.predict_proba(X_scaled)
        if not return_std:
            return proba

        # Estimate uncertainty from entropy of predictions; the small
        # constant keeps log() finite for zero probabilities.
        entropy = -np.sum(proba * np.log(proba + 1e-10), axis=1)
        max_entropy = np.log(self.n_classes_)
        std = entropy / max_entropy  # Normalized uncertainty
        return proba, std
class GPRegressor(RegressorMixin, BaseEstimator):
    """Gaussian Process Regressor with competition-tuned defaults.

    A Bayesian kernel method that provides predictions with principled
    uncertainty estimates through the posterior predictive distribution.

    Parameters
    ----------
    kernel : str or sklearn kernel, default='rbf'
        Kernel type. Options: 'rbf', 'matern', 'matern12', 'matern32',
        'matern52', 'rq', 'linear', or a sklearn kernel object. Preset
        names get a ``WhiteKernel(noise_level=0.1)`` term added; kernel
        objects are used as given.
    length_scale : float, default=1.0
        Length scale parameter for the kernel. Only used when ``kernel``
        is given as a preset name.
    alpha : float, default=1e-10
        Value added to diagonal for numerical stability.
    n_restarts_optimizer : int, default=3
        Number of restarts for the optimizer.
    normalize_y : bool, default=True
        Normalize target values.
    auto_scale : bool, default=True
        Automatically scale features before fitting.
    random_state : int, optional
        Random seed for reproducibility.

    Attributes
    ----------
    n_features_in_ : int
        Number of features (``0`` before ``fit``).
    model_ : GaussianProcessRegressor
        Fitted sklearn GP regressor (``None`` before ``fit``).

    Examples
    --------
    >>> from endgame.models.kernel import GPRegressor
    >>> reg = GPRegressor(kernel='matern', random_state=42)
    >>> reg.fit(X_train, y_train)
    >>> y_pred, y_std = reg.predict(X_test, return_std=True)
    >>> # Prediction intervals
    >>> lower = y_pred - 1.96 * y_std
    >>> upper = y_pred + 1.96 * y_std
    """

    _estimator_type = "regressor"

    def __init__(
        self,
        kernel: str | Any = "rbf",
        length_scale: float = 1.0,
        alpha: float = 1e-10,
        n_restarts_optimizer: int = 3,
        normalize_y: bool = True,
        auto_scale: bool = True,
        random_state: int | None = None,
    ):
        self.kernel = kernel
        self.length_scale = length_scale
        self.alpha = alpha
        self.n_restarts_optimizer = n_restarts_optimizer
        self.normalize_y = normalize_y
        self.auto_scale = auto_scale
        self.random_state = random_state

        # Placeholders for fitted state; real values are assigned in ``fit``.
        self.n_features_in_: int = 0
        self.model_: _GPRegressor | None = None
        self._scaler: StandardScaler | None = None
        self._is_fitted: bool = False

    def __sklearn_is_fitted__(self) -> bool:
        """Report the fitted state to ``sklearn.utils.validation.check_is_fitted``.

        ``__init__`` already creates trailing-underscore attributes as
        placeholders, so sklearn's default heuristic (presence of such
        attributes) would treat an unfitted instance as fitted. This hook
        makes the check rely on the explicit flag instead.
        """
        return getattr(self, "_is_fitted", False)

    def _get_kernel(self):
        """Get kernel object from string or return as-is.

        Raises
        ------
        ValueError
            If ``kernel`` is a string that is not a key of ``KERNEL_PRESETS``.
        """
        if isinstance(self.kernel, str):
            if self.kernel not in KERNEL_PRESETS:
                raise ValueError(f"Unknown kernel: {self.kernel}. "
                                 f"Options: {list(KERNEL_PRESETS.keys())}")
            # Add white noise kernel for regression
            base_kernel = KERNEL_PRESETS[self.kernel](self.length_scale)
            return base_kernel + WhiteKernel(noise_level=0.1)
        return self.kernel

    def _prepare_features(self, X) -> np.ndarray:
        """Apply the preprocessing used at fit time to inference inputs.

        Raises
        ------
        RuntimeError
            If the estimator has not been fitted.
        """
        if not self._is_fitted:
            raise RuntimeError("GPRegressor has not been fitted.")
        X = np.asarray(X, dtype=np.float64)
        # Key on the scaler that ``fit`` actually created rather than on the
        # current ``auto_scale`` value: the hyperparameter may have been
        # changed after fitting, and inference must mirror training.
        if self._scaler is not None:
            X = self._scaler.transform(X)
        return np.nan_to_num(X, nan=0.0)

    def fit(self, X, y, **fit_params) -> GPRegressor:
        """Fit the Gaussian Process regressor.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,)
            Target values. NaN targets are replaced with 0.0.
        **fit_params
            Accepted for API compatibility; currently ignored.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not 2-dimensional or ``kernel`` is an unknown preset.
        """
        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y, dtype=np.float64)
        if X.ndim != 2:
            raise ValueError(
                f"Expected 2D array for X, got array with shape {X.shape}."
            )
        self.n_features_in_ = X.shape[1]

        # Scale features
        if self.auto_scale:
            self._scaler = StandardScaler()
            X_scaled = self._scaler.fit_transform(X)
        else:
            # Drop any scaler left over from an earlier fit so inference
            # does not apply a transformation this model never saw.
            self._scaler = None
            X_scaled = X

        # Handle NaN
        X_scaled = np.nan_to_num(X_scaled, nan=0.0)
        y = np.nan_to_num(y, nan=0.0)

        # Create and fit model
        self.model_ = _GPRegressor(
            kernel=self._get_kernel(),
            alpha=self.alpha,
            n_restarts_optimizer=self.n_restarts_optimizer,
            normalize_y=self.normalize_y,
            random_state=self.random_state,
        )
        self.model_.fit(X_scaled, y)
        self._is_fitted = True
        return self

    def predict(self, X, return_std: bool = False, return_cov: bool = False):
        """Predict target values.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to predict.
        return_std : bool, default=False
            If True, return standard deviation of predictions.
        return_cov : bool, default=False
            If True, return covariance of predictions.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Predicted values.
        y_std : ndarray of shape (n_samples,), optional
            Standard deviation (if return_std=True).
        y_cov : ndarray of shape (n_samples, n_samples), optional
            Covariance matrix (if return_cov=True).

        Raises
        ------
        RuntimeError
            If the estimator has not been fitted.
        """
        X_scaled = self._prepare_features(X)
        return self.model_.predict(
            X_scaled, return_std=return_std, return_cov=return_cov
        )

    def predict_interval(self, X, alpha: float = 0.05) -> tuple:
        """Predict with prediction intervals.

        The interval is ``y_pred +/- z * y_std`` where ``z`` is the standard
        normal quantile at ``1 - alpha / 2``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to predict.
        alpha : float, default=0.05
            Significance level (0.05 = 95% interval). This is unrelated to
            the constructor's ``alpha`` parameter.

        Returns
        -------
        y_pred : ndarray of shape (n_samples,)
            Point predictions.
        lower : ndarray of shape (n_samples,)
            Lower bound of prediction interval.
        upper : ndarray of shape (n_samples,)
            Upper bound of prediction interval.

        Raises
        ------
        ValueError
            If ``alpha`` is not strictly between 0 and 1.
        RuntimeError
            If the estimator has not been fitted.
        """
        # Outside (0, 1) the normal quantile is NaN or infinite, which would
        # silently produce meaningless bounds; fail loudly instead.
        if not 0.0 < alpha < 1.0:
            raise ValueError(
                f"alpha must be in the open interval (0, 1); got {alpha!r}."
            )

        from scipy import stats

        y_pred, y_std = self.predict(X, return_std=True)
        z = stats.norm.ppf(1 - alpha / 2)
        half_width = z * y_std
        return y_pred, y_pred - half_width, y_pred + half_width

    def sample_y(self, X, n_samples: int = 1, random_state: int | None = None) -> np.ndarray:
        """Sample from the posterior predictive distribution.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Query points.
        n_samples : int, default=1
            Number of samples to draw.
        random_state : int, optional
            Random seed, forwarded unchanged to the underlying model.

        Returns
        -------
        samples : ndarray of shape (n_query, n_samples)
            Samples from posterior predictive.

        Raises
        ------
        RuntimeError
            If the estimator has not been fitted.
        """
        X_scaled = self._prepare_features(X)
        return self.model_.sample_y(
            X_scaled, n_samples=n_samples, random_state=random_state
        )