Source code for endgame.models.baselines.linear

from __future__ import annotations

"""Linear models for classification and regression.

Linear models provide a fundamentally different inductive bias from
tree-based and neural network models:
- Global linear decision boundaries
- Strong regularization prevents overfitting
- Fast training and inference
- Feature importance via coefficients

These characteristics make linear models valuable for ensemble diversity.

References
----------
- Ridge: Hoerl & Kennard, "Ridge Regression: Biased Estimation" (1970)
- Logistic: Cox, "The Regression Analysis of Binary Sequences" (1958)
- sklearn.linear_model documentation
"""

from typing import Any, Literal

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin
from sklearn.linear_model import (
    ElasticNet,
    Lasso,
    LogisticRegression,
    Ridge,
)
from sklearn.preprocessing import LabelEncoder, StandardScaler


class LinearClassifier(ClassifierMixin, BaseEstimator):
    """Linear classifier with competition-tuned defaults.

    Wraps ``LogisticRegression`` with automatic feature scaling and
    sensible defaults for competitive ML. Supports L1, L2, and ElasticNet
    regularization, as well as no regularization.

    Parameters
    ----------
    penalty : str, default='l2'
        Regularization: 'l1', 'l2', 'elasticnet', or 'none'.
    C : float, default=1.0
        Inverse of regularization strength.
        Smaller values = stronger regularization.
    l1_ratio : float, default=0.5
        ElasticNet mixing parameter (only used when penalty='elasticnet').
    solver : str, default='lbfgs'
        Optimization algorithm. With penalty='l1' any solver other than
        'saga' or 'liblinear' is replaced by 'saga'; with
        penalty='elasticnet' the solver is always 'saga'.
    max_iter : int, default=1000
        Maximum iterations for solver.
    class_weight : str or dict, default='balanced'
        Class weights: 'balanced' adjusts for class imbalance.
    scale_features : bool, default=True
        Whether to standardize features before fitting.
    n_jobs : int, default=-1
        Number of parallel jobs.
    random_state : int, optional
        Random seed for reproducibility.

    Attributes
    ----------
    classes_ : ndarray
        Unique class labels.
    n_features_in_ : int
        Number of features.
    coef_ : ndarray
        Feature coefficients.
    intercept_ : ndarray
        Intercept term.

    Examples
    --------
    >>> from endgame.models.baselines import LinearClassifier
    >>> clf = LinearClassifier(penalty='l2', C=1.0)
    >>> clf.fit(X_train, y_train)
    >>> proba = clf.predict_proba(X_test)

    Notes
    -----
    Linear classifiers are different from tree-based models because:

    1. Global decision boundary - same coefficients for all regions
    2. Monotonic feature relationships
    3. Implicit feature selection with L1 penalty
    4. Well-calibrated probabilities (especially with Platt scaling)

    The class_weight='balanced' default helps with imbalanced datasets.

    NaN feature values are replaced by 0.0 (via ``np.nan_to_num``) both
    when fitting and when predicting.
    """

    _estimator_type = "classifier"

    def __init__(
        self,
        penalty: Literal["l1", "l2", "elasticnet", "none"] = "l2",
        C: float = 1.0,
        l1_ratio: float = 0.5,
        solver: str = "lbfgs",
        max_iter: int = 1000,
        class_weight: str | dict | None = "balanced",
        scale_features: bool = True,
        n_jobs: int = -1,
        random_state: int | None = None,
    ):
        self.penalty = penalty
        self.C = C
        self.l1_ratio = l1_ratio
        self.solver = solver
        self.max_iter = max_iter
        self.class_weight = class_weight
        self.scale_features = scale_features
        self.n_jobs = n_jobs
        self.random_state = random_state

        # Fitted state; populated by fit().
        self.classes_: np.ndarray | None = None
        self.n_classes_: int = 0
        self.n_features_in_: int = 0
        self.model_: LogisticRegression | None = None
        self._scaler: StandardScaler | None = None
        self._label_encoder: LabelEncoder | None = None
        self._is_fitted: bool = False

    def _require_fitted(self) -> None:
        """Raise ``RuntimeError`` unless ``fit`` has completed successfully."""
        if not self._is_fitted:
            raise RuntimeError("LinearClassifier has not been fitted.")

    def _resolve_solver(self) -> str:
        """Return a solver that is compatible with the configured penalty."""
        # Elastic net is only implemented by 'saga'; in particular
        # 'liblinear' handles 'l1' but not 'elasticnet'.
        if self.penalty == "elasticnet":
            return "saga"
        if self.penalty == "l1" and self.solver not in ("saga", "liblinear"):
            return "saga"
        return self.solver

    def fit(self, X, y, sample_weight=None, **fit_params) -> LinearClassifier:
        """Fit the linear classifier.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,)
            Target labels.
        sample_weight : array-like, optional
            Sample weights.
        **fit_params
            Accepted for interface compatibility; ignored.

        Returns
        -------
        self
        """
        # A failed refit must not leave a half-replaced state marked as fitted.
        self._is_fitted = False

        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y)

        self.n_features_in_ = X.shape[1]

        # Encode labels
        self._label_encoder = LabelEncoder()
        y_encoded = self._label_encoder.fit_transform(y)
        self.classes_ = self._label_encoder.classes_
        self.n_classes_ = len(self.classes_)

        # Handle NaN
        X_clean = np.nan_to_num(X, nan=0.0)

        # Scale features
        if self.scale_features:
            self._scaler = StandardScaler()
            X_scaled = self._scaler.fit_transform(X_clean)
        else:
            # Drop any scaler left over from an earlier fit so prediction
            # preprocessing always mirrors what this fit actually did.
            self._scaler = None
            X_scaled = X_clean

        # Handle penalty=None for sklearn compatibility
        penalty = self.penalty if self.penalty != "none" else None

        # Create and fit model
        self.model_ = LogisticRegression(
            penalty=penalty,
            C=self.C,
            l1_ratio=self.l1_ratio if self.penalty == "elasticnet" else None,
            solver=self._resolve_solver(),
            max_iter=self.max_iter,
            class_weight=self.class_weight,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
        )
        self.model_.fit(X_scaled, y_encoded, sample_weight=sample_weight)

        self._is_fitted = True
        return self

    def _preprocess(self, X) -> np.ndarray:
        """Preprocess features for prediction.

        Applies the same NaN replacement as ``fit`` and, if a scaler was
        fitted, the same standardization.
        """
        X = np.asarray(X, dtype=np.float64)
        X_clean = np.nan_to_num(X, nan=0.0)
        # Key off the fitted scaler rather than the current value of
        # ``scale_features``: the hyperparameter may have been changed
        # since fit(), but the model expects the training-time transform.
        if self._scaler is not None:
            return self._scaler.transform(X_clean)
        return X_clean

    def predict(self, X) -> np.ndarray:
        """Predict class labels (in the original label space)."""
        self._require_fitted()
        X_proc = self._preprocess(X)
        y_pred = self.model_.predict(X_proc)
        return self._label_encoder.inverse_transform(y_pred)

    def predict_proba(self, X) -> np.ndarray:
        """Predict class probabilities."""
        self._require_fitted()
        return self.model_.predict_proba(self._preprocess(X))

    def predict_log_proba(self, X) -> np.ndarray:
        """Predict log class probabilities."""
        self._require_fitted()
        return self.model_.predict_log_proba(self._preprocess(X))

    def decision_function(self, X) -> np.ndarray:
        """Compute decision function."""
        self._require_fitted()
        return self.model_.decision_function(self._preprocess(X))

    @property
    def coef_(self):
        """Feature coefficients."""
        self._require_fitted()
        return self.model_.coef_

    @property
    def intercept_(self):
        """Intercept term."""
        self._require_fitted()
        return self.model_.intercept_

    @property
    def feature_importances_(self) -> np.ndarray:
        """Feature importances (absolute value of coefficients)."""
        self._require_fitted()
        # Average absolute coefficients across classes for multiclass
        return np.mean(np.abs(self.model_.coef_), axis=0)
class LinearRegressor(RegressorMixin, BaseEstimator):
    """Linear regressor with competition-tuned defaults.

    Wraps Ridge/Lasso/ElasticNet with automatic feature scaling and
    sensible defaults for competitive ML.

    Parameters
    ----------
    penalty : str, default='l2'
        Regularization: 'l1' (Lasso), 'l2' (Ridge), 'elasticnet'.
        Any value other than 'l1' or 'l2' selects ElasticNet.
    alpha : float, default=1.0
        Regularization strength.
        Larger values = stronger regularization.
    l1_ratio : float, default=0.5
        ElasticNet mixing parameter (only used when penalty='elasticnet').
    max_iter : int, default=1000
        Maximum iterations for solver (only for L1/ElasticNet).
    scale_features : bool, default=True
        Whether to standardize features before fitting.
    random_state : int, optional
        Random seed for reproducibility.

    Attributes
    ----------
    n_features_in_ : int
        Number of features.
    coef_ : ndarray
        Feature coefficients.
    intercept_ : float
        Intercept term.

    Examples
    --------
    >>> from endgame.models.baselines import LinearRegressor
    >>> reg = LinearRegressor(penalty='l2', alpha=1.0)
    >>> reg.fit(X_train, y_train)
    >>> predictions = reg.predict(X_test)

    Notes
    -----
    Linear regression provides:

    1. Interpretable coefficients
    2. Fast training and inference
    3. L1 penalty for feature selection
    4. L2 penalty for multicollinearity

    NaN values in the features (at fit and predict time) and in the
    training targets are replaced by 0.0 via ``np.nan_to_num``.
    """

    _estimator_type = "regressor"

    def __init__(
        self,
        penalty: Literal["l1", "l2", "elasticnet"] = "l2",
        alpha: float = 1.0,
        l1_ratio: float = 0.5,
        max_iter: int = 1000,
        scale_features: bool = True,
        random_state: int | None = None,
    ):
        self.penalty = penalty
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.max_iter = max_iter
        self.scale_features = scale_features
        self.random_state = random_state

        # Fitted state; populated by fit().
        self.n_features_in_: int = 0
        self.model_: Any | None = None
        self._scaler: StandardScaler | None = None
        self._is_fitted: bool = False

    def _require_fitted(self) -> None:
        """Raise ``RuntimeError`` unless ``fit`` has completed successfully."""
        if not self._is_fitted:
            raise RuntimeError("LinearRegressor has not been fitted.")

    def fit(self, X, y, sample_weight=None, **fit_params) -> LinearRegressor:
        """Fit the linear regressor.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,)
            Target values.
        sample_weight : array-like, optional
            Sample weights. Forwarded to the underlying estimator for
            every penalty.
        **fit_params
            Accepted for interface compatibility; ignored.

        Returns
        -------
        self
        """
        # A failed refit must not leave a half-replaced state marked as fitted.
        self._is_fitted = False

        X = np.asarray(X, dtype=np.float64)
        y = np.asarray(y, dtype=np.float64)

        self.n_features_in_ = X.shape[1]

        # Handle NaN
        X_clean = np.nan_to_num(X, nan=0.0)
        y_clean = np.nan_to_num(y, nan=0.0)

        # Scale features
        if self.scale_features:
            self._scaler = StandardScaler()
            X_scaled = self._scaler.fit_transform(X_clean)
        else:
            # Drop any scaler left over from an earlier fit so prediction
            # preprocessing always mirrors what this fit actually did.
            self._scaler = None
            X_scaled = X_clean

        # Create model based on penalty
        if self.penalty == "l2":
            self.model_ = Ridge(
                alpha=self.alpha,
                random_state=self.random_state,
            )
        elif self.penalty == "l1":
            self.model_ = Lasso(
                alpha=self.alpha,
                max_iter=self.max_iter,
                random_state=self.random_state,
            )
        else:  # elasticnet
            self.model_ = ElasticNet(
                alpha=self.alpha,
                l1_ratio=self.l1_ratio,
                max_iter=self.max_iter,
                random_state=self.random_state,
            )

        # Ridge, Lasso and ElasticNet all accept sample_weight in fit();
        # never drop caller-supplied weights silently.
        if sample_weight is None:
            self.model_.fit(X_scaled, y_clean)
        else:
            self.model_.fit(X_scaled, y_clean, sample_weight=sample_weight)

        self._is_fitted = True
        return self

    def _preprocess(self, X) -> np.ndarray:
        """Preprocess features for prediction.

        Applies the same NaN replacement as ``fit`` and, if a scaler was
        fitted, the same standardization.
        """
        X = np.asarray(X, dtype=np.float64)
        X_clean = np.nan_to_num(X, nan=0.0)
        # Key off the fitted scaler rather than the current value of
        # ``scale_features``: the hyperparameter may have been changed
        # since fit(), but the model expects the training-time transform.
        if self._scaler is not None:
            return self._scaler.transform(X_clean)
        return X_clean

    def predict(self, X) -> np.ndarray:
        """Predict target values."""
        self._require_fitted()
        return self.model_.predict(self._preprocess(X))

    @property
    def coef_(self):
        """Feature coefficients."""
        self._require_fitted()
        return self.model_.coef_

    @property
    def intercept_(self):
        """Intercept term."""
        self._require_fitted()
        return self.model_.intercept_

    @property
    def feature_importances_(self) -> np.ndarray:
        """Feature importances (absolute value of coefficients)."""
        self._require_fitted()
        return np.abs(self.model_.coef_)