# Source code for endgame.calibration.scaling

from __future__ import annotations

"""Probability calibration methods.

Methods for calibrating classifier probabilities to be more reliable.
Well-calibrated probabilities satisfy: P(Y=1 | P_pred = p) ≈ p

References
----------
- Platt "Probabilistic Outputs for SVMs" (1999)
- Guo et al. "On Calibration of Modern Neural Networks" (2017)
- Kull et al. "Beta calibration" (2017)
"""


import numpy as np
from scipy import optimize
from scipy.special import expit, logit
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.isotonic import IsotonicRegression



class TemperatureScaling(BaseEstimator, TransformerMixin):
    """Temperature scaling for neural network calibration.

    Learns a single temperature parameter T to scale logits:
    calibrated_proba = softmax(logits / T)

    This is a simple but effective method for calibrating neural networks,
    particularly when the model is already reasonably calibrated.

    One-dimensional input is treated as binary logits ``z`` and expanded to
    the two-column form ``[-z / 2, z / 2]``, whose softmax equals
    ``sigmoid(z / T)`` for the positive class.

    Parameters
    ----------
    method : str, default='nll'
        Optimization objective:
        - 'nll': Negative log-likelihood (cross-entropy)
        - 'ece': Expected Calibration Error
    max_iter : int, default=100
        Maximum optimization iterations.

    Attributes
    ----------
    temperature_ : float
        Learned temperature parameter (1.0 until ``fit`` is called).

    Examples
    --------
    >>> ts = TemperatureScaling()
    >>> ts.fit(logits_val, y_val)
    >>> calibrated = ts.transform(logits_test)
    """

    def __init__(
        self,
        method: str = "nll",
        max_iter: int = 100,
    ):
        self.method = method
        self.max_iter = max_iter
        self.temperature_: float = 1.0

    @staticmethod
    def _as_2d_logits(logits) -> np.ndarray:
        """Return logits as a float 2-D array, expanding 1-D binary logits."""
        logits = np.asarray(logits, dtype=float)
        if logits.ndim == 1:
            # Binary classification - convert to 2-class
            logits = np.column_stack([-logits / 2, logits / 2])
        return logits

    @staticmethod
    def _log_softmax(scaled: np.ndarray) -> np.ndarray:
        """Row-wise log-softmax, shifted by the row maximum to avoid overflow."""
        shifted = scaled - np.max(scaled, axis=1, keepdims=True)
        return shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    @staticmethod
    def _softmax(scaled: np.ndarray) -> np.ndarray:
        """Row-wise softmax, shifted by the row maximum to avoid overflow."""
        exp_shifted = np.exp(scaled - np.max(scaled, axis=1, keepdims=True))
        return exp_shifted / np.sum(exp_shifted, axis=1, keepdims=True)

    def fit(self, logits, y) -> TemperatureScaling:
        """Fit temperature parameter on validation data.

        Parameters
        ----------
        logits : array-like of shape (n_samples, n_classes) or (n_samples,)
            Raw logits (pre-softmax outputs).
        y : array-like of shape (n_samples,)
            True class labels, used as integer column indices into ``logits``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``method`` is unknown, the input is empty, or ``logits`` and
            ``y`` disagree on the number of samples.
        """
        logits = self._as_2d_logits(logits)
        # Flatten so a column-vector y cannot broadcast into an (n, n) lookup.
        y = np.asarray(y).ravel()

        if logits.shape[0] != y.shape[0]:
            raise ValueError(
                f"logits and y have inconsistent lengths: "
                f"{logits.shape[0]} != {y.shape[0]}"
            )
        if y.shape[0] == 0:
            raise ValueError("Cannot fit TemperatureScaling on empty input.")

        if self.method == "nll":
            # Labels index the class axis, so they must be integers.
            labels = y.astype(np.intp)
            rows = np.arange(labels.shape[0])

            def objective(T):
                log_proba = self._log_softmax(logits / T[0])
                return -np.mean(log_proba[rows, labels])

        elif self.method == "ece":
            # Optimize ECE directly
            from endgame.calibration.analysis import expected_calibration_error

            # NOTE(review): if the ECE helper is bin-based it is piecewise
            # constant in T, so L-BFGS-B's finite-difference gradients may be
            # uninformative - confirm this objective actually moves off x0.
            def objective(T):
                proba = self._softmax(logits / T[0])
                return expected_calibration_error(
                    y, proba[:, 1] if proba.shape[1] == 2 else proba
                )

        else:
            raise ValueError(f"Unknown method: {self.method}")

        result = optimize.minimize(
            objective,
            x0=[1.0],
            bounds=[(0.01, 10.0)],
            method='L-BFGS-B',
            options={'maxiter': self.max_iter},
        )
        self.temperature_ = float(result.x[0])

        return self

    def transform(self, logits) -> np.ndarray:
        """Apply temperature scaling to logits.

        Parameters
        ----------
        logits : array-like of shape (n_samples, n_classes) or (n_samples,)
            Raw logits.

        Returns
        -------
        ndarray of shape (n_samples, n_classes)
            Calibrated probabilities; two columns when the input was 1-D.
        """
        scaled = self._as_2d_logits(logits) / self.temperature_
        return self._softmax(scaled)

    def fit_transform(self, logits, y) -> np.ndarray:
        """Fit and transform in one step."""
        return self.fit(logits, y).transform(logits)





class PlattScaling(BaseEstimator, TransformerMixin):
    """Platt scaling (sigmoid calibration) for binary classification.

    Fits logistic regression: P(y=1|f) = 1 / (1 + exp(-(A*f + B)))

    Parameters
    ----------
    prior_correction : bool, default=True
        Apply prior correction for imbalanced datasets.
        Uses Platt's method with adjusted target probabilities.
    max_iter : int, default=100
        Maximum optimization iterations.

    Attributes
    ----------
    A_ : float
        Learned slope parameter.
    B_ : float
        Learned intercept parameter.

    Examples
    --------
    >>> platt = PlattScaling()
    >>> platt.fit(scores_val, y_val)
    >>> calibrated = platt.transform(scores_test)
    """

    def __init__(
        self,
        prior_correction: bool = True,
        max_iter: int = 100,
    ):
        self.prior_correction = prior_correction
        self.max_iter = max_iter
        self.A_: float = 0.0
        self.B_: float = 0.0

    def fit(self, scores, y) -> PlattScaling:
        """Fit Platt scaling parameters.

        Parameters
        ----------
        scores : array-like of shape (n_samples,)
            Raw scores or decision function values.
        y : array-like of shape (n_samples,)
            Binary labels (0 or 1).

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If the input is empty or ``scores`` and ``y`` differ in length.
        """
        scores = np.asarray(scores, dtype=float).ravel()
        y = np.asarray(y).ravel()

        if scores.shape[0] != y.shape[0]:
            raise ValueError(
                f"scores and y have inconsistent lengths: "
                f"{scores.shape[0]} != {y.shape[0]}"
            )
        if scores.shape[0] == 0:
            raise ValueError("Cannot fit PlattScaling on empty input.")

        # Prior correction (Platt's algorithm)
        if self.prior_correction:
            n_pos = np.sum(y == 1)
            n_neg = np.sum(y == 0)
            # Smoothed targets keep the fit away from hard 0/1 labels.
            t_pos = (n_pos + 1) / (n_pos + 2)
            t_neg = 1 / (n_neg + 2)
            targets = np.where(y == 1, t_pos, t_neg)
        else:
            targets = y.astype(float)

        def neg_log_likelihood(params):
            slope, intercept = params
            z = slope * scores + intercept
            # -log(sigmoid(z)) = logaddexp(0, -z) and
            # -log(1 - sigmoid(z)) = logaddexp(0, z); this form stays finite
            # and keeps a usable gradient when the sigmoid saturates.
            return np.mean(
                targets * np.logaddexp(0.0, -z)
                + (1.0 - targets) * np.logaddexp(0.0, z)
            )

        result = optimize.minimize(
            neg_log_likelihood,
            x0=[0.0, 0.0],
            method='L-BFGS-B',
            options={'maxiter': self.max_iter},
        )

        self.A_ = float(result.x[0])
        self.B_ = float(result.x[1])
        return self

    def transform(self, scores) -> np.ndarray:
        """Apply Platt scaling.

        Parameters
        ----------
        scores : array-like
            Raw scores; flattened to one dimension.

        Returns
        -------
        ndarray of shape (n_samples,)
            Calibrated probabilities ``expit(A_ * scores + B_)``.
        """
        scores = np.asarray(scores).ravel()
        return expit(self.A_ * scores + self.B_)

    def fit_transform(self, scores, y) -> np.ndarray:
        """Fit and transform."""
        return self.fit(scores, y).transform(scores)





class BetaCalibration(BaseEstimator, TransformerMixin):
    """Beta calibration for improved probability estimates.

    More flexible than Platt scaling, handles different miscalibration patterns.

    Fits: calibrated = sigmoid(a * (m*log(p) - (1-m)*log(1-p)) + b)

    With m = 0.5 this reduces to sigmoid((a/2) * logit(p) + b), i.e. a
    Platt-style map on the log-odds.

    Parameters
    ----------
    parameters : str, default='abm'
        Parameterization:
        - 'abm': Three parameters (a, b, m) - most common
        - 'full': Not implemented; ``fit`` emits a warning and fits the
          'abm' model instead.

    Attributes
    ----------
    a_, b_, m_ : float
        Learned parameters (defaults 1.0, 0.0, 0.5 until ``fit`` is called).

    References
    ----------
    Kull et al. "Beta calibration: a well-founded and easily implemented
    improvement on logistic calibration for binary classifiers" (2017)

    Examples
    --------
    >>> beta_cal = BetaCalibration()
    >>> beta_cal.fit(proba_val, y_val)
    >>> calibrated = beta_cal.transform(proba_test)
    """

    def __init__(self, parameters: str = "abm"):
        self.parameters = parameters
        self.a_: float = 1.0
        self.b_: float = 0.0
        self.m_: float = 0.5

    def fit(self, proba, y) -> BetaCalibration:
        """Fit beta calibration parameters.

        Parameters
        ----------
        proba : array-like
            Predicted probabilities for positive class.
        y : array-like
            Binary labels.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``parameters`` is not recognised, the input is empty, or
            ``proba`` and ``y`` differ in length.
        """
        if self.parameters not in ("abm", "full"):
            raise ValueError(f"Unknown parameters: {self.parameters}")
        if self.parameters == "full":
            import warnings

            # Previously this option made fit() a silent no-op; fitting the
            # supported model is the closest useful behaviour.
            warnings.warn(
                "BetaCalibration(parameters='full') is not implemented; "
                "fitting the 'abm' parameterization instead.",
                stacklevel=2,
            )

        proba = np.asarray(proba, dtype=float).ravel()
        # Float labels so that ``1 - y`` also works for boolean input.
        y = np.asarray(y, dtype=float).ravel()

        if proba.shape[0] != y.shape[0]:
            raise ValueError(
                f"proba and y have inconsistent lengths: "
                f"{proba.shape[0]} != {y.shape[0]}"
            )
        if proba.shape[0] == 0:
            raise ValueError("Cannot fit BetaCalibration on empty input.")

        # Clip probabilities to avoid numerical issues
        proba = np.clip(proba, 1e-10, 1 - 1e-10)

        # These do not depend on the parameters, so compute them once.
        log_p = np.log(proba)
        log_q = np.log(1 - proba)

        def neg_log_likelihood(params):
            a, b, m = params
            # logit_m(p) = log(p^m / (1-p)^(1-m)) = m*log(p) - (1-m)*log(1-p)
            z = a * (m * log_p - (1 - m) * log_q) + b
            # Stable cross-entropy: -log(sigmoid(z)) = logaddexp(0, -z).
            return np.mean(
                y * np.logaddexp(0.0, -z) + (1.0 - y) * np.logaddexp(0.0, z)
            )

        result = optimize.minimize(
            neg_log_likelihood,
            x0=[1.0, 0.0, 0.5],
            bounds=[(0.01, 100), (-10, 10), (0.01, 0.99)],
            method='L-BFGS-B',
        )
        self.a_, self.b_, self.m_ = (float(v) for v in result.x)

        return self

    def transform(self, proba) -> np.ndarray:
        """Apply beta calibration.

        Parameters
        ----------
        proba : array-like
            Predicted probabilities; flattened and clipped to
            [1e-10, 1 - 1e-10] before the logs are taken.

        Returns
        -------
        ndarray of shape (n_samples,)
            Calibrated probabilities.
        """
        proba = np.asarray(proba).ravel()
        proba = np.clip(proba, 1e-10, 1 - 1e-10)

        logit_m = self.m_ * np.log(proba) - (1 - self.m_) * np.log(1 - proba)
        return expit(self.a_ * logit_m + self.b_)

    def fit_transform(self, proba, y) -> np.ndarray:
        """Fit and transform."""
        return self.fit(proba, y).transform(proba)





class IsotonicCalibration(BaseEstimator, TransformerMixin):
    """Isotonic regression calibration.

    Non-parametric calibration that preserves ranking.
    Fits a monotonically increasing step function mapping
    predicted probabilities to calibrated probabilities.

    Best for large calibration sets (>1000 samples) where
    the flexibility doesn't lead to overfitting.

    Parameters
    ----------
    out_of_bounds : str, default='clip'
        How to handle predictions outside training range; forwarded
        unchanged to ``IsotonicRegression``:
        - 'clip': Clip to [min, max] of training range
        - 'nan': Return NaN for out-of-bounds

    Attributes
    ----------
    isotonic_ : IsotonicRegression or None
        Fitted isotonic regression model; ``None`` until ``fit`` is called.

    Examples
    --------
    >>> iso = IsotonicCalibration()
    >>> iso.fit(proba_val, y_val)
    >>> calibrated = iso.transform(proba_test)
    """

    def __init__(self, out_of_bounds: str = "clip"):
        self.out_of_bounds = out_of_bounds
        self.isotonic_: IsotonicRegression | None = None

    def fit(self, proba, y) -> IsotonicCalibration:
        """Fit isotonic regression.

        Parameters
        ----------
        proba : array-like
            Predicted probabilities; flattened to one dimension.
        y : array-like
            Binary labels; flattened to one dimension.

        Returns
        -------
        self
        """
        proba = np.asarray(proba).ravel()
        y = np.asarray(y).ravel()

        # y_min / y_max pin the fitted outputs to the [0, 1] range.
        self.isotonic_ = IsotonicRegression(
            y_min=0,
            y_max=1,
            out_of_bounds=self.out_of_bounds,
        )
        self.isotonic_.fit(proba, y)

        return self

    def transform(self, proba) -> np.ndarray:
        """Apply isotonic calibration.

        Parameters
        ----------
        proba : array-like
            Predicted probabilities; flattened to one dimension.

        Returns
        -------
        ndarray
            Calibrated probabilities.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.isotonic_ is None:
            raise RuntimeError("IsotonicCalibration has not been fitted.")

        proba = np.asarray(proba).ravel()
        return self.isotonic_.transform(proba)

    def fit_transform(self, proba, y) -> np.ndarray:
        """Fit and transform."""
        return self.fit(proba, y).transform(proba)





class HistogramBinning(BaseEstimator, TransformerMixin):
    """Histogram binning calibration.

    Divides probability space into bins and maps each bin to
    the empirical frequency of positives within that bin.

    Simple and interpretable, but can be unreliable with few samples.

    Bins are half-open ``[edge_i, edge_{i+1})`` with the last bin also
    including its right edge. Values outside the edge range fall into the
    nearest end bin, both when fitting and when transforming.

    Parameters
    ----------
    n_bins : int, default=10
        Number of bins (must be at least 1).
    strategy : str, default='uniform'
        Binning strategy:
        - 'uniform': Equal-width bins
        - 'quantile': Equal-frequency bins (duplicate edges are merged, so
          fewer than ``n_bins`` bins may result)

    Attributes
    ----------
    bin_edges_ : ndarray
        Edges of calibration bins.
    bin_calibrations_ : ndarray
        Calibrated probability for each bin.

    Examples
    --------
    >>> hb = HistogramBinning(n_bins=15)
    >>> hb.fit(proba_val, y_val)
    >>> calibrated = hb.transform(proba_test)
    """

    def __init__(
        self,
        n_bins: int = 10,
        strategy: str = "uniform",
    ):
        self.n_bins = n_bins
        self.strategy = strategy
        self.bin_edges_: np.ndarray | None = None
        self.bin_calibrations_: np.ndarray | None = None

    @staticmethod
    def _bin_index(proba: np.ndarray, edges: np.ndarray) -> np.ndarray:
        """Map each probability to a bin index in ``[0, len(edges) - 2]``."""
        # digitize gives edges[i-1] <= x < edges[i]; the clip folds the right
        # edge and any out-of-range value into the nearest end bin.
        return np.clip(np.digitize(proba, edges) - 1, 0, len(edges) - 2)

    def fit(self, proba, y) -> HistogramBinning:
        """Fit histogram binning.

        Parameters
        ----------
        proba : array-like
            Predicted probabilities.
        y : array-like
            Binary labels.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``strategy`` is unknown, ``n_bins`` is below 1, or ``proba``
            and ``y`` differ in length.
        """
        proba = np.asarray(proba, dtype=float).ravel()
        y = np.asarray(y, dtype=float).ravel()

        if proba.shape[0] != y.shape[0]:
            raise ValueError(
                f"proba and y have inconsistent lengths: "
                f"{proba.shape[0]} != {y.shape[0]}"
            )
        if self.n_bins < 1:
            raise ValueError(f"n_bins must be >= 1, got {self.n_bins}")

        if self.strategy == "uniform":
            edges = np.linspace(0, 1, self.n_bins + 1)
        elif self.strategy == "quantile":
            if proba.size == 0:
                edges = np.array([0.0, 1.0])
            else:
                # Ensure edges are unique and sorted
                edges = np.unique(
                    np.percentile(proba, np.linspace(0, 100, self.n_bins + 1))
                )
                if edges.size < 2:
                    # Constant predictions collapse to one edge, which would
                    # leave zero bins; use a single bin over [0, 1] instead.
                    edges = np.array([0.0, 1.0])
        else:
            raise ValueError(f"Unknown strategy: {self.strategy}")

        n_bins = len(edges) - 1
        bin_of = self._bin_index(proba, edges)

        # Per-bin sample count and sum of labels in one pass each.
        counts = np.bincount(bin_of, minlength=n_bins)
        label_sums = np.bincount(bin_of, weights=y, minlength=n_bins)

        # No samples in bin - use bin midpoint
        midpoints = (edges[:-1] + edges[1:]) / 2
        self.bin_edges_ = edges
        self.bin_calibrations_ = np.where(
            counts > 0, label_sums / np.maximum(counts, 1), midpoints
        )

        return self

    def transform(self, proba) -> np.ndarray:
        """Apply histogram binning calibration.

        Parameters
        ----------
        proba : array-like
            Predicted probabilities; flattened to one dimension.

        Returns
        -------
        ndarray of shape (n_samples,)
            Calibrated probabilities.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.bin_edges_ is None or self.bin_calibrations_ is None:
            raise RuntimeError("HistogramBinning has not been fitted.")

        proba = np.asarray(proba).ravel()
        return self.bin_calibrations_[self._bin_index(proba, self.bin_edges_)]

    def fit_transform(self, proba, y) -> np.ndarray:
        """Fit and transform."""
        return self.fit(proba, y).transform(proba)