# Source code for endgame.quick.api

from __future__ import annotations

"""Quick API implementation for rapid prototyping.

Provides one-line model training with automatic preprocessing,
cross-validation, and model selection.
"""

from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Literal

import numpy as np
import pandas as pd

if TYPE_CHECKING:
    from endgame.tracking.base import ExperimentLogger
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    mean_squared_error,
    r2_score,
    roc_auc_score,
)
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.preprocessing import LabelEncoder

# Names of the built-in presets; these are the keys of the PRESETS table below.
PresetName = Literal["fast", "default", "competition", "interpretable"]
# Learning task selector accepted by _get_model() and compare().
TaskType = Literal["classification", "regression"]


# =============================================================================
# Preset Configurations
# =============================================================================

# Preset table, keyed by preset name. Each entry holds:
#   description    - human-readable summary (not read by the code in this module)
#   models         - ordered model keys understood by _get_model(); classify() and
#                    regress() train only the first one, compare() trains all
#   cv_folds       - fold count used when the caller does not pass ``cv_folds``
#   early_stopping - forwarded by _get_model() as ``early_stopping_rounds`` to the
#                    LightGBM / XGBoost / CatBoost wrappers
#   n_estimators   - forwarded by _get_model() to "lgbm" and "xgb" (and to
#                    "catboost" as ``iterations``); "lgbm_fast" does not receive it
PRESETS: dict[str, dict[str, Any]] = {
    "fast": {
        "description": "Quick iteration with minimal models (~1 min)",
        "models": ["lgbm_fast", "linear"],
        "cv_folds": 3,
        "early_stopping": 50,
        "n_estimators": 500,
    },
    "default": {
        "description": "Balanced speed and accuracy (~5 min)",
        "models": ["lgbm", "xgb", "catboost", "linear"],
        "cv_folds": 5,
        "early_stopping": 100,
        "n_estimators": 2000,
    },
    "competition": {
        "description": "Full competitive pipeline (~30 min)",
        "models": [
            "lgbm",
            "xgb",
            "catboost",
            "linear",
            "knn",
            "elm",
            "rotation_forest",
        ],
        "cv_folds": 5,
        "early_stopping": 100,
        "n_estimators": 5000,
    },
    "interpretable": {
        "description": "Only interpretable models",
        "models": ["linear", "ebm", "nam"],
        "cv_folds": 5,
        "early_stopping": 100,
        "n_estimators": 2000,
    },
}


# =============================================================================
# Model Registry
# =============================================================================


def _extract_feature_names(X) -> list[str] | None:
    """Return the column labels of ``X`` as a list, or ``None`` if it has none.

    DataFrames are handled via ``columns.tolist()``; any other object that
    exposes a ``columns`` attribute has that attribute converted with ``list``.
    """
    if isinstance(X, pd.DataFrame):
        return X.columns.tolist()

    # Duck-typed fallback for non-pandas containers exposing ``columns``
    try:
        columns = X.columns
    except AttributeError:
        return None
    return list(columns)


def _build_feature_importances(
    model, feature_names: list[str] | None
) -> dict[str, float]:
    """Build a ``{feature name: importance}`` mapping from a fitted model.

    Parameters
    ----------
    model : Any
        Object that may expose a ``feature_importances_`` attribute.
    feature_names : list of str or None
        Column names of the training data, if known.

    Returns
    -------
    dict
        Empty when the model has no (or ``None``) ``feature_importances_``.
        When the attribute is already a dict it is returned unchanged.
        Otherwise importances are keyed by ``feature_names``; generic names
        ``f0, f1, ...`` are used when names are missing or their count does
        not match the number of importances.
    """
    importances = getattr(model, "feature_importances_", None)
    if importances is None:
        return {}
    if isinstance(importances, dict):
        return importances

    n_importances = len(importances)
    # A name list of the wrong length would either raise IndexError or silently
    # mislabel features, so fall back to positional names in that case.
    if not feature_names or len(feature_names) != n_importances:
        names = [f"f{i}" for i in range(n_importances)]
    else:
        names = feature_names
    return dict(zip(names, importances))


# Largest row count for which TabPFN v2.5 is added to the candidate list.
_TABPFN25_MAX_SAMPLES = 50_000


def _maybe_prepend_tabpfn25(
    models: list[str], n_samples: int
) -> list[str]:
    """Put ``"tabpfn25"`` first in the candidate list for small datasets.

    The input list is returned as-is when the dataset has more than
    ``_TABPFN25_MAX_SAMPLES`` rows or already lists ``"tabpfn25"``;
    otherwise a new list is returned and ``models`` is left untouched.
    """
    fits = n_samples <= _TABPFN25_MAX_SAMPLES
    if not fits or "tabpfn25" in models:
        return models
    return ["tabpfn25"] + models


def _get_model(model_key: str, task: TaskType, preset_config: dict[str, Any]):
    """Instantiate the estimator registered under ``model_key``.

    Parameters
    ----------
    model_key : str
        One of the keys used in ``PRESETS[...]["models"]`` or ``"tabpfn25"``.
    task : str
        ``"classification"`` selects the classifier variant; any other value
        selects the regressor variant.
    preset_config : dict
        Preset entry; ``"n_estimators"`` (default 2000) and
        ``"early_stopping"`` (default 100) are read from it.

    Raises
    ------
    ValueError
        If ``model_key`` is not recognised.
    """
    # Imported lazily to avoid circular imports at module load time
    from endgame.models.baselines import (
        ELMClassifier,
        ELMRegressor,
        KNNClassifier,
        KNNRegressor,
        LinearClassifier,
        LinearRegressor,
    )
    from endgame.models.wrappers import CatBoostWrapper, LGBMWrapper, XGBWrapper

    stop_rounds = preset_config.get("early_stopping", 100)
    n_trees = preset_config.get("n_estimators", 2000)
    wants_classifier = task == "classification"

    # --- Gradient boosting wrappers (task-agnostic constructors) ---
    if model_key == "lgbm":
        return LGBMWrapper(
            preset="endgame",
            early_stopping_rounds=stop_rounds,
            n_estimators=n_trees,
        )
    if model_key == "lgbm_fast":
        # The fast variant is built without an explicit n_estimators
        return LGBMWrapper(preset="fast", early_stopping_rounds=stop_rounds)
    if model_key == "xgb":
        return XGBWrapper(
            preset="endgame",
            early_stopping_rounds=stop_rounds,
            n_estimators=n_trees,
        )
    if model_key == "catboost":
        return CatBoostWrapper(
            preset="endgame",
            early_stopping_rounds=stop_rounds,
            iterations=n_trees,
        )

    # --- Baselines: (classifier, regressor) pairs built with defaults ---
    baseline_pairs = {
        "linear": (LinearClassifier, LinearRegressor),
        "knn": (KNNClassifier, KNNRegressor),
        "elm": (ELMClassifier, ELMRegressor),
    }
    if model_key in baseline_pairs:
        classifier_cls, regressor_cls = baseline_pairs[model_key]
        return classifier_cls() if wants_classifier else regressor_cls()

    # --- Models whose modules are only imported on demand ---
    if model_key == "rotation_forest":
        from endgame.models.trees import RotationForestClassifier, RotationForestRegressor

        forest_cls = (
            RotationForestClassifier if wants_classifier else RotationForestRegressor
        )
        return forest_cls(n_estimators=100)
    if model_key == "ebm":
        from endgame.models.ebm import EBMClassifier, EBMRegressor

        return EBMClassifier() if wants_classifier else EBMRegressor()
    if model_key == "nam":
        from endgame.models.tabular.nam import NAMClassifier, NAMRegressor

        return NAMClassifier() if wants_classifier else NAMRegressor()
    if model_key == "tabpfn25":
        from endgame.models.tabular.tabpfn import TabPFN25Classifier, TabPFN25Regressor

        return TabPFN25Classifier() if wants_classifier else TabPFN25Regressor()

    raise ValueError(f"Unknown model key: {model_key}")


# =============================================================================
# Result Classes
# =============================================================================


@dataclass
class QuickResult:
    """Result from quick.classify() or quick.regress().

    Attributes
    ----------
    model : Any
        The model after its final fit on the full training data.
    oof_predictions : ndarray
        Out-of-fold predictions. One value per sample for regression and
        binary classification; one row of per-class scores per sample for
        multiclass classification.
    cv_score : float
        Cross-validation score computed from the out-of-fold predictions.
    metric : str
        Metric used for scoring.
    feature_importances : Dict[str, float]
        Feature importance dictionary (empty if not available).
    """

    model: Any
    oof_predictions: np.ndarray
    cv_score: float
    metric: str
    feature_importances: dict[str, float] = field(default_factory=dict)

    def __repr__(self) -> str:
        # Compact summary; the model and prediction array are left out
        return f"QuickResult(cv_score={self.cv_score:.4f}, metric='{self.metric}')"
@dataclass
class ModelResult:
    """Result for a single model in comparison.

    Attributes
    ----------
    name : str
        Model key the result belongs to.
    model : Any
        The model after its final fit on the full training data.
    oof_predictions : ndarray
        Out-of-fold predictions produced during cross-validation.
    cv_score : float
        Cross-validation score computed from the out-of-fold predictions.
    fit_time : float
        Seconds spent building the model and running the cross-validation
        folds; the final refit on all data is not included.
    """

    name: str
    model: Any
    oof_predictions: np.ndarray
    cv_score: float
    fit_time: float
@dataclass
class ComparisonResult:
    """Result from quick.compare().

    Attributes
    ----------
    results : List[ModelResult]
        Results for each model, sorted by score (best first).
    best_model : Any
        The best performing model, or None when no model succeeded.
    leaderboard : List[Dict]
        Leaderboard with model names and scores; each entry is read through
        its ``"model"`` and ``"score"`` keys when rendering.
    metric : str
        Metric used for scoring.
    """

    results: list[ModelResult]
    best_model: Any
    leaderboard: list[dict[str, Any]]
    metric: str

    def __repr__(self) -> str:
        # Show at most the top five entries, then a count of the remainder
        lines = ["ComparisonResult:"]
        for rank, entry in enumerate(self.leaderboard[:5], start=1):
            lines.append(f" {rank}. {entry['model']}: {entry['score']:.4f}")
        hidden = len(self.leaderboard) - 5
        if hidden > 0:
            lines.append(f" ... and {hidden} more")
        return "\n".join(lines)
# =============================================================================
# Main API Functions
# =============================================================================
def classify(
    X,
    y,
    preset: PresetName = "default",
    metric: str = "roc_auc",
    cv_folds: int | None = None,
    random_state: int = 42,
    verbose: bool = True,
    logger: ExperimentLogger | None = None,
) -> QuickResult:
    """Quick classification with automatic model selection.

    Parameters
    ----------
    X : array-like
        Training features.
    y : array-like
        Target labels.
    preset : str, default='default'
        Preset configuration: 'fast', 'default', 'competition', 'interpretable'.
    metric : str, default='roc_auc'
        Scoring metric: 'roc_auc', 'accuracy', 'f1'.
    cv_folds : int, optional
        Number of CV folds. If None, uses preset default.
    random_state : int, default=42
        Random seed.
    verbose : bool, default=True
        Whether to print progress.
    logger : ExperimentLogger, optional
        Experiment logger for tracking params and metrics.

    Returns
    -------
    QuickResult
        Result containing model, OOF predictions, and CV score.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported metrics. This is checked
        before any model is trained.
    KeyError
        If ``preset`` is not a key of ``PRESETS``.

    Notes
    -----
    Labels are encoded with ``LabelEncoder`` before fitting, so the returned
    model is fitted on (and predicts) the encoded class indices rather than
    the original label values.

    Examples
    --------
    >>> import endgame as eg
    >>> result = eg.quick.classify(X, y)
    >>> print(f"CV Score: {result.cv_score:.4f}")
    >>> predictions = result.model.predict(X_test)
    """
    # Fail fast: an unsupported metric should not cost a full CV run
    if metric not in ("roc_auc", "accuracy", "f1"):
        raise ValueError(f"Unknown metric: {metric}")

    feature_names = _extract_feature_names(X)

    if isinstance(X, pd.DataFrame):
        X = X.copy()
    else:
        X = np.asarray(X)
    y = np.asarray(y)

    # Encode labels
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    n_classes = len(le.classes_)

    # Get preset config
    preset_config = PRESETS[preset].copy()
    n_folds = cv_folds or preset_config["cv_folds"]

    # Use first model from preset; prepend TabPFN v2.5 for small datasets
    models = _maybe_prepend_tabpfn25(preset_config["models"], len(X))
    model_key = models[0]

    if verbose:
        print(f"Training {model_key} with {preset} preset...")

    # Log params if logger provided
    if logger is not None:
        logger.log_params({
            "task": "classification",
            "model": model_key,
            "preset": preset,
            "metric": metric,
            "cv_folds": n_folds,
            "n_samples": len(X),
            "n_features": X.shape[1],
            "n_classes": n_classes,
        })

    # Get model
    model = _get_model(model_key, "classification", preset_config)

    # Cross-validation
    cv = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)

    # OOF buffer: positive-class score for binary, per-class scores otherwise
    if n_classes == 2:
        oof_preds = np.zeros(len(y))
    else:
        oof_preds = np.zeros((len(y), n_classes))

    # Use iloc for DataFrames, integer indexing for arrays
    for fold_idx, (train_idx, val_idx) in enumerate(cv.split(X, y_encoded)):
        if isinstance(X, pd.DataFrame):
            X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        else:
            X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y_encoded[train_idx], y_encoded[val_idx]

        # Fit with early stopping if available.
        # NOTE: the validation fold doubles as the early-stopping eval set.
        if hasattr(model, "early_stopping_rounds"):
            model.fit(X_train, y_train, eval_set=[(X_val, y_val)])
        else:
            model.fit(X_train, y_train)

        # Predict
        if hasattr(model, "predict_proba"):
            proba = model.predict_proba(X_val)
            if n_classes == 2:
                oof_preds[val_idx] = proba[:, 1]
            else:
                oof_preds[val_idx] = proba
        else:
            labels = model.predict(X_val)
            if n_classes == 2:
                oof_preds[val_idx] = labels
            else:
                # Hard labels cannot be written into the (n, n_classes)
                # buffer directly; one-hot encode them so argmax and the
                # multiclass scorers below still work.
                label_idx = np.asarray(labels).ravel().astype(int)
                oof_preds[val_idx] = np.eye(n_classes)[label_idx]

        if verbose:
            print(f" Fold {fold_idx + 1}/{n_folds} complete")

    # Calculate score
    if metric == "roc_auc":
        if n_classes == 2:
            cv_score = roc_auc_score(y_encoded, oof_preds)
        else:
            cv_score = roc_auc_score(y_encoded, oof_preds, multi_class="ovr")
    elif metric == "accuracy":
        if n_classes == 2:
            cv_score = accuracy_score(y_encoded, (oof_preds > 0.5).astype(int))
        else:
            cv_score = accuracy_score(y_encoded, np.argmax(oof_preds, axis=1))
    elif metric == "f1":
        if n_classes == 2:
            cv_score = f1_score(y_encoded, (oof_preds > 0.5).astype(int))
        else:
            cv_score = f1_score(
                y_encoded, np.argmax(oof_preds, axis=1), average="weighted"
            )
    else:
        # Unreachable after the up-front check; kept as a safety net
        raise ValueError(f"Unknown metric: {metric}")

    # Fit final model on all data
    model.fit(X, y_encoded)

    # Get feature importances if available
    feature_importances = _build_feature_importances(model, feature_names)

    if verbose:
        print(f"CV {metric}: {cv_score:.4f}")

    # Log metrics if logger provided
    if logger is not None:
        logger.log_metrics({metric: cv_score})

    return QuickResult(
        model=model,
        oof_predictions=oof_preds,
        cv_score=cv_score,
        metric=metric,
        feature_importances=feature_importances,
    )
def regress(
    X,
    y,
    preset: PresetName = "default",
    metric: str = "rmse",
    cv_folds: int | None = None,
    random_state: int = 42,
    verbose: bool = True,
    logger: ExperimentLogger | None = None,
) -> QuickResult:
    """Quick regression with automatic model selection.

    Parameters
    ----------
    X : array-like
        Training features.
    y : array-like
        Target values. A single-column 2-D target is flattened to 1-D.
    preset : str, default='default'
        Preset configuration: 'fast', 'default', 'competition', 'interpretable'.
    metric : str, default='rmse'
        Scoring metric: 'rmse', 'r2', 'mae'.
    cv_folds : int, optional
        Number of CV folds. If None, uses preset default.
    random_state : int, default=42
        Random seed.
    verbose : bool, default=True
        Whether to print progress.
    logger : ExperimentLogger, optional
        Experiment logger for tracking params and metrics.

    Returns
    -------
    QuickResult
        Result containing model, OOF predictions, and CV score.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported metrics. This is checked
        before any model is trained.
    KeyError
        If ``preset`` is not a key of ``PRESETS``.

    Examples
    --------
    >>> import endgame as eg
    >>> result = eg.quick.regress(X, y)
    >>> print(f"CV RMSE: {result.cv_score:.4f}")
    >>> predictions = result.model.predict(X_test)
    """
    # Fail fast: an unsupported metric should not cost a full CV run
    if metric not in ("rmse", "r2", "mae"):
        raise ValueError(f"Unknown metric: {metric}")

    feature_names = _extract_feature_names(X)

    if isinstance(X, pd.DataFrame):
        X = X.copy()
    else:
        X = np.asarray(X)
    y = np.asarray(y, dtype=np.float64)
    # A column vector (n, 1) would broadcast against the 1-D OOF buffer in
    # the NumPy-based 'mae' computation and yield an (n, n) matrix.
    if y.ndim == 2 and y.shape[1] == 1:
        y = y.ravel()

    # Get preset config
    preset_config = PRESETS[preset].copy()
    n_folds = cv_folds or preset_config["cv_folds"]

    # Use first model from preset; prepend TabPFN v2.5 for small datasets
    models = _maybe_prepend_tabpfn25(preset_config["models"], len(X))
    model_key = models[0]

    if verbose:
        print(f"Training {model_key} with {preset} preset...")

    # Log params if logger provided
    if logger is not None:
        logger.log_params({
            "task": "regression",
            "model": model_key,
            "preset": preset,
            "metric": metric,
            "cv_folds": n_folds,
            "n_samples": len(X),
            "n_features": X.shape[1],
        })

    # Get model
    model = _get_model(model_key, "regression", preset_config)

    # Cross-validation
    cv = KFold(n_splits=n_folds, shuffle=True, random_state=random_state)

    # Generate OOF predictions
    oof_preds = np.zeros(len(y))

    for fold_idx, (train_idx, val_idx) in enumerate(cv.split(X)):
        if isinstance(X, pd.DataFrame):
            X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
        else:
            X_train, X_val = X[train_idx], X[val_idx]
        y_train, y_val = y[train_idx], y[val_idx]

        # Fit with early stopping if available.
        # NOTE: the validation fold doubles as the early-stopping eval set.
        if hasattr(model, "early_stopping_rounds"):
            model.fit(X_train, y_train, eval_set=[(X_val, y_val)])
        else:
            model.fit(X_train, y_train)

        # Predict
        oof_preds[val_idx] = model.predict(X_val)

        if verbose:
            print(f" Fold {fold_idx + 1}/{n_folds} complete")

    # Calculate score
    if metric == "rmse":
        cv_score = np.sqrt(mean_squared_error(y, oof_preds))
    elif metric == "r2":
        cv_score = r2_score(y, oof_preds)
    elif metric == "mae":
        cv_score = np.mean(np.abs(y - oof_preds))
    else:
        # Unreachable after the up-front check; kept as a safety net
        raise ValueError(f"Unknown metric: {metric}")

    # Fit final model on all data
    model.fit(X, y)

    # Get feature importances if available
    feature_importances = _build_feature_importances(model, feature_names)

    if verbose:
        print(f"CV {metric}: {cv_score:.4f}")

    # Log metrics if logger provided
    if logger is not None:
        logger.log_metrics({metric: cv_score})

    return QuickResult(
        model=model,
        oof_predictions=oof_preds,
        cv_score=cv_score,
        metric=metric,
        feature_importances=feature_importances,
    )
def compare(
    X,
    y,
    task: TaskType = "classification",
    preset: PresetName = "default",
    metric: str | None = None,
    cv_folds: int | None = None,
    random_state: int = 42,
    verbose: bool = True,
    logger: ExperimentLogger | None = None,
) -> ComparisonResult:
    """Compare multiple models quickly.

    Parameters
    ----------
    X : array-like
        Training features.
    y : array-like
        Target values/labels.
    task : str, default='classification'
        Task type: 'classification' or 'regression'. Any value other than
        'classification' is handled as regression.
    preset : str, default='default'
        Preset configuration.
    metric : str, optional
        Scoring metric. If None, uses 'roc_auc' for classification and
        'rmse' for regression. Classification supports 'roc_auc',
        'accuracy', 'f1'; regression supports 'rmse', 'r2', 'mae'.
    cv_folds : int, optional
        Number of CV folds. If None, uses preset default.
    random_state : int, default=42
        Random seed.
    verbose : bool, default=True
        Whether to print progress.
    logger : ExperimentLogger, optional
        Experiment logger for tracking params and metrics.

    Returns
    -------
    ComparisonResult
        Comparison results with leaderboard. Models that raise during
        construction, training or scoring are skipped; ``best_model`` is
        None when every model failed.

    Raises
    ------
    ValueError
        If ``metric`` is not supported for ``task``. This is checked before
        any model is trained.
    KeyError
        If ``preset`` is not a key of ``PRESETS``.

    Examples
    --------
    >>> import endgame as eg
    >>> comparison = eg.quick.compare(X, y, task='classification')
    >>> print(comparison)  # Shows leaderboard
    >>> best_model = comparison.best_model
    """
    import time
    import warnings

    is_classification = task == "classification"

    # Default metrics
    if metric is None:
        metric = "roc_auc" if is_classification else "rmse"

    # Validate up front. Raised inside the per-model try block below, this
    # error would be swallowed and every model trained for nothing.
    if is_classification:
        supported_metrics = ("roc_auc", "accuracy", "f1")
    else:
        supported_metrics = ("rmse", "r2", "mae")
    if metric not in supported_metrics:
        raise ValueError(f"Unknown metric: {metric}")

    if isinstance(X, pd.DataFrame):
        X = X.copy()
    else:
        X = np.asarray(X)
    y = np.asarray(y)

    # Get preset config
    preset_config = PRESETS[preset].copy()
    n_folds = cv_folds or preset_config["cv_folds"]

    # Prepend TabPFN v2.5 for small datasets
    models = _maybe_prepend_tabpfn25(preset_config["models"], len(X))

    # Prepare data
    if is_classification:
        le = LabelEncoder()
        y_encoded = le.fit_transform(y)
        n_classes = len(le.classes_)
        cv = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=random_state)
        split_target = y_encoded  # stratify on the encoded labels
    else:
        y_encoded = y.astype(np.float64)
        # A column vector (n, 1) would broadcast against the 1-D OOF buffer
        # in the NumPy-based 'mae' computation.
        if y_encoded.ndim == 2 and y_encoded.shape[1] == 1:
            y_encoded = y_encoded.ravel()
        n_classes = 0
        cv = KFold(n_splits=n_folds, shuffle=True, random_state=random_state)
        split_target = None

    # Log params if logger provided
    if logger is not None:
        logger.log_params({
            "task": task,
            "preset": preset,
            "metric": metric,
            "cv_folds": n_folds,
            "n_samples": len(X),
            "n_features": X.shape[1],
            "models": ",".join(models),
        })

    results = []

    for model_key in models:
        if verbose:
            print(f"Training {model_key}...")

        try:
            # perf_counter is monotonic, unlike wall-clock time.time()
            start_time = time.perf_counter()

            # Get model
            model = _get_model(model_key, task, preset_config)

            # OOF buffer: per-class scores only for non-binary classification
            if is_classification and n_classes != 2:
                oof_preds = np.zeros((len(y), n_classes))
            else:
                oof_preds = np.zeros(len(y))

            for train_idx, val_idx in cv.split(X, split_target):
                if isinstance(X, pd.DataFrame):
                    X_train, X_val = X.iloc[train_idx], X.iloc[val_idx]
                else:
                    X_train, X_val = X[train_idx], X[val_idx]
                y_train, y_val = y_encoded[train_idx], y_encoded[val_idx]

                if hasattr(model, "early_stopping_rounds"):
                    model.fit(X_train, y_train, eval_set=[(X_val, y_val)])
                else:
                    model.fit(X_train, y_train)

                if is_classification and hasattr(model, "predict_proba"):
                    proba = model.predict_proba(X_val)
                    if n_classes == 2:
                        oof_preds[val_idx] = proba[:, 1]
                    else:
                        oof_preds[val_idx] = proba
                else:
                    fold_preds = model.predict(X_val)
                    if is_classification and n_classes != 2:
                        # One-hot encode hard labels so they fit the
                        # (n, n_classes) buffer used by the scorers below.
                        label_idx = np.asarray(fold_preds).ravel().astype(int)
                        fold_preds = np.eye(n_classes)[label_idx]
                    oof_preds[val_idx] = fold_preds

            fit_time = time.perf_counter() - start_time

            # Calculate score
            if is_classification:
                if metric == "roc_auc":
                    if n_classes == 2:
                        cv_score = roc_auc_score(y_encoded, oof_preds)
                    else:
                        cv_score = roc_auc_score(y_encoded, oof_preds, multi_class="ovr")
                elif metric == "accuracy":
                    if n_classes == 2:
                        cv_score = accuracy_score(y_encoded, (oof_preds > 0.5).astype(int))
                    else:
                        cv_score = accuracy_score(y_encoded, np.argmax(oof_preds, axis=1))
                else:  # "f1" (metric was validated above)
                    if n_classes == 2:
                        cv_score = f1_score(y_encoded, (oof_preds > 0.5).astype(int))
                    else:
                        cv_score = f1_score(
                            y_encoded, np.argmax(oof_preds, axis=1), average="weighted"
                        )
            else:
                if metric == "rmse":
                    cv_score = np.sqrt(mean_squared_error(y_encoded, oof_preds))
                elif metric == "r2":
                    cv_score = r2_score(y_encoded, oof_preds)
                else:  # "mae" (metric was validated above)
                    cv_score = np.mean(np.abs(y_encoded - oof_preds))

            # Fit final model
            model.fit(X, y_encoded)

            results.append(
                ModelResult(
                    name=model_key,
                    model=model,
                    oof_predictions=oof_preds,
                    cv_score=cv_score,
                    fit_time=fit_time,
                )
            )

            if verbose:
                print(f" {metric}: {cv_score:.4f} ({fit_time:.1f}s)")

        except Exception as e:
            # Best-effort comparison: one failing model must not abort the
            # run, but the failure should never disappear silently either.
            if verbose:
                print(f" Failed: {e}")
            else:
                warnings.warn(
                    f"Model '{model_key}' failed and was skipped: {e}",
                    RuntimeWarning,
                    stacklevel=2,
                )
            continue

    # Sort results (higher is better for most metrics, except rmse/mae)
    reverse = metric not in ("rmse", "mae")
    results.sort(key=lambda r: r.cv_score, reverse=reverse)

    # Create leaderboard
    leaderboard = [
        {"model": r.name, "score": r.cv_score, "fit_time": r.fit_time}
        for r in results
    ]

    # Log metrics if logger provided
    if logger is not None and results:
        logger.log_metrics({
            f"best_{metric}": results[0].cv_score,
            "n_models_compared": float(len(results)),
        })

    return ComparisonResult(
        results=results,
        best_model=results[0].model if results else None,
        leaderboard=leaderboard,
        metric=metric,
    )