# Source code for endgame.automl.display

"""Interpretable model display engine.

Provides rich, formatted display of learned model structures: rules, trees,
equations, scorecards, shape functions, coefficients, and feature importances.

Works with any sklearn-compatible estimator — detects model type by probing
for known attributes (``get_rules``, ``get_scorecard``, ``coef_``, etc.)
and renders the appropriate representation.

Example
-------
>>> from endgame.automl.display import display_model, display_models
>>> display_model("EBM", fitted_ebm, feature_names)
>>> display_models({"EBM": ebm, "RuleFit": rulefit}, feature_names)
"""

from __future__ import annotations

import re
import textwrap

import numpy as np


# Matches every generic spelling in one pass: ``x<i>``, ``X<i>``, ``feature_<i>``.
_FEATURE_REF_RE = re.compile(r"\b(?P<prefix>[xX]|feature_)(?P<digits>[0-9]+)\b")


def replace_feature_indices(text: str, feature_names: list[str]) -> str:
    """Replace generic feature references (x0, X0, feature_0000) with real names.

    Recognised spellings for feature ``i`` are ``x{i}``, ``X{i}``,
    ``feature_{i}`` and the zero-padded ``feature_{i:04d}``, each delimited
    by word boundaries (so ``x1`` never matches inside ``x13``).

    All references are substituted in a single pass over *text*. That means
    a substituted name is never re-scanned: a real feature that happens to
    be called ``"x0"`` is not replaced a second time. Names are inserted
    literally, so backslashes or ``\\g<...>`` sequences inside a feature name
    are not interpreted as regex replacement escapes.

    Parameters
    ----------
    text : str
        Text containing generic feature references.
    feature_names : list of str
        Real names, indexed by feature position.

    Returns
    -------
    str
        *text* with every in-range reference replaced. References whose
        index is ``>= len(feature_names)`` are left untouched.
    """
    n_features = len(feature_names)
    if n_features == 0:
        return text

    # A valid index is written either unpadded or padded to four digits, so
    # anything longer cannot be a reference (and need not be parsed as int).
    max_digits = max(4, len(str(n_features - 1)))

    def _substitute(match: re.Match) -> str:
        digits = match.group("digits")
        if len(digits) > max_digits:
            return match.group(0)
        index = int(digits)
        if index >= n_features:
            return match.group(0)
        if digits != str(index):
            # Leading zeros are only accepted as ``feature_<4-digit index>``.
            padded_ok = (
                match.group("prefix") == "feature_" and digits == f"{index:04d}"
            )
            if not padded_ok:
                return match.group(0)
        return str(feature_names[index])

    return _FEATURE_REF_RE.sub(_substitute, text)


def _indent(text: str, prefix: str = "    ") -> str:
    return textwrap.indent(str(text), prefix)


def _bar(value: float, max_value: float, width: int = 30) -> str:
    if max_value <= 0:
        return ""
    return "█" * int(value / max_value * width)


def _show_importances(
    model,
    feature_names: list[str],
    title: str,
    top_n: int = 10,
) -> list[str]:
    """Format feature importances as lines of text."""
    lines: list[str] = []
    try:
        fi = np.asarray(model.feature_importances_).ravel()
        if len(fi) != len(feature_names):
            return lines
        lines.append(f"\n  {title}:")
        pairs = sorted(zip(feature_names, fi), key=lambda t: abs(t[1]), reverse=True)
        mx = max(fi) if max(fi) > 0 else 1
        for fn, imp in pairs[:top_n]:
            lines.append(f"    {fn:35s}  {imp:.4f}  {_bar(imp, mx)}")
    except Exception:
        pass
    return lines



# [docs]
def display_model(
    name: str,
    model,
    feature_names: list[str] | None = None,
    X_sample: np.ndarray | None = None,
    *,
    top_rules: int = 15,
    top_features: int = 10,
    print_output: bool = True,
) -> str:
    """Display the learned structure of a fitted interpretable model.

    The model type is detected by probing for known attributes and by
    substring checks on the class name (for the CORELS and GAM summaries,
    on *name*). Every matching section is rendered, so one model may
    produce several sections. Rendering is best-effort: a section that
    raises is replaced by a one-line ``(... failed: <error>)`` note rather
    than aborting the whole display.

    Parameters
    ----------
    name : str
        Display name for the model (e.g. "EBM", "RuleFit").
    model : estimator
        A fitted sklearn-compatible estimator.
    feature_names : list of str, optional
        Feature names. If None, generic names are not replaced.
    X_sample : ndarray, optional
        Sample data for computing per-sample contributions.
    top_rules : int, default=15
        Maximum number of rules/terms to display.
    top_features : int, default=10
        Maximum number of features in importance displays.
    print_output : bool, default=True
        If True, print to stdout. Always returns the full text.

    Returns
    -------
    str
        The complete formatted display text.
    """
    lines: list[str] = []
    # Materialise to a list so array-like names (ndarray, pandas Index) can
    # be truth-tested and indexed without raising.
    fn = list(feature_names) if feature_names is not None else []
    clsname = type(model).__name__.lower()

    def R(t) -> str:
        """Stringify *t* and swap generic feature references for real names."""
        text = str(t)
        return replace_feature_indices(text, fn) if fn else text

    def has_items(obj) -> bool:
        """Truth test that is also safe for multi-element arrays."""
        if obj is None:
            return False
        try:
            return len(obj) > 0
        except TypeError:
            return bool(obj)

    def feature_label(idx) -> str:
        """Name of feature *idx*, or the generic ``x<idx>`` when unknown."""
        return str(fn[idx]) if 0 <= idx < len(fn) else f"x{idx}"

    def add_text_section(title: str, producer, failure_label: str) -> None:
        """Append *title* followed by the indented text from ``producer()``."""
        lines.append(title)
        try:
            body = producer()
            if body is None:
                # Some summary methods print directly and return nothing.
                lines.append("    (no text returned)")
            else:
                lines.append(_indent(R(body)))
        except Exception as e:
            lines.append(f"    ({failure_label} failed: {e})")

    lines.append(f"\n  {'─' * 64}")
    lines.append(f"  LEARNED MODEL: {name}")
    lines.append(f"  {'─' * 64}")

    displayed = False
    # Tracks whether an importance table was already attempted, so the
    # trailing "always show" step cannot print the same table twice.
    importances_shown = False

    # ── CORELS: rule list ────────────────────────────────────────────
    if hasattr(model, "rule_list_"):
        try:
            rule_list = model.rule_list_
            if has_items(rule_list):
                lines.append("\n  Rule List:")
                lines.append(_indent(R(rule_list)))
                displayed = True
        except Exception as e:
            lines.append(f"    (rule list display failed: {e})")
            displayed = True
    if hasattr(model, "summary") and name == "CORELS":
        add_text_section("\n  Summary:", model.summary, "summary")
        displayed = True

    # ── GAM: summary ─────────────────────────────────────────────────
    if hasattr(model, "summary") and name == "GAM":
        add_text_section("\n  GAM Summary:", model.summary, "summary")
        displayed = True

    # ── EBM: term names + importances ────────────────────────────────
    if hasattr(model, "get_term_names"):
        try:
            term_names = model.get_term_names()
            importances = model.term_importances()
            lines.append(f"\n  EBM Terms (top {top_rules} by importance):")
            ranked_terms = sorted(
                zip(term_names, importances),
                key=lambda t: abs(t[1]),
                reverse=True,
            )
            for tname, imp in ranked_terms[:top_rules]:
                lines.append(f"    {R(tname):40s}  importance={imp:.4f}")
        except Exception as e:
            lines.append(f"    (term display failed: {e})")
        displayed = True

    # ── RuleFit: rules ───────────────────────────────────────────────
    if hasattr(model, "get_rules") and "rulefit" in clsname:
        try:
            rules = model.get_rules(exclude_zero_coef=True, sort_by="importance")
            lines.append(f"\n  RuleFit: {len(rules)} rules with non-zero coefficients")
            lines.append(f"  Top {top_rules} rules:")
            for i, r in enumerate(rules[:top_rules]):
                rule_str = R(r.get("rule", r.get("description", "")))
                coef = r.get("coefficient", r.get("coef", 0))
                imp = r.get("importance", abs(coef))
                lines.append(f"    [{i+1:2d}] coef={coef:+.4f}  imp={imp:.4f}")
                lines.append(f"         {rule_str}")
        except Exception as e:
            lines.append(f"    (rule display failed: {e})")
        displayed = True

    # ── FURIA: fuzzy rules ───────────────────────────────────────────
    if hasattr(model, "get_rules_str"):
        add_text_section("\n  FURIA Fuzzy Rules:", model.get_rules_str, "rule display")
        displayed = True

    # ── SLIM / FasterRisk: scorecard ─────────────────────────────────
    if hasattr(model, "get_scorecard"):
        add_text_section(
            "\n  Scoring System (Scorecard):", model.get_scorecard, "scorecard display"
        )
        displayed = True

    # ── MARS: summary with basis functions ───────────────────────────
    if hasattr(model, "summary") and "mars" in clsname:
        add_text_section("\n  MARS Summary:", model.summary, "summary")
        displayed = True

    # ── GOSDT: tree structure ────────────────────────────────────────
    if hasattr(model, "get_tree_structure"):
        add_text_section(
            "\n  Optimal Decision Tree:", model.get_tree_structure, "tree display"
        )
        displayed = True

    # ── C5.0: tree structure ─────────────────────────────────────────
    if hasattr(model, "get_structure"):
        try:
            # Fetch first: the heading is only printed if the call succeeds.
            tree_str = model.get_structure(feature_names=fn or None)
            lines.append("\n  C5.0 Decision Tree:")
            lines.append(_indent(R(tree_str)))
        except Exception as e:
            lines.append(f"    (tree display failed: {e})")
        displayed = True

    # ── Symbolic: discovered equations ───────────────────────────────
    if hasattr(model, "get_best_equation"):
        try:
            eq = model.get_best_equation()
            lines.append("\n  Best Symbolic Equation:")
            lines.append(f"    {R(str(eq))}")
        except Exception as e:
            lines.append(f"    (equation display failed: {e})")
        if hasattr(model, "get_pareto_frontier"):
            try:
                frontier = model.get_pareto_frontier()
                if frontier is not None and len(frontier) > 0:
                    lines.append("\n  Pareto Frontier (complexity vs loss):")
                    for _, row in frontier.head(10).iterrows():
                        lines.append(
                            f"    complexity={int(row.get('complexity', 0)):3d}  "
                            f"loss={row.get('loss', 0):.4f}  "
                            f"eq: {R(str(row.get('equation', '')))}"
                        )
            except Exception as e:
                lines.append(f"    (Pareto frontier display failed: {e})")
        displayed = True

    # ── GAMI-Net: interactions ───────────────────────────────────────
    if hasattr(model, "interaction_pairs_"):
        try:
            interaction_pairs = model.interaction_pairs_
            if has_items(interaction_pairs):
                lines.append("\n  GAMI-Net Interaction Pairs:")
                for p in interaction_pairs:
                    lines.append(f"    {feature_label(p[0])} x {feature_label(p[1])}")
        except Exception as e:
            lines.append(f"    (interaction display failed: {e})")
        displayed = True

    # ── NODE-GAM: per-feature contributions ──────────────────────────
    if hasattr(model, "get_feature_contributions") and X_sample is not None:
        if "nodegam" in clsname or "node_gam" in clsname:
            try:
                contribs = model.get_feature_contributions(X_sample[:5])
                lines.append("\n  NODE-GAM Feature Contributions (first 5 samples):")
                for i in range(min(5, contribs.shape[0])):
                    top_idx = np.argsort(np.abs(contribs[i]))[::-1][:5]
                    parts = [
                        f"{feature_label(j)}={contribs[i, j]:+.3f}" for j in top_idx
                    ]
                    lines.append(f"    sample {i}: {', '.join(parts)}")
            except Exception as e:
                lines.append(f"    (contribution display failed: {e})")
            displayed = True

    # ── NAM: shape-function importances ──────────────────────────────
    # NOTE(review): "nam" is a loose substring test and also matches
    # unrelated class names (anything containing "name", "dynamic", ...).
    if "nam" in clsname and hasattr(model, "feature_importances_"):
        lines.extend(
            _show_importances(model, fn, "NAM Feature Importances (shape-function based)", top_features)
        )
        displayed = True
        importances_shown = True

    # ── Linear / LDA: coefficients ───────────────────────────────────
    if hasattr(model, "coef_") and fn:
        is_linear = any(k in clsname for k in ("linear", "lda", "logistic"))
        if is_linear:
            try:
                coef = np.asarray(model.coef_)
                if coef.size == len(fn):
                    # Single output: one coefficient per feature.
                    lines.append(f"\n  {name} Coefficients:")
                    ranked_coefs = sorted(
                        zip(fn, coef.ravel()), key=lambda t: abs(t[1]), reverse=True
                    )
                    for fname, c in ranked_coefs:
                        lines.append(f"    {str(fname):35s}  {c:+.4f}")
                    if hasattr(model, "intercept_"):
                        intercept = np.asarray(model.intercept_).ravel()
                        lines.append(f"    {'(intercept)':35s}  {intercept[0]:+.4f}")
                elif coef.ndim == 2 and coef.shape[1] == len(fn):
                    # One row of coefficients per class/output.
                    labels = getattr(model, "classes_", None)
                    if labels is not None and len(labels) == coef.shape[0]:
                        kind = "class"
                    else:
                        kind, labels = "output", range(coef.shape[0])
                    intercept = None
                    if hasattr(model, "intercept_"):
                        intercept = np.asarray(model.intercept_).ravel()
                    for k, label in enumerate(labels):
                        lines.append(f"\n  {name} Coefficients ({kind} {label}):")
                        ranked_coefs = sorted(
                            zip(fn, coef[k]), key=lambda t: abs(t[1]), reverse=True
                        )
                        for fname, c in ranked_coefs:
                            lines.append(f"    {str(fname):35s}  {c:+.4f}")
                        if intercept is not None and len(intercept) == coef.shape[0]:
                            lines.append(
                                f"    {'(intercept)':35s}  {intercept[k]:+.4f}"
                            )
            except Exception as e:
                lines.append(f"    (coefficient display failed: {e})")
            displayed = True

    # ── NGBoost / generic: feature importances (if nothing else shown) ─
    if hasattr(model, "feature_importances_") and not displayed and fn:
        lines.extend(_show_importances(model, fn, "Feature Importances", top_features))
        displayed = True
        importances_shown = True

    # ── Naive Bayes: class priors + feature means ────────────────────
    if "naivebayes" in clsname or "naive_bayes" in clsname or "gaussiannb" in clsname:
        try:
            inner = getattr(model, "model_", model)
            if hasattr(inner, "class_prior_"):
                lines.append(f"\n  Class Priors: {inner.class_prior_}")
            if hasattr(inner, "theta_") and fn:
                lines.append("\n  Feature Means per Class:")
                for cls_idx in range(inner.theta_.shape[0]):
                    ranked_means = sorted(
                        zip(fn, inner.theta_[cls_idx]),
                        key=lambda t: t[1],
                        reverse=True,
                    )
                    lines.append(f"    Class {cls_idx}:")
                    for fname, val in ranked_means[:8]:
                        lines.append(f"      {str(fname):33s}  mean={val:.4f}")
        except Exception as e:
            lines.append(f"    (Naive Bayes display failed: {e})")
        displayed = True

    if not displayed:
        lines.append("    (No specific display method found for this model)")

    # ── Always show feature importances (unless already fully covered) ─
    skip_fi = {"nam", "linear", "lda", "naivebayes", "naive_bayes", "ngboost"}
    if (
        hasattr(model, "feature_importances_")
        and displayed
        and fn
        and not importances_shown
    ):
        if not any(k in clsname for k in skip_fi):
            lines.extend(_show_importances(model, fn, "Feature Importances", top_features))

    text = "\n".join(lines)

    if print_output:
        print(text)

    return text




# [docs]
def display_models(
    models: dict[str, object],
    feature_names: list[str] | None = None,
    X_sample: np.ndarray | None = None,
    *,
    top_rules: int = 15,
    top_features: int = 10,
    print_output: bool = True,
) -> str:
    """Render several fitted models one after another.

    Each entry of *models* is passed to :func:`display_model` with printing
    suppressed; the per-model texts are then joined with newlines, in the
    mapping's iteration order.

    Parameters
    ----------
    models : dict[str, estimator]
        Display name -> fitted estimator.
    feature_names : list of str, optional
        Forwarded to every :func:`display_model` call.
    X_sample : ndarray, optional
        Forwarded to every :func:`display_model` call.
    top_rules : int, default=15
        Forwarded rule/term limit.
    top_features : int, default=10
        Forwarded feature limit for importance tables.
    print_output : bool, default=True
        When True, the combined text is printed once at the end.

    Returns
    -------
    str
        The combined text for all models (empty for an empty mapping).
    """
    # Options shared by every per-model call; printing is deferred so the
    # combined text is emitted only once.
    shared_options = {
        "feature_names": feature_names,
        "X_sample": X_sample,
        "top_rules": top_rules,
        "top_features": top_features,
        "print_output": False,
    }
    rendered = [
        display_model(label, estimator, **shared_options)
        for label, estimator in models.items()
    ]
    combined = "\n".join(rendered)

    if print_output:
        print(combined)
    return combined