"""Interpretable model display engine.
Provides rich, formatted display of learned model structures: rules, trees,
equations, scorecards, shape functions, coefficients, and feature importances.
Works with any sklearn-compatible estimator — detects model type by probing
for known attributes (``get_rules``, ``get_scorecard``, ``coef_``, etc.)
and renders the appropriate representation.
Example
-------
>>> from endgame.automl.display import display_model, display_models
>>> display_model("EBM", fitted_ebm, feature_names)
>>> display_models({"EBM": ebm, "RuleFit": rulefit}, feature_names)
"""
from __future__ import annotations
import re
import textwrap
import numpy as np
def replace_feature_indices(text: str, feature_names: list[str]) -> str:
"""Replace generic feature references (x0, X0, feature_0000) with real names.
Processes longest indices first to avoid partial replacement (x13 before x1).
"""
for i in sorted(range(len(feature_names)), reverse=True):
name = feature_names[i]
text = re.sub(rf"\bx{i}\b", name, text)
text = re.sub(rf"\bX{i}\b", name, text)
text = re.sub(rf"\bfeature_{i:04d}\b", name, text)
text = re.sub(rf"\bfeature_{i}\b", name, text)
return text
def _indent(text: str, prefix: str = " ") -> str:
return textwrap.indent(str(text), prefix)
def _bar(value: float, max_value: float, width: int = 30) -> str:
if max_value <= 0:
return ""
return "█" * int(value / max_value * width)
def _show_importances(
model,
feature_names: list[str],
title: str,
top_n: int = 10,
) -> list[str]:
"""Format feature importances as lines of text."""
lines: list[str] = []
try:
fi = np.asarray(model.feature_importances_).ravel()
if len(fi) != len(feature_names):
return lines
lines.append(f"\n {title}:")
pairs = sorted(zip(feature_names, fi), key=lambda t: abs(t[1]), reverse=True)
mx = max(fi) if max(fi) > 0 else 1
for fn, imp in pairs[:top_n]:
lines.append(f" {fn:35s} {imp:.4f} {_bar(imp, mx)}")
except Exception:
pass
return lines
[docs]
def display_model(
name: str,
model,
feature_names: list[str] | None = None,
X_sample: np.ndarray | None = None,
*,
top_rules: int = 15,
top_features: int = 10,
print_output: bool = True,
) -> str:
"""Display the learned structure of a fitted interpretable model.
Parameters
----------
name : str
Display name for the model (e.g. "EBM", "RuleFit").
model : estimator
A fitted sklearn-compatible estimator.
feature_names : list of str, optional
Feature names. If None, generic names are not replaced.
X_sample : ndarray, optional
Sample data for computing per-sample contributions.
top_rules : int, default=15
Maximum number of rules/terms to display.
top_features : int, default=10
Maximum number of features in importance displays.
print_output : bool, default=True
If True, print to stdout. Always returns the full text.
Returns
-------
str
The complete formatted display text.
"""
lines: list[str] = []
fn = feature_names or []
def R(t: str) -> str:
return replace_feature_indices(t, fn) if fn else t
lines.append(f"\n {'─' * 64}")
lines.append(f" LEARNED MODEL: {name}")
lines.append(f" {'─' * 64}")
displayed = False
# ── CORELS: rule list ────────────────────────────────────────────
if hasattr(model, "rule_list_") and model.rule_list_:
lines.append("\n Rule List:")
lines.append(_indent(R(model.rule_list_)))
displayed = True
if hasattr(model, "summary") and name == "CORELS":
try:
lines.append("\n Summary:")
lines.append(_indent(R(model.summary())))
except Exception:
pass
displayed = True
# ── GAM: summary ─────────────────────────────────────────────────
if hasattr(model, "summary") and name == "GAM":
try:
lines.append("\n GAM Summary:")
lines.append(_indent(R(model.summary())))
except Exception as e:
lines.append(f" (summary failed: {e})")
displayed = True
# ── EBM: term names + importances ────────────────────────────────
if hasattr(model, "get_term_names"):
try:
term_names = model.get_term_names()
importances = model.term_importances()
lines.append(f"\n EBM Terms (top {top_rules} by importance):")
pairs = sorted(
zip(term_names, importances),
key=lambda t: abs(t[1]),
reverse=True,
)
for tname, imp in pairs[:top_rules]:
lines.append(f" {R(tname):40s} importance={imp:.4f}")
except Exception as e:
lines.append(f" (term display failed: {e})")
displayed = True
# ── RuleFit: rules ───────────────────────────────────────────────
if hasattr(model, "get_rules") and "rulefit" in type(model).__name__.lower():
try:
rules = model.get_rules(exclude_zero_coef=True, sort_by="importance")
lines.append(f"\n RuleFit: {len(rules)} rules with non-zero coefficients")
lines.append(f" Top {top_rules} rules:")
for i, r in enumerate(rules[:top_rules]):
rule_str = R(r.get("rule", r.get("description", "")))
coef = r.get("coefficient", r.get("coef", 0))
imp = r.get("importance", abs(coef))
lines.append(f" [{i+1:2d}] coef={coef:+.4f} imp={imp:.4f}")
lines.append(f" {rule_str}")
except Exception as e:
lines.append(f" (rule display failed: {e})")
displayed = True
# ── FURIA: fuzzy rules ───────────────────────────────────────────
if hasattr(model, "get_rules_str"):
try:
lines.append("\n FURIA Fuzzy Rules:")
lines.append(_indent(R(model.get_rules_str())))
except Exception as e:
lines.append(f" (rule display failed: {e})")
displayed = True
# ── SLIM / FasterRisk: scorecard ─────────────────────────────────
if hasattr(model, "get_scorecard"):
try:
lines.append("\n Scoring System (Scorecard):")
lines.append(_indent(R(model.get_scorecard())))
except Exception as e:
lines.append(f" (scorecard display failed: {e})")
displayed = True
# ── MARS: summary with basis functions ───────────────────────────
if hasattr(model, "summary") and "mars" in type(model).__name__.lower():
try:
lines.append("\n MARS Summary:")
lines.append(_indent(R(model.summary())))
except Exception as e:
lines.append(f" (summary failed: {e})")
displayed = True
# ── GOSDT: tree structure ────────────────────────────────────────
if hasattr(model, "get_tree_structure"):
try:
lines.append("\n Optimal Decision Tree:")
lines.append(_indent(R(model.get_tree_structure())))
except Exception as e:
lines.append(f" (tree display failed: {e})")
displayed = True
# ── C5.0: tree structure ─────────────────────────────────────────
if hasattr(model, "get_structure"):
try:
tree_str = model.get_structure(feature_names=fn or None)
lines.append("\n C5.0 Decision Tree:")
lines.append(_indent(R(tree_str)))
except Exception as e:
lines.append(f" (tree display failed: {e})")
displayed = True
# ── Symbolic: discovered equations ───────────────────────────────
if hasattr(model, "get_best_equation"):
try:
eq = model.get_best_equation()
lines.append("\n Best Symbolic Equation:")
lines.append(f" {R(str(eq))}")
except Exception as e:
lines.append(f" (equation display failed: {e})")
if hasattr(model, "get_pareto_frontier"):
try:
frontier = model.get_pareto_frontier()
if frontier is not None and len(frontier) > 0:
lines.append("\n Pareto Frontier (complexity vs loss):")
for _, row in frontier.head(10).iterrows():
lines.append(
f" complexity={int(row.get('complexity', 0)):3d} "
f"loss={row.get('loss', 0):.4f} "
f"eq: {R(str(row.get('equation', '')))}"
)
except Exception:
pass
displayed = True
# ── GAMI-Net: interactions ───────────────────────────────────────
if hasattr(model, "interaction_pairs_"):
try:
pairs = model.interaction_pairs_
if pairs and fn:
lines.append("\n GAMI-Net Interaction Pairs:")
for p in pairs:
f1 = fn[p[0]] if p[0] < len(fn) else str(p[0])
f2 = fn[p[1]] if p[1] < len(fn) else str(p[1])
lines.append(f" {f1} x {f2}")
except Exception:
pass
displayed = True
# ── NODE-GAM: per-feature contributions ──────────────────────────
if hasattr(model, "get_feature_contributions") and X_sample is not None:
clsname = type(model).__name__.lower()
if "nodegam" in clsname or "node_gam" in clsname:
try:
contribs = model.get_feature_contributions(X_sample[:5])
lines.append("\n NODE-GAM Feature Contributions (first 5 samples):")
for i in range(min(5, contribs.shape[0])):
top_idx = np.argsort(np.abs(contribs[i]))[::-1][:5]
parts = [
f"{fn[j]}={contribs[i, j]:+.3f}"
for j in top_idx
if fn and j < len(fn)
]
lines.append(f" sample {i}: {', '.join(parts)}")
except Exception as e:
lines.append(f" (contribution display failed: {e})")
displayed = True
# ── NAM: shape-function importances ──────────────────────────────
if "nam" in type(model).__name__.lower() and hasattr(model, "feature_importances_"):
lines.extend(
_show_importances(model, fn, "NAM Feature Importances (shape-function based)", top_features)
)
displayed = True
# ── Linear / LDA: coefficients ───────────────────────────────────
if hasattr(model, "coef_") and fn:
clsname = type(model).__name__.lower()
is_linear = any(k in clsname for k in ("linear", "lda", "logistic"))
if is_linear:
try:
coef = np.asarray(model.coef_).ravel()
if len(coef) == len(fn):
lines.append(f"\n {name} Coefficients:")
pairs = sorted(zip(fn, coef), key=lambda t: abs(t[1]), reverse=True)
for fname, c in pairs:
lines.append(f" {fname:35s} {c:+.4f}")
if hasattr(model, "intercept_"):
intercept = np.asarray(model.intercept_).ravel()
lines.append(f" {'(intercept)':35s} {intercept[0]:+.4f}")
except Exception as e:
lines.append(f" (coefficient display failed: {e})")
displayed = True
# ── NGBoost / generic: feature importances (if nothing else shown) ─
if hasattr(model, "feature_importances_") and not displayed and fn:
lines.extend(_show_importances(model, fn, "Feature Importances", top_features))
displayed = True
# ── Naive Bayes: class priors + feature means ────────────────────
clsname = type(model).__name__.lower()
if "naivebayes" in clsname or "naive_bayes" in clsname or "gaussiannb" in clsname:
try:
inner = getattr(model, "model_", model)
if hasattr(inner, "class_prior_"):
lines.append(f"\n Class Priors: {inner.class_prior_}")
if hasattr(inner, "theta_") and fn:
lines.append("\n Feature Means per Class:")
for cls_idx in range(inner.theta_.shape[0]):
pairs = sorted(
zip(fn, inner.theta_[cls_idx]),
key=lambda t: t[1],
reverse=True,
)
lines.append(f" Class {cls_idx}:")
for fname, val in pairs[:8]:
lines.append(f" {fname:33s} mean={val:.4f}")
except Exception as e:
lines.append(f" (Naive Bayes display failed: {e})")
displayed = True
if not displayed:
lines.append(" (No specific display method found for this model)")
# ── Always show feature importances (unless already fully covered) ─
skip_fi = {"nam", "linear", "lda", "naivebayes", "naive_bayes", "ngboost"}
if hasattr(model, "feature_importances_") and displayed and fn:
if not any(k in clsname for k in skip_fi):
lines.extend(_show_importances(model, fn, "Feature Importances", top_features))
text = "\n".join(lines)
if print_output:
print(text)
return text
[docs]
def display_models(
models: dict[str, object],
feature_names: list[str] | None = None,
X_sample: np.ndarray | None = None,
*,
top_rules: int = 15,
top_features: int = 10,
print_output: bool = True,
) -> str:
"""Display learned structures for multiple models.
Parameters
----------
models : dict[str, estimator]
Mapping of model names to fitted estimators.
feature_names : list of str, optional
Feature names for readable output.
X_sample : ndarray, optional
Sample data for per-sample contribution displays.
top_rules : int, default=15
Max rules/terms per model.
top_features : int, default=10
Max features per importance display.
print_output : bool, default=True
If True, print to stdout.
Returns
-------
str
Complete formatted text for all models.
"""
parts: list[str] = []
for name, model in models.items():
text = display_model(
name,
model,
feature_names=feature_names,
X_sample=X_sample,
top_rules=top_rules,
top_features=top_features,
print_output=False,
)
parts.append(text)
full_text = "\n".join(parts)
if print_output:
print(full_text)
return full_text