Source code for endgame.visualization.word_cloud

"""Word cloud visualizer.

Interactive word cloud for text data visualization, feature name
exploration, and keyword frequency display.

Example
-------
>>> from endgame.visualization import WordCloudVisualizer
>>> words = {"accuracy": 50, "precision": 35, "recall": 40, "f1": 45}
>>> viz = WordCloudVisualizer(words, title="Metrics")
>>> viz.save("word_cloud.html")
"""

from __future__ import annotations

import random
from collections.abc import Sequence
from typing import Any

from endgame.visualization._base import BaseVisualizer


class WordCloudVisualizer(BaseVisualizer):
    """Interactive word cloud visualizer.

    Parameters
    ----------
    words : dict of str → float
        Word/phrase → weight/frequency.
    max_words : int, default=100
        Maximum number of words to display.
    min_font : int, default=12
        Minimum font size in pixels.
    max_font : int, default=60
        Maximum font size in pixels.
    title : str, optional
        Chart title.
    palette : str, default='tableau'
        Color palette.
    width : int, default=800
        Chart width.
    height : int, default=500
        Chart height.
    theme : str, default='dark'
        'dark' or 'light'.
    """

    def __init__(
        self,
        words: dict[str, float],
        *,
        max_words: int = 100,
        min_font: int = 12,
        max_font: int = 60,
        title: str = "",
        palette: str = "tableau",
        width: int = 800,
        height: int = 500,
        theme: str = "dark",
    ):
        super().__init__(
            title=title, palette=palette, width=width, height=height, theme=theme
        )
        # Shallow copy: later changes to the caller's mapping do not affect
        # this visualizer.
        self._words = dict(words)
        self.max_words = max_words
        self.min_font = min_font
        self.max_font = max_font

    @classmethod
    def from_feature_names(
        cls,
        feature_names: Sequence[str],
        importances: Sequence[float] | None = None,
        **kwargs,
    ) -> WordCloudVisualizer:
        """Create from feature names and optional importances.

        Parameters
        ----------
        feature_names : list of str
            Feature names.
        importances : list of float, optional
            Feature importances, paired with ``feature_names`` by position.
            When omitted, every feature gets a weight of ``1.0``.
        **kwargs
            Additional keyword arguments forwarded to the constructor.
            ``title`` defaults to ``"Feature Names"`` when not supplied.

        Returns
        -------
        WordCloudVisualizer
            A new instance of ``cls`` built from the name → weight mapping.

        Notes
        -----
        Pairing uses ``zip``, so if the two sequences differ in length the
        extra items of the longer one are ignored. Duplicate names keep the
        last weight seen.
        """
        if importances is None:
            words = dict.fromkeys(feature_names, 1.0)
        else:
            words = dict(zip(feature_names, importances))
        kwargs.setdefault("title", "Feature Names")
        return cls(words, **kwargs)

    def _build_data(self) -> dict[str, Any]:
        """Build the chart payload: the top words with font size and rotation.

        Words are ranked by weight (descending) and cut to ``max_words``.
        Font sizes are interpolated linearly between ``min_font`` and
        ``max_font`` across the weight range of the kept words; when all
        kept weights are equal, every word gets the midpoint size.

        Returns
        -------
        dict
            ``{"words": [...]}`` where each entry has ``text``, ``weight``
            (rounded to 4 decimals), ``fontSize`` (rounded to 1 decimal) and
            ``rotation`` (``0`` or ``-90`` degrees). The list is empty when
            there are no words to show.
        """
        # Coerce weights to builtin floats once, up front. Weights often come
        # from NumPy arrays (e.g. float32 importances); without this the
        # derived font size would be a NumPy scalar, which the standard JSON
        # encoder rejects, while "weight" was already a plain float.
        ranked = sorted(
            ((word, float(weight)) for word, weight in self._words.items()),
            key=lambda item: item[1],
            reverse=True,
        )[: self.max_words]
        if not ranked:
            return {"words": []}

        max_w = ranked[0][1]
        min_w = ranked[-1][1]
        font_span = self.max_font - self.min_font

        # Fixed seed so the same input always yields the same rotations.
        rng = random.Random(42)
        words = []
        for word, weight in ranked:
            if max_w > min_w:
                t = (weight - min_w) / (max_w - min_w)
            else:
                # Degenerate range (single word or all weights equal).
                t = 0.5
            font = self.min_font + t * font_span
            # Roughly 30% of words enter the draw, and one in four of those
            # is turned vertical; the rest stay horizontal.
            rotation = rng.choice([0, 0, 0, -90]) if rng.random() > 0.7 else 0
            words.append({
                "text": word,
                "weight": round(weight, 4),
                "fontSize": round(float(font), 1),
                "rotation": rotation,
            })
        return {"words": words}

    def _chart_type(self) -> str:
        """Return the chart type identifier for this visualizer."""
        return "word_cloud"

    def _get_chart_js(self) -> str:
        """Return the JavaScript source that renders the word cloud."""
        return _WORDCLOUD_JS
# JavaScript renderer returned by ``WordCloudVisualizer._get_chart_js``.
# It defines ``renderChart(data, config)``, which places each entry of
# ``data.words`` along an outward spiral from the canvas centre and draws it
# as an SVG <text> element. The ``EG`` helpers (``EG.svg``, ``EG.tooltip``,
# ``EG.esc``, ``EG.fmt``) are not defined here — presumably supplied by the
# shared page template; verify against the base visualizer.
_WORDCLOUD_JS = r"""
function renderChart(data, config) {
  const container = document.getElementById('chart-container');
  const W = config.width, H = config.height;
  const svg = EG.svg('svg', {width: W, height: H});
  container.appendChild(svg);

  const palette = config.palette;
  const words = data.words;

  // Simple spiral placement
  const placed = [];
  const cx = W / 2, cy = H / 2;

  words.forEach(function(w, i) {
    const color = palette[i % palette.length];
    let bestX = cx, bestY = cy;
    let found = false;

    // Rough bounding box of the unrotated text (does not depend on position,
    // so it is computed once per word rather than per spiral step).
    const textW = w.text.length * w.fontSize * 0.55;
    const textH = w.fontSize * 1.2;
    // The text is rotated about its own centre, so a quarter turn swaps the
    // footprint's width and height. Use the rotated footprint for both the
    // canvas-bounds check and the overlap check.
    const vertical = Math.abs(w.rotation) % 180 === 90;
    const estW = vertical ? textH : textW;
    const estH = vertical ? textW : textH;

    // Spiral outward to find non-overlapping position
    for (let t = 0; t < 500 && !found; t++) {
      const angle = t * 0.15;
      const radius = t * 1.2;
      const tx = cx + radius * Math.cos(angle);
      const ty = cy + radius * Math.sin(angle);

      // Keep a 10px margin inside the canvas.
      if (tx - estW / 2 < 10 || tx + estW / 2 > W - 10 ||
          ty - estH / 2 < 10 || ty + estH / 2 > H - 10) continue;

      let overlap = false;
      for (let j = 0; j < placed.length; j++) {
        const p = placed[j];
        if (Math.abs(tx - p.x) < (estW + p.w) / 2 &&
            Math.abs(ty - p.y) < (estH + p.h) / 2) {
          overlap = true;
          break;
        }
      }

      if (!overlap) {
        bestX = tx;
        bestY = ty;
        placed.push({x: tx, y: ty, w: estW, h: estH});
        found = true;
      }
    }

    // If no free slot was found the word is still drawn, at the centre.
    const text = EG.svg('text', {
      x: bestX,
      y: bestY,
      'text-anchor': 'middle',
      'dominant-baseline': 'middle',
      fill: color,
      'font-size': w.fontSize + 'px',
      'font-weight': w.fontSize > 30 ? '700' : '500',
      transform: w.rotation ? `rotate(${w.rotation},${bestX},${bestY})` : '',
      opacity: 0.9
    });
    text.textContent = w.text;

    text.addEventListener('mouseenter', function(e) {
      text.setAttribute('opacity', '1');
      text.setAttribute('font-size', (w.fontSize * 1.1) + 'px');
      EG.tooltip.show(e, '<b>' + EG.esc(w.text) + '</b><br>Weight: ' + EG.fmt(w.weight));
    });
    text.addEventListener('mouseleave', function() {
      text.setAttribute('opacity', '0.9');
      text.setAttribute('font-size', w.fontSize + 'px');
      EG.tooltip.hide();
    });

    svg.appendChild(text);
  });
}
"""