Source code for endgame.models.neural.embedding_mlp

"""MLP with Entity Embeddings for categorical features.

Entity embeddings learn dense representations for categorical variables,
enabling neural networks to effectively handle high-cardinality features.

Reference: https://arxiv.org/abs/1604.06737 (Entity Embeddings of Categorical Variables)
"""

from __future__ import annotations

from typing import Any

import numpy as np
from sklearn.base import ClassifierMixin, RegressorMixin
from sklearn.preprocessing import LabelEncoder, StandardScaler

from endgame.core.base import EndgameEstimator

# PyTorch imports (optional dependency; availability is recorded in HAS_TORCH)
try:
    import torch
    import torch.nn as nn
    import torch.optim as optim
    from torch.utils.data import DataLoader, TensorDataset

    HAS_TORCH = True
except ImportError:
    HAS_TORCH = False


def _check_torch():
    """Raise ``ImportError`` when the optional PyTorch import did not succeed."""
    if HAS_TORCH:
        return

    message = (
        "PyTorch is required for neural network models. "
        "Install with: pip install torch"
    )
    raise ImportError(message)


class _EmbeddingMLPModule(nn.Module):
    """PyTorch module for MLP with entity embeddings.

    Parameters
    ----------
    n_continuous : int
        Number of continuous features.
    categorical_dims : List[Tuple[int, int]]
        List of (cardinality, embedding_dim) for each categorical feature.
    hidden_dims : List[int]
        Hidden layer dimensions.
    output_dim : int
        Number of output units.
    dropout : float
        Dropout rate.
    batch_norm : bool
        Whether to use batch normalization.
    activation : str
        Activation function.
    embedding_dropout : float
        Dropout rate for embeddings.
    """

    def __init__(
        self,
        n_continuous: int,
        categorical_dims: list[tuple[int, int]],
        hidden_dims: list[int],
        output_dim: int,
        dropout: float = 0.3,
        batch_norm: bool = True,
        activation: str = "relu",
        embedding_dropout: float = 0.1,
    ):
        super().__init__()

        self.n_continuous = n_continuous
        self.categorical_dims = categorical_dims

        # Create embeddings for each categorical feature
        self.embeddings = nn.ModuleList([
            nn.Embedding(cardinality, embed_dim)
            for cardinality, embed_dim in categorical_dims
        ])

        self.embedding_dropout = nn.Dropout(embedding_dropout)

        # Calculate total input dimension
        total_embed_dim = sum(embed_dim for _, embed_dim in categorical_dims)
        input_dim = n_continuous + total_embed_dim

        # Activation function
        self.activation_fn = self._get_activation(activation)

        # Build hidden layers
        layers = []
        prev_dim = input_dim

        for hidden_dim in hidden_dims:
            layers.append(nn.Linear(prev_dim, hidden_dim))

            if batch_norm:
                layers.append(nn.BatchNorm1d(hidden_dim))

            layers.append(self.activation_fn)

            if dropout > 0:
                layers.append(nn.Dropout(dropout))

            prev_dim = hidden_dim

        self.hidden_layers = nn.Sequential(*layers)
        self.output_layer = nn.Linear(prev_dim, output_dim)

        # Initialize embeddings
        for embedding in self.embeddings:
            nn.init.xavier_uniform_(embedding.weight)

    def _get_activation(self, activation: str) -> nn.Module:
        """Get activation function by name."""
        activations = {
            "relu": nn.ReLU(),
            "leaky_relu": nn.LeakyReLU(0.1),
            "elu": nn.ELU(),
            "selu": nn.SELU(),
            "gelu": nn.GELU(),
            "swish": nn.SiLU(),
            "mish": nn.Mish(),
            "tanh": nn.Tanh(),
        }
        if activation not in activations:
            raise ValueError(
                f"Unknown activation: {activation}. "
                f"Choose from: {list(activations.keys())}"
            )
        return activations[activation]

    def forward(
        self,
        x_continuous: torch.Tensor,
        x_categorical: torch.Tensor,
    ) -> torch.Tensor:
        """Forward pass.

        Parameters
        ----------
        x_continuous : Tensor of shape (batch_size, n_continuous)
            Continuous features.
        x_categorical : Tensor of shape (batch_size, n_categorical)
            Categorical features (integer encoded).

        Returns
        -------
        Tensor
            Output predictions.
        """
        # Get embeddings for each categorical feature
        embedded = []
        for i, embedding in enumerate(self.embeddings):
            embedded.append(embedding(x_categorical[:, i]))

        # Concatenate embeddings
        if embedded:
            x_embed = torch.cat(embedded, dim=1)
            x_embed = self.embedding_dropout(x_embed)

            # Concatenate with continuous features
            if self.n_continuous > 0:
                x = torch.cat([x_continuous, x_embed], dim=1)
            else:
                x = x_embed
        else:
            x = x_continuous

        # Forward through hidden layers
        x = self.hidden_layers(x)
        return self.output_layer(x)

    def get_embeddings(self, feature_idx: int) -> np.ndarray:
        """Get embedding weights for a categorical feature.

        Parameters
        ----------
        feature_idx : int
            Index of the categorical feature.

        Returns
        -------
        ndarray
            Embedding weight matrix.
        """
        return self.embeddings[feature_idx].weight.detach().cpu().numpy()


class _BaseEmbeddingMLP(EndgameEstimator):
    """Base class for EmbeddingMLP estimators.

    Holds everything shared by the classifier and regressor: splitting the
    input into continuous and categorical columns, scaling / label-encoding
    them, building the ``_EmbeddingMLPModule`` and running the training loop
    with optional validation-based early stopping. Subclasses supply
    ``_prepare_target_tensor`` and call ``_fit_impl`` / ``_predict_impl``.

    Parameters
    ----------
    categorical_features : List[str] or List[int], optional
        Names or indices of categorical features. ``None`` triggers
        auto-detection; an empty list means "no categorical features".
        Names are matched against the generated names ``f0``, ``f1``, ...
        and names that do not match are ignored.
    embedding_dims : Dict[str, int] or int, optional
        Embedding dimensions: dict mapping feature names to dims,
        or int used for every feature. Features without an entry use the
        rule ``min(50, (cardinality + 1) // 2)``.
    hidden_dims : List[int], default=[256, 128]
        Hidden layer dimensions.
    dropout : float, default=0.3
        Dropout rate for hidden layers.
    embedding_dropout : float, default=0.1
        Dropout rate for embeddings.
    batch_norm : bool, default=True
        Whether to use batch normalization.
    activation : str, default='relu'
        Activation function.
    learning_rate : float, default=1e-3
        Initial learning rate.
    weight_decay : float, default=1e-5
        Weight decay passed to ``AdamW``.
    n_epochs : int, default=100
        Maximum training epochs.
    batch_size : int, default=256
        Training batch size.
    early_stopping : int, default=10
        Early stopping patience (epochs without validation improvement).
        Only used when validation data is supplied.
    scheduler : str, default='cosine'
        Learning rate scheduler: 'none', 'cosine', 'step' or 'plateau'.
    device : str, default='auto'
        Device: 'cuda', 'cpu', or 'auto'.
    random_state : int, optional
        Random seed.
    verbose : bool, default=False
        Enable verbose output.
    """

    def __init__(
        self,
        categorical_features: list[str] | list[int] | None = None,
        embedding_dims: dict[str, int] | int | None = None,
        hidden_dims: list[int] | None = None,
        dropout: float = 0.3,
        embedding_dropout: float = 0.1,
        batch_norm: bool = True,
        activation: str = "relu",
        learning_rate: float = 1e-3,
        weight_decay: float = 1e-5,
        n_epochs: int = 100,
        batch_size: int = 256,
        early_stopping: int = 10,
        scheduler: str = "cosine",
        device: str = "auto",
        random_state: int | None = None,
        verbose: bool = False,
    ):
        _check_torch()
        super().__init__(random_state=random_state, verbose=verbose)

        self.categorical_features = categorical_features
        self.embedding_dims = embedding_dims
        self.hidden_dims = hidden_dims or [256, 128]
        self.dropout = dropout
        self.embedding_dropout = embedding_dropout
        self.batch_norm = batch_norm
        self.activation = activation
        self.learning_rate = learning_rate
        self.weight_decay = weight_decay
        self.n_epochs = n_epochs
        self.batch_size = batch_size
        self.early_stopping = early_stopping
        self.scheduler = scheduler
        self.device = device

        # Model components (populated by ``_fit_impl``)
        self.model_: _EmbeddingMLPModule | None = None
        self.scaler_: StandardScaler | None = None
        self._device: torch.device | None = None
        # Keyed by the column index in the original feature matrix.
        self._cat_encoders: dict[int, LabelEncoder] = {}
        self._cat_indices: list[int] = []
        self._cont_indices: list[int] = []
        self._categorical_dims: list[tuple[int, int]] = []
        self._feature_names: list[str] | None = None
        self.history_: dict[str, list[float]] = {"train_loss": [], "val_loss": []}

    def _get_device(self) -> torch.device:
        """Resolve ``self.device``; 'auto' picks CUDA when available."""
        if self.device == "auto":
            return torch.device("cuda" if torch.cuda.is_available() else "cpu")
        return torch.device(self.device)

    def _set_seed(self):
        """Seed torch and NumPy (and CUDA, if present) from ``random_state``."""
        if self.random_state is not None:
            torch.manual_seed(self.random_state)
            np.random.seed(self.random_state)
            if torch.cuda.is_available():
                torch.cuda.manual_seed_all(self.random_state)

    def _infer_feature_indices(self, X: np.ndarray, feature_names: list[str]):
        """Populate ``_cat_indices`` and ``_cont_indices`` for ``X``.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Training features.
        feature_names : list of str
            Name of each column of ``X``; used to resolve string entries of
            ``categorical_features``.
        """
        n_features = X.shape[1]
        requested = self.categorical_features

        if requested is None:
            # Auto-detect: a column is categorical when it has at most 50
            # distinct values and all of them are integer-valued.
            self._cat_indices = []
            for i in range(n_features):
                unique = np.unique(X[:, i])
                if len(unique) <= 50 and np.allclose(unique, unique.astype(int)):
                    self._cat_indices.append(i)
        elif len(requested) == 0:
            # An explicit empty selection means "no categorical columns";
            # peeking at requested[0] below would raise IndexError.
            self._cat_indices = []
        elif isinstance(requested[0], str):
            # Feature names provided; names that are not present are skipped.
            self._cat_indices = [
                feature_names.index(name)
                for name in requested
                if name in feature_names
            ]
        else:
            # Indices provided
            self._cat_indices = list(requested)

        # Set membership keeps this linear in the number of features.
        categorical = set(self._cat_indices)
        self._cont_indices = [
            i for i in range(n_features) if i not in categorical
        ]

    def _compute_embedding_dim(self, cardinality: int, feature_name: str) -> int:
        """Return the embedding width for one categorical feature.

        A per-feature entry in a dict ``embedding_dims`` wins, then an int
        ``embedding_dims``, then the default rule
        ``min(50, (cardinality + 1) // 2)``.
        """
        if isinstance(self.embedding_dims, dict):
            if feature_name in self.embedding_dims:
                return self.embedding_dims[feature_name]

        if isinstance(self.embedding_dims, int):
            return self.embedding_dims

        # Default rule: min(50, (cardinality + 1) // 2)
        return min(50, (cardinality + 1) // 2)

    def _prepare_data(
        self,
        X: np.ndarray,
        fit: bool = True,
    ) -> tuple[np.ndarray, np.ndarray]:
        """Split ``X`` into scaled continuous and encoded categorical parts.

        Parameters
        ----------
        X : ndarray
            Input features.
        fit : bool
            Whether to fit the scaler / label encoders (training) or reuse
            the fitted ones (validation and prediction).

        Returns
        -------
        X_cont : ndarray
            Standard-scaled continuous features, ``(n_samples, 0)`` when
            there are none.
        X_cat : ndarray
            Integer-encoded categorical features, ``(n_samples, 0)`` when
            there are none.
        """
        # Continuous features
        if self._cont_indices:
            X_cont = X[:, self._cont_indices].astype(np.float32)
            if fit:
                self.scaler_ = StandardScaler()
                X_cont = self.scaler_.fit_transform(X_cont)
            else:
                X_cont = self.scaler_.transform(X_cont)
        else:
            X_cont = np.zeros((X.shape[0], 0), dtype=np.float32)

        # Categorical features
        if self._cat_indices:
            X_cat = np.zeros((X.shape[0], len(self._cat_indices)), dtype=np.int64)
            for j, i in enumerate(self._cat_indices):
                # Values are compared as strings so fit and transform agree
                # regardless of the column's original dtype.
                col = X[:, i].astype(str)
                if fit:
                    self._cat_encoders[i] = LabelEncoder()
                    X_cat[:, j] = self._cat_encoders[i].fit_transform(col)
                else:
                    # Unseen categories fall back to the first known class;
                    # the replacement is vectorised instead of a per-row loop.
                    classes = self._cat_encoders[i].classes_
                    col = np.where(np.isin(col, classes), col, classes[0])
                    X_cat[:, j] = self._cat_encoders[i].transform(col)
        else:
            X_cat = np.zeros((X.shape[0], 0), dtype=np.int64)

        return X_cont, X_cat

    def _get_scheduler(
        self,
        optimizer: optim.Optimizer,
        n_epochs: int,
    ) -> Any | None:
        """Create the learning rate scheduler named by ``self.scheduler``.

        Returns
        -------
        scheduler or None
            ``None`` when ``self.scheduler == "none"``.

        Raises
        ------
        ValueError
            If ``self.scheduler`` is not a supported name.
        """
        if self.scheduler == "none":
            return None
        elif self.scheduler == "cosine":
            return optim.lr_scheduler.CosineAnnealingLR(
                optimizer, T_max=n_epochs, eta_min=1e-6
            )
        elif self.scheduler == "step":
            # n_epochs // 3 is 0 for fewer than three epochs, and StepLR
            # divides by step_size, so clamp it to at least one epoch.
            return optim.lr_scheduler.StepLR(
                optimizer, step_size=max(1, n_epochs // 3), gamma=0.1
            )
        elif self.scheduler == "plateau":
            return optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode="min", factor=0.5, patience=5
            )
        else:
            raise ValueError(f"Unknown scheduler: {self.scheduler}")

    def _create_dataloader(
        self,
        X_cont: np.ndarray,
        X_cat: np.ndarray,
        y: np.ndarray,
        shuffle: bool = True,
        drop_last: bool = False,
    ) -> DataLoader:
        """Wrap prepared arrays in a ``DataLoader``.

        Parameters
        ----------
        X_cont, X_cat : ndarray
            Outputs of ``_prepare_data``.
        y : ndarray
            Targets; converted by the subclass' ``_prepare_target_tensor``.
        shuffle : bool, default=True
            Whether to reshuffle the samples every epoch.
        drop_last : bool, default=False
            Whether to discard an incomplete final batch.
        """
        X_cont_tensor = torch.FloatTensor(X_cont)
        X_cat_tensor = torch.LongTensor(X_cat)
        y_tensor = self._prepare_target_tensor(y)

        dataset = TensorDataset(X_cont_tensor, X_cat_tensor, y_tensor)
        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=shuffle,
            drop_last=drop_last,
            num_workers=0,
            pin_memory=self._device.type == "cuda",
        )

    def _prepare_target_tensor(self, y: np.ndarray) -> torch.Tensor:
        """Prepare target tensor (override in subclasses)."""
        raise NotImplementedError

    def _train_epoch(
        self,
        dataloader: DataLoader,
        optimizer: optim.Optimizer,
        criterion: nn.Module,
    ) -> float:
        """Run one optimisation pass over ``dataloader``.

        Returns
        -------
        float
            Mean of the per-batch losses.
        """
        self.model_.train()
        total_loss = 0.0
        n_batches = 0

        for X_cont, X_cat, y_batch in dataloader:
            X_cont = X_cont.to(self._device)
            X_cat = X_cat.to(self._device)
            y_batch = y_batch.to(self._device)

            optimizer.zero_grad()
            outputs = self.model_(X_cont, X_cat)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            n_batches += 1

        return total_loss / n_batches

    def _validate_epoch(
        self,
        dataloader: DataLoader,
        criterion: nn.Module,
    ) -> float:
        """Evaluate the model on ``dataloader`` without gradient tracking.

        Returns
        -------
        float
            Mean of the per-batch losses.
        """
        self.model_.eval()
        total_loss = 0.0
        n_batches = 0

        with torch.no_grad():
            for X_cont, X_cat, y_batch in dataloader:
                X_cont = X_cont.to(self._device)
                X_cat = X_cat.to(self._device)
                y_batch = y_batch.to(self._device)

                outputs = self.model_(X_cont, X_cat)
                loss = criterion(outputs, y_batch)

                total_loss += loss.item()
                n_batches += 1

        return total_loss / n_batches

    def _fit_impl(
        self,
        X: np.ndarray,
        y: np.ndarray,
        output_dim: int,
        criterion: nn.Module,
        val_data: tuple[np.ndarray, np.ndarray] | None = None,
    ) -> EndgameEstimator:
        """Build the network and train it.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Training features.
        y : ndarray
            Training targets, already encoded / scaled by the subclass.
        output_dim : int
            Number of output units of the network.
        criterion : nn.Module
            Loss function.
        val_data : tuple of (X_val, y_val), optional
            Validation arrays. When given, the weights with the lowest
            validation loss are restored after training and training stops
            after ``early_stopping`` epochs without improvement.

        Returns
        -------
        self
        """
        self._set_seed()
        self._device = self._get_device()

        # Start every fit from a clean slate so that refitting the same
        # instance does not append to, or reuse state from, a previous run.
        self.history_ = {"train_loss": [], "val_loss": []}
        self._cat_encoders = {}

        # Columns are referred to by generated names f0, f1, ...
        self._feature_names = [f"f{i}" for i in range(X.shape[1])]

        # Infer feature indices
        self._infer_feature_indices(X, self._feature_names)

        # Prepare data
        X_cont, X_cat = self._prepare_data(X, fit=True)

        # Compute categorical dimensions
        self._categorical_dims = []
        for i in self._cat_indices:
            cardinality = len(self._cat_encoders[i].classes_)
            embed_dim = self._compute_embedding_dim(cardinality, self._feature_names[i])
            self._categorical_dims.append((cardinality, embed_dim))

        # Create model
        self.model_ = _EmbeddingMLPModule(
            n_continuous=len(self._cont_indices),
            categorical_dims=self._categorical_dims,
            hidden_dims=self.hidden_dims,
            output_dim=output_dim,
            dropout=self.dropout,
            batch_norm=self.batch_norm,
            activation=self.activation,
            embedding_dropout=self.embedding_dropout,
        ).to(self._device)

        # Create optimizer and scheduler
        optimizer = optim.AdamW(
            self.model_.parameters(),
            lr=self.learning_rate,
            weight_decay=self.weight_decay,
        )
        scheduler = self._get_scheduler(optimizer, self.n_epochs)

        # BatchNorm1d cannot normalise a single-sample batch in training
        # mode, so a trailing batch of exactly one sample is dropped instead
        # of crashing the epoch. With shuffling the dropped sample changes
        # from epoch to epoch.
        n_train = X_cont.shape[0]
        drop_last = (
            self.batch_norm
            and len(self.hidden_dims) > 0
            and n_train > self.batch_size
            and n_train % self.batch_size == 1
        )

        # Create dataloaders
        train_loader = self._create_dataloader(
            X_cont, X_cat, y, shuffle=True, drop_last=drop_last
        )

        val_loader = None
        if val_data is not None:
            X_val, y_val = val_data
            X_val_cont, X_val_cat = self._prepare_data(X_val, fit=False)
            val_loader = self._create_dataloader(X_val_cont, X_val_cat, y_val, shuffle=False)

        # Training loop
        best_val_loss = float("inf")
        best_state = None
        patience_counter = 0

        self._log(f"Training EmbeddingMLP on {self._device}...")
        self._log(f"Continuous features: {len(self._cont_indices)}, Categorical features: {len(self._cat_indices)}")

        for epoch in range(self.n_epochs):
            train_loss = self._train_epoch(train_loader, optimizer, criterion)
            self.history_["train_loss"].append(train_loss)

            if val_loader is not None:
                val_loss = self._validate_epoch(val_loader, criterion)
                self.history_["val_loss"].append(val_loss)

                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    # Snapshot on the CPU so the copy does not hold device memory.
                    best_state = {k: v.cpu().clone() for k, v in self.model_.state_dict().items()}
                    patience_counter = 0
                else:
                    patience_counter += 1

                if self.verbose and (epoch + 1) % 10 == 0:
                    self._log(
                        f"Epoch {epoch + 1}/{self.n_epochs}: "
                        f"train_loss={train_loss:.4f}, val_loss={val_loss:.4f}"
                    )

                if patience_counter >= self.early_stopping:
                    self._log(f"Early stopping at epoch {epoch + 1}")
                    break
            else:
                if self.verbose and (epoch + 1) % 10 == 0:
                    self._log(f"Epoch {epoch + 1}/{self.n_epochs}: train_loss={train_loss:.4f}")

            if scheduler is not None:
                if isinstance(scheduler, optim.lr_scheduler.ReduceLROnPlateau):
                    # The plateau scheduler needs the monitored metric.
                    scheduler.step(train_loss if val_loader is None else val_loss)
                else:
                    scheduler.step()

        # Restore the best validation checkpoint, if one was recorded.
        if best_state is not None:
            self.model_.load_state_dict(best_state)

        self._is_fitted = True
        return self

    def _predict_impl(self, X: np.ndarray) -> np.ndarray:
        """Return the raw network outputs for ``X`` as a NumPy array.

        The whole of ``X`` is pushed through the model in a single forward
        pass in eval mode with gradients disabled.
        """
        self._check_is_fitted()

        X_cont, X_cat = self._prepare_data(X, fit=False)

        X_cont_tensor = torch.FloatTensor(X_cont).to(self._device)
        X_cat_tensor = torch.LongTensor(X_cat).to(self._device)

        self.model_.eval()
        with torch.no_grad():
            outputs = self.model_(X_cont_tensor, X_cat_tensor)

        return outputs.cpu().numpy()

    def get_embeddings(self, feature: str | int) -> np.ndarray:
        """Get learned embeddings for a categorical feature.

        Parameters
        ----------
        feature : str or int
            Feature name (one of the generated names ``f0``, ``f1``, ...)
            or column index in the training matrix.

        Returns
        -------
        ndarray of shape (cardinality, embedding_dim)
            Embedding weights.

        Raises
        ------
        ValueError
            If the name is unknown or the feature is not categorical.
        """
        self._check_is_fitted()

        if isinstance(feature, str):
            if feature not in self._feature_names:
                raise ValueError(f"Unknown feature: {feature}")
            feature_idx = self._feature_names.index(feature)
        else:
            feature_idx = feature

        if feature_idx not in self._cat_indices:
            raise ValueError(f"Feature {feature} is not categorical")

        # Embedding tables are ordered like ``_cat_indices``.
        cat_pos = self._cat_indices.index(feature_idx)
        return self.model_.get_embeddings(cat_pos)


class EmbeddingMLPClassifier(ClassifierMixin, _BaseEmbeddingMLP):
    """MLP classifier with entity embeddings for categorical features.

    Learns dense representations for categorical variables, enabling
    effective handling of high-cardinality features.

    Parameters
    ----------
    categorical_features : List[str] or List[int], optional
        Names or indices of categorical features.
        If None, auto-detects based on unique values.
    embedding_dims : Dict[str, int] or int, optional
        Embedding dimensions per feature or default dimension.
    hidden_dims : List[int], default=[256, 128]
        Hidden layer dimensions.
    dropout : float, default=0.3
        Dropout rate for hidden layers.
    embedding_dropout : float, default=0.1
        Dropout rate for embeddings.
    batch_norm : bool, default=True
        Whether to use batch normalization.
    activation : str, default='relu'
        Activation function.
    learning_rate : float, default=1e-3
        Initial learning rate.
    weight_decay : float, default=1e-5
        L2 regularization strength.
    n_epochs : int, default=100
        Maximum training epochs.
    batch_size : int, default=256
        Training batch size.
    early_stopping : int, default=10
        Early stopping patience.
    class_weight : str or dict, optional
        Class weights: 'balanced', or a dict mapping class labels (as they
        appear in ``y``) to weights. Labels missing from the dict get
        weight 1.0.
    scheduler : str, default='cosine'
        Learning rate scheduler.
    device : str, default='auto'
        Device: 'cuda', 'cpu', or 'auto'.
    random_state : int, optional
        Random seed.
    verbose : bool, default=False
        Enable verbose output.

    Attributes
    ----------
    classes_ : ndarray
        Unique class labels.
    n_classes_ : int
        Number of classes.
    model_ : _EmbeddingMLPModule
        Fitted PyTorch model.
    history_ : dict
        Training history.

    Examples
    --------
    >>> from endgame.models.neural import EmbeddingMLPClassifier
    >>> clf = EmbeddingMLPClassifier(
    ...     categorical_features=['category', 'brand'],
    ...     embedding_dims={'category': 10, 'brand': 8},
    ...     hidden_dims=[128, 64]
    ... )
    >>> clf.fit(X_train, y_train, val_data=(X_val, y_val))
    >>> predictions = clf.predict(X_test)
    >>> # Get learned embeddings
    >>> category_embeddings = clf.get_embeddings('category')
    """

    _estimator_type = "classifier"

    def __init__(
        self,
        categorical_features: list[str] | list[int] | None = None,
        embedding_dims: dict[str, int] | int | None = None,
        hidden_dims: list[int] | None = None,
        dropout: float = 0.3,
        embedding_dropout: float = 0.1,
        batch_norm: bool = True,
        activation: str = "relu",
        learning_rate: float = 1e-3,
        weight_decay: float = 1e-5,
        n_epochs: int = 100,
        batch_size: int = 256,
        early_stopping: int = 10,
        class_weight: str | dict | None = None,
        scheduler: str = "cosine",
        device: str = "auto",
        random_state: int | None = None,
        verbose: bool = False,
    ):
        super().__init__(
            categorical_features=categorical_features,
            embedding_dims=embedding_dims,
            hidden_dims=hidden_dims,
            dropout=dropout,
            embedding_dropout=embedding_dropout,
            batch_norm=batch_norm,
            activation=activation,
            learning_rate=learning_rate,
            weight_decay=weight_decay,
            n_epochs=n_epochs,
            batch_size=batch_size,
            early_stopping=early_stopping,
            scheduler=scheduler,
            device=device,
            random_state=random_state,
            verbose=verbose,
        )
        self.class_weight = class_weight
        self.classes_: np.ndarray | None = None
        self.n_classes_: int | None = None
        self._label_encoder: LabelEncoder | None = None
        self._class_weights: torch.Tensor | None = None

    def _prepare_target_tensor(self, y: np.ndarray) -> torch.Tensor:
        """Prepare target tensor for classification (integer class indices)."""
        return torch.LongTensor(y)

    def _compute_class_weights(self, y: np.ndarray) -> torch.Tensor | None:
        """Turn ``class_weight`` into a per-class weight tensor.

        Parameters
        ----------
        y : ndarray
            Label-encoded training targets (values ``0 .. n_classes_ - 1``).

        Returns
        -------
        Tensor or None
            One weight per encoded class, or ``None`` when no weighting was
            requested.

        Raises
        ------
        ValueError
            If ``class_weight`` is neither ``None``, ``'balanced'`` nor a dict.
        """
        if self.class_weight is None:
            return None

        if isinstance(self.class_weight, dict):
            # Keys are the user's original labels. ``classes_`` is ordered
            # like the encoded indices, so position k of the result is the
            # weight for encoded class k.
            weights = np.array([
                self.class_weight.get(label, 1.0)
                for label in self.classes_.tolist()
            ])
            return torch.FloatTensor(weights)

        if isinstance(self.class_weight, str) and self.class_weight == "balanced":
            from sklearn.utils.class_weight import compute_class_weight

            weights = compute_class_weight(
                "balanced", classes=np.unique(y), y=y
            )
            return torch.FloatTensor(weights)

        # Fail loudly rather than silently training without weights.
        raise ValueError(
            f"Unknown class_weight: {self.class_weight!r}. "
            "Use None, 'balanced' or a dict mapping class labels to weights."
        )

    def fit(
        self,
        X,
        y,
        val_data: tuple[Any, Any] | None = None,
    ) -> EmbeddingMLPClassifier:
        """Fit the classifier.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,)
            Target labels.
        val_data : tuple of (X_val, y_val), optional
            Validation data for early stopping.

        Returns
        -------
        self
            Fitted classifier.
        """
        X_arr, y_arr = self._validate_data(X, y)

        # Encode labels to 0 .. n_classes_ - 1
        self._label_encoder = LabelEncoder()
        y_encoded = self._label_encoder.fit_transform(y_arr)
        self.classes_ = self._label_encoder.classes_
        self.n_classes_ = len(self.classes_)

        # Compute class weights
        self._class_weights = self._compute_class_weights(y_encoded)

        # Validation labels go through the encoder fitted on the training labels
        if val_data is not None:
            X_val, y_val = val_data
            X_val = self._to_numpy(X_val)
            y_val = self._label_encoder.transform(np.asarray(y_val))
            val_data = (X_val, y_val)

        # Create criterion; the weight tensor must live on the training device
        if self._class_weights is not None:
            criterion = nn.CrossEntropyLoss(
                weight=self._class_weights.to(self._get_device())
            )
        else:
            criterion = nn.CrossEntropyLoss()

        return self._fit_impl(
            X_arr, y_encoded, self.n_classes_, criterion, val_data
        )

    def predict(self, X) -> np.ndarray:
        """Predict class labels (the most probable class, in original labels)."""
        proba = self.predict_proba(X)
        indices = np.argmax(proba, axis=1)
        return self._label_encoder.inverse_transform(indices)

    def predict_proba(self, X) -> np.ndarray:
        """Predict class probabilities, columns ordered like ``classes_``."""
        X_arr = self._to_numpy(X)
        logits = self._predict_impl(X_arr)
        # Softmax with the row maximum subtracted for numerical stability
        exp_logits = np.exp(logits - np.max(logits, axis=1, keepdims=True))
        return exp_logits / np.sum(exp_logits, axis=1, keepdims=True)
# NOTE(review): the mixin is listed first to match EmbeddingMLPClassifier;
# confirm EndgameEstimator defines nothing that RegressorMixin should not shadow.
class EmbeddingMLPRegressor(RegressorMixin, _BaseEmbeddingMLP):
    """MLP regressor with entity embeddings for categorical features.

    Learns dense representations for categorical variables, enabling
    effective handling of high-cardinality features. Targets are
    standardised for training and predictions are mapped back to the
    original scale.

    Parameters
    ----------
    categorical_features : List[str] or List[int], optional
        Names or indices of categorical features.
    embedding_dims : Dict[str, int] or int, optional
        Embedding dimensions per feature or default dimension.
    hidden_dims : List[int], default=[256, 128]
        Hidden layer dimensions.
    dropout : float, default=0.3
        Dropout rate for hidden layers.
    embedding_dropout : float, default=0.1
        Dropout rate for embeddings.
    batch_norm : bool, default=True
        Whether to use batch normalization.
    activation : str, default='relu'
        Activation function.
    learning_rate : float, default=1e-3
        Initial learning rate.
    weight_decay : float, default=1e-5
        L2 regularization strength.
    n_epochs : int, default=100
        Maximum training epochs.
    batch_size : int, default=256
        Training batch size.
    early_stopping : int, default=10
        Early stopping patience.
    loss : str, default='mse'
        Loss function: 'mse', 'mae', 'huber'.
    scheduler : str, default='cosine'
        Learning rate scheduler.
    device : str, default='auto'
        Device: 'cuda', 'cpu', or 'auto'.
    random_state : int, optional
        Random seed.
    verbose : bool, default=False
        Enable verbose output.

    Attributes
    ----------
    model_ : _EmbeddingMLPModule
        Fitted PyTorch model.
    history_ : dict
        Training history.

    Examples
    --------
    >>> from endgame.models.neural import EmbeddingMLPRegressor
    >>> reg = EmbeddingMLPRegressor(
    ...     categorical_features=['store_id', 'product_id'],
    ...     embedding_dims=16
    ... )
    >>> reg.fit(X_train, y_train, val_data=(X_val, y_val))
    >>> predictions = reg.predict(X_test)
    """

    _estimator_type = "regressor"

    def __init__(
        self,
        categorical_features: list[str] | list[int] | None = None,
        embedding_dims: dict[str, int] | int | None = None,
        hidden_dims: list[int] | None = None,
        dropout: float = 0.3,
        embedding_dropout: float = 0.1,
        batch_norm: bool = True,
        activation: str = "relu",
        learning_rate: float = 1e-3,
        weight_decay: float = 1e-5,
        n_epochs: int = 100,
        batch_size: int = 256,
        early_stopping: int = 10,
        loss: str = "mse",
        scheduler: str = "cosine",
        device: str = "auto",
        random_state: int | None = None,
        verbose: bool = False,
    ):
        super().__init__(
            categorical_features=categorical_features,
            embedding_dims=embedding_dims,
            hidden_dims=hidden_dims,
            dropout=dropout,
            embedding_dropout=embedding_dropout,
            batch_norm=batch_norm,
            activation=activation,
            learning_rate=learning_rate,
            weight_decay=weight_decay,
            n_epochs=n_epochs,
            batch_size=batch_size,
            early_stopping=early_stopping,
            scheduler=scheduler,
            device=device,
            random_state=random_state,
            verbose=verbose,
        )
        self.loss = loss
        self._target_scaler: StandardScaler | None = None

    def _prepare_target_tensor(self, y: np.ndarray) -> torch.Tensor:
        """Prepare target tensor for regression (always 2-D, float)."""
        if y.ndim == 1:
            y = y.reshape(-1, 1)
        return torch.FloatTensor(y)

    def _get_criterion(self) -> nn.Module:
        """Return the loss module selected by ``self.loss``.

        Raises
        ------
        ValueError
            If ``self.loss`` is not 'mse', 'mae' or 'huber'.
        """
        if self.loss == "mse":
            return nn.MSELoss()
        elif self.loss == "mae":
            return nn.L1Loss()
        elif self.loss == "huber":
            return nn.HuberLoss()
        else:
            raise ValueError(f"Unknown loss: {self.loss}")

    def fit(
        self,
        X,
        y,
        val_data: tuple[Any, Any] | None = None,
    ) -> EmbeddingMLPRegressor:
        """Fit the regressor.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values.
        val_data : tuple of (X_val, y_val), optional
            Validation data for early stopping.

        Returns
        -------
        self
            Fitted regressor.
        """
        X_arr, y_arr = self._validate_data(X, y)

        # Scale targets; the scaler expects a 2-D array
        self._target_scaler = StandardScaler()
        if y_arr.ndim == 1:
            y_arr = y_arr.reshape(-1, 1)
        y_scaled = self._target_scaler.fit_transform(y_arr)
        output_dim = y_scaled.shape[1]

        # Validation targets are scaled with the statistics of the training targets
        if val_data is not None:
            X_val, y_val = val_data
            X_val = self._to_numpy(X_val)
            y_val = np.asarray(y_val)
            if y_val.ndim == 1:
                y_val = y_val.reshape(-1, 1)
            y_val = self._target_scaler.transform(y_val)
            val_data = (X_val, y_val)

        criterion = self._get_criterion()
        return self._fit_impl(X_arr, y_scaled, output_dim, criterion, val_data)

    def predict(self, X) -> np.ndarray:
        """Predict target values on the original target scale.

        A single-target model returns a 1-D array; otherwise the result has
        shape ``(n_samples, n_targets)``.
        """
        X_arr = self._to_numpy(X)
        predictions = self._predict_impl(X_arr)
        # Undo the target standardisation applied in ``fit``
        predictions = self._target_scaler.inverse_transform(predictions)
        if predictions.shape[1] == 1:
            predictions = predictions.ravel()
        return predictions