from __future__ import annotations
"""Entropy measures for signal complexity analysis.
Provides sklearn-compatible entropy feature extractors:
- Permutation entropy
- Sample entropy
- Approximate entropy
- SVD entropy
- Spectral entropy
These measures quantify signal complexity and irregularity,
commonly used in EEG/biosignal analysis.
References
----------
- Bandt & Pompe (2002): Permutation entropy
- Richman & Moorman (2000): Sample entropy
- Pincus (1991): Approximate entropy
- antropy library: Algorithm implementations
"""
import numpy as np
from scipy import signal as scipy_signal
from scipy.special import comb
from endgame.signal.base import (
BaseFeatureExtractor,
ensure_2d_signals,
)
def _embed(x: np.ndarray, order: int, delay: int) -> np.ndarray:
    """Build the time-delay embedding matrix of a 1D signal.

    Row ``k`` of the result holds the samples
    ``x[k], x[k + delay], ..., x[k + (order - 1) * delay]``.

    Parameters
    ----------
    x : np.ndarray
        1D signal.
    order : int
        Embedding dimension (number of columns of the result).
    delay : int
        Time delay (lag) between consecutive columns.

    Returns
    -------
    np.ndarray
        Float array of shape ``(n_vectors, order)`` with
        ``n_vectors = len(x) - (order - 1) * delay``.

    Raises
    ------
    ValueError
        If the signal is too short to form a single embedding vector.
    """
    n_samples = len(x)
    n_rows = n_samples - (order - 1) * delay
    if n_rows <= 0:
        raise ValueError(
            f"Signal too short for embedding: {n_samples} samples, "
            f"order={order}, delay={delay}"
        )

    result = np.zeros((n_rows, order))
    for col in range(order):
        # Column ``col`` is the signal shifted by ``col * delay`` samples.
        start = col * delay
        stop = start + n_rows
        result[:, col] = x[start:stop]
    return result
def _count_neighbors(embedded: np.ndarray, r: float, metric: str = "chebyshev") -> int:
    """Count unordered pairs of vectors whose distance is strictly below r.

    Each vector is compared with all later vectors in one vectorised
    step, so only the outer loop runs in Python.

    Parameters
    ----------
    embedded : np.ndarray
        Embedded signal of shape (n_vectors, dimension).
    r : float
        Tolerance threshold; a pair matches when ``distance < r``.
    metric : str
        Distance metric: 'chebyshev' (max norm). Any other value selects
        the Euclidean distance.

    Returns
    -------
    int
        Number of pairs ``(i, j)`` with ``i < j`` within distance r.
    """
    embedded = np.asarray(embedded)
    n_vectors = len(embedded)
    if n_vectors < 2:
        # No pair can be formed.
        return 0

    # One row per vector, so 1D input (scalar "vectors") is handled too.
    embedded = embedded.reshape(n_vectors, -1)
    # Resolve the metric once instead of on every comparison.
    use_chebyshev = metric == "chebyshev"

    count = 0
    for i in range(n_vectors - 1):
        diffs = embedded[i] - embedded[i + 1 :]
        if use_chebyshev:
            dists = np.max(np.abs(diffs), axis=1)
        else:
            dists = np.sqrt(np.sum(diffs**2, axis=1))
        count += int(np.count_nonzero(dists < r))
    return count
def permutation_entropy(
    x: np.ndarray,
    order: int = 3,
    delay: int = 1,
    normalize: bool = True,
) -> float:
    """Compute permutation entropy of a signal.

    Permutation entropy measures the complexity of a time series by
    analyzing the order relations between successive values. Every
    embedding vector is reduced to its ordinal pattern (the argsort of
    its values) and the Shannon entropy of the pattern distribution is
    returned.

    Parameters
    ----------
    x : np.ndarray
        1D signal (any array-like is flattened).
    order : int, default=3
        Embedding dimension (typically 3-7). Must be >= 1.
    delay : int, default=1
        Time delay between elements. Must be >= 1.
    normalize : bool, default=True
        If True, normalize by log2(order!).

    Returns
    -------
    float
        Permutation entropy value in bits, or NaN if the signal is too
        short to form a single embedding vector.

    Raises
    ------
    ValueError
        If ``order`` or ``delay`` is smaller than 1.

    References
    ----------
    Bandt, C., & Pompe, B. (2002). Permutation entropy: a natural complexity
    measure for time series. Physical review letters, 88(17), 174102.
    """
    from math import factorial, log2

    if order < 1 or delay < 1:
        raise ValueError(
            f"order and delay must be >= 1, got order={order}, delay={delay}"
        )

    x = np.asarray(x).flatten()
    n_vectors = len(x) - (order - 1) * delay
    if n_vectors <= 0:
        return np.nan

    # Row k selects x[k], x[k + delay], ..., x[k + (order - 1) * delay].
    indices = np.arange(n_vectors)[:, None] + delay * np.arange(order)[None, :]

    # Ordinal pattern of every embedding vector; a stable sort makes the
    # handling of tied values deterministic.
    patterns = np.argsort(x[indices], axis=1, kind="stable")

    # Count each distinct pattern exactly. Collapsing a pattern to a
    # weighted sum of its entries is not injective and merges different
    # permutations, which underestimates the entropy.
    _, counts = np.unique(patterns, axis=0, return_counts=True)

    probs = counts / n_vectors
    entropy = -np.sum(probs * np.log2(probs))
    if normalize:
        # log2(order!) is the entropy of a uniform pattern distribution.
        entropy /= log2(factorial(order))
    return entropy
def sample_entropy(
    x: np.ndarray,
    order: int = 2,
    r: float | None = None,
) -> float:
    """Compute sample entropy of a signal.

    Pairs of embedded vectors closer than ``r`` are counted (self-matches
    excluded) for embedding dimensions ``order`` and ``order + 1``. Each
    count is divided by the number of possible pairs at that dimension
    and the negative natural log of the ratio is returned.

    Parameters
    ----------
    x : np.ndarray
        1D signal.
    order : int, default=2
        Embedding dimension.
    r : float, optional
        Tolerance threshold. If None, uses 0.2 * std(x).

    Returns
    -------
    float
        Sample entropy value, or NaN when the tolerance is not positive
        or no matching pair exists at either dimension.

    References
    ----------
    Richman, J. S., & Moorman, J. R. (2000). Physiological time-series analysis
    using approximate entropy and sample entropy.
    """
    signal = np.asarray(x).flatten()

    tolerance = 0.2 * np.std(signal, ddof=1) if r is None else r
    if tolerance <= 0:
        return np.nan

    # Template matches at dimension m, then at dimension m + 1.
    short_templates = _embed(signal, order, 1)
    short_matches = _count_neighbors(short_templates, tolerance)
    long_templates = _embed(signal, order + 1, 1)
    long_matches = _count_neighbors(long_templates, tolerance)

    # A zero count would put log(0) or a division by zero below.
    if not (short_matches and long_matches):
        return np.nan

    # Turn counts into match probabilities over all possible pairs.
    prob_short = short_matches / comb(len(short_templates), 2)
    prob_long = long_matches / comb(len(long_templates), 2)
    return -np.log(prob_long / prob_short)
def approximate_entropy(
    x: np.ndarray,
    order: int = 2,
    r: float | None = None,
) -> float:
    """Compute approximate entropy of a signal.

    Approximate entropy measures the likelihood that similar patterns
    remain similar on the next comparison. It is the difference between
    the mean log match frequency at embedding dimensions ``order`` and
    ``order + 1``, using the Chebyshev distance and counting self-matches.

    Parameters
    ----------
    x : np.ndarray
        1D signal.
    order : int, default=2
        Embedding dimension.
    r : float, optional
        Tolerance threshold. If None, uses 0.2 * std(x).

    Returns
    -------
    float
        Approximate entropy value, or NaN when the tolerance is not
        positive.

    References
    ----------
    Pincus, S. M. (1991). Approximate entropy as a measure of system complexity.
    Proceedings of the National Academy of Sciences, 88(6), 2297-2301.
    """
    x = np.asarray(x).flatten()
    if r is None:
        r = 0.2 * np.std(x, ddof=1)
    if r <= 0:
        return np.nan

    def phi(m):
        """Mean log fraction of templates of length m matching each template."""
        embedded = _embed(x, m, 1)
        n_vec = len(embedded)
        counts = np.empty(n_vec)
        for i in range(n_vec):
            # Chebyshev distance from template i to every template at once;
            # the self-match (distance 0 < r) keeps each count >= 1.
            dists = np.max(np.abs(embedded[i] - embedded), axis=1)
            counts[i] = np.count_nonzero(dists < r)
        return np.mean(np.log(counts / n_vec))

    return phi(order) - phi(order + 1)
def svd_entropy(
    x: np.ndarray,
    order: int = 3,
    delay: int = 1,
    normalize: bool = True,
) -> float:
    """Compute SVD entropy of a signal.

    SVD entropy measures the complexity of a time series using singular
    value decomposition of the embedded signal: the singular values are
    normalized to sum to one and their Shannon entropy is returned.

    Parameters
    ----------
    x : np.ndarray
        1D signal.
    order : int, default=3
        Embedding dimension.
    delay : int, default=1
        Time delay.
    normalize : bool, default=True
        If True, normalize by log2(order).

    Returns
    -------
    float
        SVD entropy value in bits. Returns 0.0 when all singular values
        are zero (e.g. an all-zero signal).
    """
    x = np.asarray(x).flatten()

    # Create embedded matrix
    embedded = _embed(x, order, delay)

    # Only the singular values are needed, so skip computing U and Vh.
    s = np.linalg.svd(embedded, compute_uv=False)

    total = np.sum(s)
    if total == 0:
        # No energy in the signal: avoid a 0/0 division.
        return 0.0

    # Normalize singular values into a probability distribution.
    s_norm = s / total
    s_norm = s_norm[s_norm > 0]

    entropy = -np.sum(s_norm * np.log2(s_norm))
    if normalize:
        entropy /= np.log2(order)
    return entropy
def spectral_entropy(
    x: np.ndarray,
    fs: float,
    method: str = "welch",
    nperseg: int | None = None,
    normalize: bool = True,
) -> float:
    """Compute spectral entropy of a signal.

    Spectral entropy measures the flatness of the power spectrum,
    indicating how spread the spectral energy is across frequencies.
    The power spectrum is normalized to a probability distribution and
    its Shannon entropy is returned.

    Parameters
    ----------
    x : np.ndarray
        1D signal.
    fs : float
        Sample rate in Hz (used by Welch's method).
    method : str, default='welch'
        PSD estimation method: 'welch' or 'fft'. Any value other than
        'welch' selects the FFT periodogram.
    nperseg : int, optional
        Segment length for Welch's method. Defaults to
        ``min(256, len(x))``.
    normalize : bool, default=True
        If True, normalize by log2(n_freqs), where n_freqs is the total
        number of frequency bins of the PSD estimate.

    Returns
    -------
    float
        Spectral entropy value in bits (or in [0, 1] when normalized).
        Returns 0.0 when the spectrum carries no power.
    """
    x = np.asarray(x).flatten()

    if method == "welch":
        if nperseg is None:
            nperseg = min(256, len(x))
        _, psd = scipy_signal.welch(x, fs=fs, nperseg=nperseg)
    else:
        from scipy.fft import rfft

        psd = np.abs(rfft(x)) ** 2

    # Number of bins before zero-power bins are dropped: this is the
    # support that defines the maximum attainable entropy.
    n_freqs = len(psd)

    total_power = np.sum(psd)
    if total_power == 0:
        # Flat-line input: avoid a 0/0 division.
        return 0.0

    # Normalize to a probability distribution; zero bins contribute
    # nothing to the entropy and would produce log2(0).
    psd_norm = psd / total_power
    psd_norm = psd_norm[psd_norm > 0]

    entropy = -np.sum(psd_norm * np.log2(psd_norm))
    if normalize:
        # Dividing by the number of non-zero bins instead would report
        # NaN or 1.0 for spectra concentrated in one or two bins.
        entropy /= np.log2(n_freqs)
    return entropy
class PermutationEntropy(BaseFeatureExtractor):
    """Permutation entropy feature extractor.

    Parameters
    ----------
    fs : float, optional
        Sample rate in Hz.
    order : int, default=3
        Embedding dimension.
    delay : int, default=1
        Time delay.
    normalize : bool, default=True
        Normalize entropy by maximum possible value.

    Attributes
    ----------
    feature_names_ : list of str
        Set by ``fit`` to ``["permutation_entropy"]``.

    Examples
    --------
    >>> pe = PermutationEntropy(order=3)
    >>> features = pe.fit_transform(signals)
    """

    def __init__(
        self,
        fs: float | None = None,
        order: int = 3,
        delay: int = 1,
        normalize: bool = True,
    ):
        super().__init__(fs=fs)
        self.order = order
        self.delay = delay
        self.normalize = normalize

    def fit(self, X, y=None, **fit_params) -> PermutationEntropy:
        """Fit the extractor and record its output feature name.

        Parameters
        ----------
        X : array-like
            Input signals, passed through ``self._validate_signal``.
        y : optional
            Forwarded unchanged to the parent ``fit``.
        **fit_params
            Extra keyword arguments forwarded to the parent ``fit``.

        Returns
        -------
        PermutationEntropy
            The fitted instance (``self``).
        """
        # NOTE(review): _validate_signal and the parent fit are defined on
        # the base class outside this module; presumably they check and
        # coerce the input - confirm there.
        X = self._validate_signal(X)
        super().fit(X, y, **fit_params)
        self.feature_names_ = ["permutation_entropy"]
        return self
class SampleEntropy(BaseFeatureExtractor):
    """Sample entropy feature extractor.

    Parameters
    ----------
    fs : float, optional
        Sample rate in Hz.
    order : int, default=2
        Embedding dimension.
    r : float, optional
        Tolerance threshold. If None, uses 0.2 * std(x).

    Attributes
    ----------
    feature_names_ : list of str
        Set by ``fit`` to ``["sample_entropy"]``.

    Examples
    --------
    >>> se = SampleEntropy(order=2)
    >>> features = se.fit_transform(signals)
    """

    def __init__(
        self,
        fs: float | None = None,
        order: int = 2,
        r: float | None = None,
    ):
        super().__init__(fs=fs)
        self.order = order
        self.r = r

    def fit(self, X, y=None, **fit_params) -> SampleEntropy:
        """Fit the extractor and record its output feature name.

        Parameters
        ----------
        X : array-like
            Input signals, passed through ``self._validate_signal``.
        y : optional
            Forwarded unchanged to the parent ``fit``.
        **fit_params
            Extra keyword arguments forwarded to the parent ``fit``.

        Returns
        -------
        SampleEntropy
            The fitted instance (``self``).
        """
        # NOTE(review): _validate_signal and the parent fit are defined on
        # the base class outside this module; presumably they check and
        # coerce the input - confirm there.
        X = self._validate_signal(X)
        super().fit(X, y, **fit_params)
        self.feature_names_ = ["sample_entropy"]
        return self
class ApproximateEntropy(BaseFeatureExtractor):
    """Approximate entropy feature extractor.

    Parameters
    ----------
    fs : float, optional
        Sample rate in Hz.
    order : int, default=2
        Embedding dimension.
    r : float, optional
        Tolerance threshold. If None, uses 0.2 * std(x).

    Attributes
    ----------
    feature_names_ : list of str
        Set by ``fit`` to ``["approximate_entropy"]``.

    Examples
    --------
    >>> ae = ApproximateEntropy(order=2)
    >>> features = ae.fit_transform(signals)
    """

    def __init__(
        self,
        fs: float | None = None,
        order: int = 2,
        r: float | None = None,
    ):
        super().__init__(fs=fs)
        self.order = order
        self.r = r

    def fit(self, X, y=None, **fit_params) -> ApproximateEntropy:
        """Fit the extractor and record its output feature name.

        Parameters
        ----------
        X : array-like
            Input signals, passed through ``self._validate_signal``.
        y : optional
            Forwarded unchanged to the parent ``fit``.
        **fit_params
            Extra keyword arguments forwarded to the parent ``fit``.

        Returns
        -------
        ApproximateEntropy
            The fitted instance (``self``).
        """
        # NOTE(review): _validate_signal and the parent fit are defined on
        # the base class outside this module; presumably they check and
        # coerce the input - confirm there.
        X = self._validate_signal(X)
        super().fit(X, y, **fit_params)
        self.feature_names_ = ["approximate_entropy"]
        return self
class SpectralEntropy(BaseFeatureExtractor):
    """Spectral entropy feature extractor.

    Parameters
    ----------
    fs : float
        Sample rate in Hz.
    method : str, default='welch'
        PSD estimation method.
    nperseg : int, optional
        Segment length for Welch.
    normalize : bool, default=True
        Normalize by maximum entropy.

    Attributes
    ----------
    feature_names_ : list of str
        Set by ``fit`` to ``["spectral_entropy"]``.

    Examples
    --------
    >>> se = SpectralEntropy(fs=256)
    >>> features = se.fit_transform(signals)
    """

    def __init__(
        self,
        fs: float,
        method: str = "welch",
        nperseg: int | None = None,
        normalize: bool = True,
    ):
        super().__init__(fs=fs)
        self.method = method
        self.nperseg = nperseg
        self.normalize = normalize

    def fit(self, X, y=None, **fit_params) -> SpectralEntropy:
        """Fit the extractor and record its output feature name.

        Parameters
        ----------
        X : array-like
            Input signals, passed through ``self._validate_signal``.
        y : optional
            Forwarded unchanged to the parent ``fit``.
        **fit_params
            Extra keyword arguments forwarded to the parent ``fit``.

        Returns
        -------
        SpectralEntropy
            The fitted instance (``self``).
        """
        # NOTE(review): _validate_signal and the parent fit are defined on
        # the base class outside this module; presumably they check and
        # coerce the input - confirm there.
        X = self._validate_signal(X)
        super().fit(X, y, **fit_params)
        self.feature_names_ = ["spectral_entropy"]
        return self
class SVDEntropy(BaseFeatureExtractor):
    """SVD entropy feature extractor.

    Parameters
    ----------
    fs : float, optional
        Sample rate in Hz.
    order : int, default=3
        Embedding dimension.
    delay : int, default=1
        Time delay.
    normalize : bool, default=True
        Normalize by maximum entropy.

    Attributes
    ----------
    feature_names_ : list of str
        Set by ``fit`` to ``["svd_entropy"]``.

    Examples
    --------
    >>> svde = SVDEntropy(order=3)
    >>> features = svde.fit_transform(signals)
    """

    def __init__(
        self,
        fs: float | None = None,
        order: int = 3,
        delay: int = 1,
        normalize: bool = True,
    ):
        super().__init__(fs=fs)
        self.order = order
        self.delay = delay
        self.normalize = normalize

    def fit(self, X, y=None, **fit_params) -> SVDEntropy:
        """Fit the extractor and record its output feature name.

        Parameters
        ----------
        X : array-like
            Input signals, passed through ``self._validate_signal``.
        y : optional
            Forwarded unchanged to the parent ``fit``.
        **fit_params
            Extra keyword arguments forwarded to the parent ``fit``.

        Returns
        -------
        SVDEntropy
            The fitted instance (``self``).
        """
        # NOTE(review): _validate_signal and the parent fit are defined on
        # the base class outside this module; presumably they check and
        # coerce the input - confirm there.
        X = self._validate_signal(X)
        super().fit(X, y, **fit_params)
        self.feature_names_ = ["svd_entropy"]
        return self