Source code for kooplearn.preprocessing

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_array, check_is_fitted


class TimeDelayEmbedding(BaseEstimator, TransformerMixin):
    """
    A scikit-learn compatible transformer that constructs time-delay embeddings
    (temporal windows) from trajectory data, with a configurable stride.

    Each output sample corresponds to a flattened temporal window of length
    :math:`H`, with a stride :math:`s` between the starting points of
    consecutive windows.

    Parameters
    ----------
    history_length : int
        Number of consecutive time steps per embedding window (:math:`H`).
    stride : int, default=1
        Step between the starts of successive windows (:math:`s`).

    Attributes
    ----------
    n_samples_in_ : int
        Number of samples in the input data seen during fitting.
    n_features_in_ : int
        Number of features per sample in the input data.

    Notes
    -----
    - The ``inverse_transform`` method **only works when ``stride=1``**.
      Using ``stride>1`` will raise a ``ValueError``, because reconstruction
      requires overlapping windows.
    - ``history_length`` and ``stride`` are stored as given by ``__init__``
      and validated when ``transform`` / ``inverse_transform`` are called.

    Examples
    --------
    >>> import numpy as np
    >>> traj = np.arange(20).reshape(10, 2)
    >>> tde = TimeDelayEmbedding(history_length=3, stride=1)
    >>> X = tde.fit_transform(traj)
    >>> X.shape
    (8, 6)
    >>> reconstructed = tde.inverse_transform(X)
    >>> np.allclose(traj, reconstructed, atol=1e-8)
    True
    >>> tde2 = TimeDelayEmbedding(history_length=3, stride=2)
    >>> X2 = tde2.fit_transform(traj)
    >>> X2.shape
    (4, 6)
    """

    def __init__(self, history_length: int, stride: int = 1):
        # No validation here: the constructor only stores the hyperparameters.
        self.history_length = history_length
        self.stride = stride

    @staticmethod
    def _is_positive_int(value) -> bool:
        """Return True when *value* is a Python or NumPy integer that is >= 1."""
        return isinstance(value, (int, np.integer)) and value >= 1

    def fit(self, X, y=None):
        """
        Fit the transformer by storing the input data shape.

        This method validates the input array and stores its dimensions for
        later use in transformations or inverse transformations.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Input trajectory data.
        y : None
            Ignored. Present for API compatibility with scikit-learn pipelines.

        Returns
        -------
        self : TimeDelayEmbedding
            Fitted transformer instance.
        """
        X = check_array(X, ensure_2d=True, dtype=float)
        self.n_samples_in_, self.n_features_in_ = X.shape
        self._is_fitted = True
        return self

    def transform(self, X):
        """
        Construct the time-delay embedding of the input trajectory.

        Builds overlapping or non-overlapping temporal windows of length
        ``history_length`` with ``stride`` between successive windows.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Input trajectory data to embed.

        Returns
        -------
        ndarray of shape (n_windows, history_length * n_features)
            Time-delay embedded representation of the input data, where
            ``n_windows = (n_samples - history_length) // stride + 1``.

        Raises
        ------
        ValueError
            If ``history_length`` is not a positive integer.
        ValueError
            If ``history_length`` exceeds the number of samples in ``X``.
        ValueError
            If ``stride`` is not a positive integer.
        """
        check_is_fitted(self, ["n_samples_in_", "n_features_in_"])
        X = check_array(X, ensure_2d=True, dtype=float)
        n_samples = X.shape[0]

        # Reject zero, negative or non-integer window lengths up front; they
        # would otherwise yield an empty embedding or an index-dtype error.
        if not self._is_positive_int(self.history_length):
            raise ValueError("history_length must be a positive integer.")
        if self.history_length > n_samples:
            raise ValueError("history_length must not exceed number of samples.")
        if not self._is_positive_int(self.stride):
            raise ValueError("stride must be a positive integer.")

        n_windows = (n_samples - self.history_length) // self.stride + 1
        # Row i of ``indices`` lists the time steps belonging to window i:
        # stride * i, stride * i + 1, ..., stride * i + history_length - 1.
        indices = (
            np.arange(self.history_length)[None, :]
            + self.stride * np.arange(n_windows)[:, None]
        )
        windows = X[indices]  # shape: (n_windows, history_length, n_features)
        return windows.reshape(n_windows, -1)

    def fit_transform(self, X, y=None, **fit_params):
        """
        Fit on ``X`` and return its time-delay embedding.

        Equivalent to ``self.fit(X, y).transform(X)``.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_features)
            Input trajectory data.
        y : None
            Ignored. Present for API compatibility with scikit-learn pipelines.
        **fit_params : dict
            Accepted for API compatibility; not used.

        Returns
        -------
        ndarray of shape (n_windows, history_length * n_features)
            Time-delay embedded representation of ``X``.
        """
        return self.fit(X, y).transform(X)

    def inverse_transform(self, X):
        """
        Reconstruct input trajectory from flattened time-delay embeddings.

        This method reverses the transformation performed by
        :meth:`~TimeDelayEmbedding.transform`. It is only supported when
        ``stride=1``, since larger strides lead to non-overlapping windows and
        ambiguous reconstruction. Time steps covered by several windows are
        reconstructed as the average of the values those windows hold.

        Parameters
        ----------
        X : ndarray of shape (n_windows, history_length * n_features_in_)
            Flattened time-delay embedded data.

        Returns
        -------
        ndarray of shape (n_windows + history_length - 1, n_features_in_)
            Approximate reconstruction of the original trajectory. An input
            with zero windows yields an empty ``(0, n_features_in_)`` array.

        Raises
        ------
        ValueError
            If ``stride != 1``.
        ValueError
            If ``history_length`` is not a positive integer.
        ValueError
            If input shape is incompatible with ``history_length`` and the
            number of input features.
        """
        check_is_fitted(self, ["n_samples_in_", "n_features_in_"])
        X = np.asarray(X, dtype=float)

        if self.stride != 1:
            raise ValueError("inverse_transform only works when stride=1.")
        if not self._is_positive_int(self.history_length):
            raise ValueError("history_length must be a positive integer.")

        expected_width = self.history_length * self.n_features_in_
        if X.ndim != 2 or X.shape[1] != expected_width:
            raise ValueError(
                f"Input must have shape (n_windows, {expected_width}), got {X.shape} instead."
            )

        n_windows = X.shape[0]
        if n_windows == 0:
            # No windows carry no information: return an empty trajectory
            # rather than inventing rows of zeros.
            return np.empty((0, self.n_features_in_))

        windows = X.reshape(n_windows, self.history_length, self.n_features_in_)
        reconstructed_length = n_windows + self.history_length - 1
        reconstructed = np.zeros((reconstructed_length, self.n_features_in_))
        counts = np.zeros(reconstructed_length)

        # With stride 1, the sample at offset j of window i belongs to time
        # step i + j. Accumulating one offset at a time keeps the Python loop
        # at history_length iterations instead of n_windows.
        for offset in range(self.history_length):
            stop = offset + n_windows
            reconstructed[offset:stop] += windows[:, offset, :]
            counts[offset:stop] += 1

        # Every time step is covered by at least one window here, so counts >= 1.
        reconstructed /= counts[:, np.newaxis]
        return reconstructed
class FeatureFlattener(BaseEstimator, TransformerMixin):
    """
    A scikit-learn compatible transformer that flattens multi-dimensional
    trajectories into a 2D array, and restores them to their original shape
    when inverted.

    This transformer is useful when working with models that expect 2D input
    (e.g., `(n_samples, n_features)`), but the data naturally has higher-order
    structure, e.g., images or spatio-temporal fields.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.preprocessing import StandardScaler
    >>> from kooplearn.preprocessing import FeatureFlattener
    >>>
    >>> X = np.random.rand(10, 4, 5)  # e.g., 10 snapshots of a 4×5 field
    >>> flattener = FeatureFlattener()
    >>> X_flat = flattener.fit_transform(X)
    >>> X_flat.shape
    (10, 20)
    >>> X_reconstructed = flattener.inverse_transform(X_flat)
    >>> np.allclose(X, X_reconstructed)
    True
    """

    @staticmethod
    def _as_array_like(X):
        """Return *X* unchanged if it exposes ``shape`` and ``reshape``, else as an ndarray."""
        if hasattr(X, "shape") and hasattr(X, "reshape"):
            return X
        # Nested lists/tuples and similar inputs need converting before they
        # can be reshaped.
        return np.asarray(X)

    def fit(self, X, y=None):
        """Store the original feature shape for later reconstruction.

        Parameters
        ----------
        X : array-like of shape (n_samples, ...)
            Input data with arbitrary feature dimensions.
        y : None
            Ignored. Present for API compatibility with scikit-learn pipelines.

        Returns
        -------
        self : object
            Fitted transformer instance.
        """
        X = self._as_array_like(X)
        # Kept as a plain tuple so it can be concatenated with ``(n_samples,)``
        # in ``inverse_transform``.
        self._feature_shape = tuple(X.shape[1:])
        self._is_fitted = True
        return self

    def transform(self, X, y=None):
        """Flatten input features into a 2D array.

        Parameters
        ----------
        X : array-like of shape (n_samples, ...)
            Input data to flatten.
        y : None
            Ignored. Present for API compatibility with scikit-learn pipelines.

        Returns
        -------
        ndarray of shape (n_samples, n_features)
            Flattened input data.
        """
        X = self._as_array_like(X)
        n_samples = X.shape[0]
        return X.reshape(n_samples, -1)

    def inverse_transform(self, X, y=None):
        """Restore flattened features to their original shape.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Flattened data to reconstruct.
        y : None
            Ignored. Present for API compatibility with scikit-learn pipelines.

        Returns
        -------
        ndarray of shape (n_samples, ...)
            Data reshaped to the original feature dimensions.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If called before ``fit`` has stored the feature shape.
        """
        # Fail with the standard "not fitted" error instead of a bare
        # AttributeError on ``_feature_shape``.
        check_is_fitted(self, ["_feature_shape"])
        X = self._as_array_like(X)
        n_samples = X.shape[0]
        return X.reshape((n_samples,) + self._feature_shape)