A Dataset Generated by Damped Pendulum¶

In this notebook, we demo a dataset we created to simulate the oscillations of a pendulumn.

In [ ]:

                
                    Copied!
                    
from functools import cached_property
from typing import List, Tuple
from functools import cached_property
from typing import List, Tuple

In [ ]:

                
                    Copied!
                    
                        
                        
                    
                    

            
import lightning as L
import matplotlib as mpl
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import pandas as pd
from torchmetrics import MetricCollection
from torchmetrics.regression import (
    MeanAbsoluteError,
    MeanAbsolutePercentageError,
    MeanSquaredError,
    SymmetricMeanAbsolutePercentageError,
)
from ts_dl_utils.datasets.dataset import DataFrameDataset
from ts_dl_utils.datasets.pendulum import Pendulum, PendulumDataModule
from ts_dl_utils.evaluation.evaluator import Evaluator
from ts_dl_utils.naive_forecasters.last_observation import LastObservationForecaster
import lightning as L
import matplotlib as mpl
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import pandas as pd
from torchmetrics import MetricCollection
from torchmetrics.regression import (
    MeanAbsoluteError,
    MeanAbsolutePercentageError,
    MeanSquaredError,
    SymmetricMeanAbsolutePercentageError,
)
from ts_dl_utils.datasets.dataset import DataFrameDataset
from ts_dl_utils.datasets.pendulum import Pendulum, PendulumDataModule
from ts_dl_utils.evaluation.evaluator import Evaluator
from ts_dl_utils.naive_forecasters.last_observation import LastObservationForecaster

Data¶

We create a dataset that models a damped pendulum. The pendulum is modelled as a damped harmonic oscillator, i.e.,

$\theta(t) = \theta(0) \cos(2 \pi t / p)\exp(-\beta t),$

where $\theta(t)$ is the angle of the pendulum at time $t$ . The period $p$ is calculated using

$p = 2 \pi \sqrt(L / g),$

with $L$ being the length of the pendulum and $g$ being the surface gravity.

In [ ]:

                
                    Copied!
                    
pen = Pendulum(length=200)
pen = Pendulum(length=200)

In [ ]:

                
                    Copied!
                    
df = pd.DataFrame(
    pen(num_periods=5, num_samples_per_period=100, initial_angle=1, beta=0.01)
)
df = pd.DataFrame(
    pen(num_periods=5, num_samples_per_period=100, initial_angle=1, beta=0.01)
)

Since the damping constant is very small, the data generated is mostly a sin wave.

In [ ]:

                
                    Copied!
                    
_, ax = plt.subplots(figsize=(10, 6.18))

df.plot(x="t", y="theta", ax=ax)
_, ax = plt.subplots(figsize=(10, 6.18))

df.plot(x="t", y="theta", ax=ax)

PyTorch and Lighting DataModule¶

In [ ]:

                
                    Copied!
                    
history_length = 100
horizon = 5
history_length = 100
horizon = 5

In [ ]:

                
                    Copied!
                    
ds = DataFrameDataset(dataframe=df, history_length=history_length, horizon=horizon)
ds = DataFrameDataset(dataframe=df, history_length=history_length, horizon=horizon)

In [ ]:

                
                    Copied!
                    
                        
                        
                    
                    

            
print(
    f"""
    There were {len(df)} rows in the dataframe\n
    We got {len(ds)} data points in the dataset (history length: {history_length}, horizon: {horizon})
    """
)
print(
    f"""
    There were {len(df)} rows in the dataframe\n
    We got {len(ds)} data points in the dataset (history length: {history_length}, horizon: {horizon})
    """
)

We can create a LightningDataModule for Lightning. When training/evaluating using Lightning, we only need to pass this object pdm to the training.

In [ ]:

                
                    Copied!
                    
pdm = PendulumDataModule(
    history_length=history_length, horizon=horizon, dataframe=df[["theta"]]
)
pdm = PendulumDataModule(
    history_length=history_length, horizon=horizon, dataframe=df[["theta"]]
)

Naive Forecasts¶

In [ ]:

                
                    Copied!
                    
prediction_truths = [i[1].squeeze() for i in pdm.predict_dataloader()]
prediction_truths = [i[1].squeeze() for i in pdm.predict_dataloader()]

In [ ]:

                
                    Copied!
                    
trainer_naive = L.Trainer(precision="64")

lobs_forecaster = LastObservationForecaster(horizon=horizon)
lobs_predictions = trainer_naive.predict(model=lobs_forecaster, datamodule=pdm)
trainer_naive = L.Trainer(precision="64")

lobs_forecaster = LastObservationForecaster(horizon=horizon)
lobs_predictions = trainer_naive.predict(model=lobs_forecaster, datamodule=pdm)

In [ ]:

                
                    Copied!
                    
evaluator = Evaluator(step=0)
evaluator = Evaluator(step=0)

In [ ]:

                
                    Copied!
                    
fig, ax = plt.subplots(figsize=(10, 6.18))

ax.plot(
    evaluator.y_true(dataloader=pdm.predict_dataloader()),
    "g-",
    label="truth",
)

ax.plot(evaluator.y(lobs_predictions), "b-.", label="naive predictions")

plt.legend()
fig, ax = plt.subplots(figsize=(10, 6.18))

ax.plot(
    evaluator.y_true(dataloader=pdm.predict_dataloader()),
    "g-",
    label="truth",
)

ax.plot(evaluator.y(lobs_predictions), "b-.", label="naive predictions")

plt.legend()

In [ ]:

                
                    Copied!
                    
evaluator.metrics(lobs_predictions, pdm.predict_dataloader())
evaluator.metrics(lobs_predictions, pdm.predict_dataloader())

Naive forecaster works well since we do not have dramatic changes between two time steps.

Delayed Embedding¶

In [ ]:

                
                    Copied!
                    
ds_de = DataFrameDataset(dataframe=df["theta"][:200], history_length=1, horizon=1)
ds_de = DataFrameDataset(dataframe=df["theta"][:200], history_length=1, horizon=1)

In [ ]:

                
                    Copied!
                    
                        
                        
                    
                    

            
class DelayedEmbeddingAnimation:
    """Builds an animation for univariate time series
    using delayed embedding.

    ```python
    fig, ax = plt.subplots(figsize=(10, 10))

    dea = DelayedEmbeddingAnimation(dataset=ds_de, fig=fig, ax=ax)
    ani = dea.build(interval=10, save_count=dea.time_steps)
    ani.save("results/pendulum_dataset/delayed_embedding_animation.mp4")
    ```

    :param dataset: a PyTorch dataset, input and target should have only length 1
    :param fig: figure object from matplotlib
    :param ax: axis object from matplotlib
    """

    def __init__(
        self, dataset: DataFrameDataset, fig: mpl.figure.Figure, ax: mpl.axes.Axes
    ):
        self.dataset = dataset
        self.ax = ax
        self.fig = fig

    @cached_property
    def data(self) -> List[Tuple[float, float]]:
        return [(i[0][0], i[1][0]) for i in self.dataset]

    @cached_property
    def x(self):
        return [i[0] for i in self.data]

    @cached_property
    def y(self):
        return [i[1] for i in self.data]

    def data_gen(self):
        for i in self.data:
            yield i

    def animation_init(self) -> mpl.axes.Axes:
        ax.plot(
            self.x,
            self.y,
        )
        ax.set_xlim([-1.1, 1.1])
        ax.set_ylim([-1.1, 1.1])
        ax.set_xlabel("t")
        ax.set_ylabel("t+1")

        return self.ax

    def animation_run(self, data: Tuple[float, float]) -> mpl.axes.Axes:
        x, y = data
        self.ax.scatter(x, y)
        return self.ax

    @cached_property
    def time_steps(self):
        return len(self.data)

    def build(self, interval: int = 10, save_count: int = 10):
        return animation.FuncAnimation(
            self.fig,
            self.animation_run,
            self.data_gen,
            interval=interval,
            init_func=self.animation_init,
            save_count=save_count,
        )


fig, ax = plt.subplots(figsize=(10, 10))

dea = DelayedEmbeddingAnimation(dataset=ds_de, fig=fig, ax=ax)

ani = dea.build(interval=10, save_count=dea.time_steps)

gif_writer = animation.PillowWriter(fps=5, metadata=dict(artist="Lei Ma"), bitrate=100)

ani.save("results/pendulum_dataset/delayed_embedding_animation.gif", writer=gif_writer)
# ani.save("results/pendulum_dataset/delayed_embedding_animation.mp4")
class DelayedEmbeddingAnimation:
    """Builds an animation for univariate time series
    using delayed embedding.

    ```python
    fig, ax = plt.subplots(figsize=(10, 10))

    dea = DelayedEmbeddingAnimation(dataset=ds_de, fig=fig, ax=ax)
    ani = dea.build(interval=10, save_count=dea.time_steps)
    ani.save("results/pendulum_dataset/delayed_embedding_animation.mp4")
    ```

    :param dataset: a PyTorch dataset, input and target should have only length 1
    :param fig: figure object from matplotlib
    :param ax: axis object from matplotlib
    """

    def __init__(
        self, dataset: DataFrameDataset, fig: mpl.figure.Figure, ax: mpl.axes.Axes
    ):
        self.dataset = dataset
        self.ax = ax
        self.fig = fig

    @cached_property
    def data(self) -> List[Tuple[float, float]]:
        return [(i[0][0], i[1][0]) for i in self.dataset]

    @cached_property
    def x(self):
        return [i[0] for i in self.data]

    @cached_property
    def y(self):
        return [i[1] for i in self.data]

    def data_gen(self):
        for i in self.data:
            yield i

    def animation_init(self) -> mpl.axes.Axes:
        ax.plot(
            self.x,
            self.y,
        )
        ax.set_xlim([-1.1, 1.1])
        ax.set_ylim([-1.1, 1.1])
        ax.set_xlabel("t")
        ax.set_ylabel("t+1")

        return self.ax

    def animation_run(self, data: Tuple[float, float]) -> mpl.axes.Axes:
        x, y = data
        self.ax.scatter(x, y)
        return self.ax

    @cached_property
    def time_steps(self):
        return len(self.data)

    def build(self, interval: int = 10, save_count: int = 10):
        return animation.FuncAnimation(
            self.fig,
            self.animation_run,
            self.data_gen,
            interval=interval,
            init_func=self.animation_init,
            save_count=save_count,
        )


fig, ax = plt.subplots(figsize=(10, 10))

dea = DelayedEmbeddingAnimation(dataset=ds_de, fig=fig, ax=ax)

ani = dea.build(interval=10, save_count=dea.time_steps)

gif_writer = animation.PillowWriter(fps=5, metadata=dict(artist="Lei Ma"), bitrate=100)

ani.save("results/pendulum_dataset/delayed_embedding_animation.gif", writer=gif_writer)
# ani.save("results/pendulum_dataset/delayed_embedding_animation.mp4")

Contributors: LM