Time Series Data and Embeddings¶
In [ ]:
Copied!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
In [ ]:
Copied!
def plot_arrow_chart(
dataframe: pd.DataFrame,
x_col: str,
y_col: str,
ax: plt.Axes,
color: str = "k",
alpha: float = 0.7,
marker: str = ".",
linestyle: str = "-",
arrow_head_width: int = 4000,
) -> plt.Axes:
"""
Plot an arrow chart for the 'Total' and its lagged values
within a specified date range.
"""
x = dataframe[x_col].values
y = dataframe[y_col].values
ax.plot(x, y, marker=marker, linestyle=linestyle, color=color, alpha=alpha)
step = max(1, len(x) // 100)
for i in range(0, len(x) - 1, step):
ax.arrow(
x[i],
y[i],
x[i + 1] - x[i],
y[i + 1] - y[i],
shape="full",
lw=0,
length_includes_head=True,
head_width=arrow_head_width,
color=color,
alpha=alpha,
)
return ax
def plot_arrow_chart(
dataframe: pd.DataFrame,
x_col: str,
y_col: str,
ax: plt.Axes,
color: str = "k",
alpha: float = 0.7,
marker: str = ".",
linestyle: str = "-",
arrow_head_width: int = 4000,
) -> plt.Axes:
"""
Plot an arrow chart for the 'Total' and its lagged values
within a specified date range.
"""
x = dataframe[x_col].values
y = dataframe[y_col].values
ax.plot(x, y, marker=marker, linestyle=linestyle, color=color, alpha=alpha)
step = max(1, len(x) // 100)
for i in range(0, len(x) - 1, step):
ax.arrow(
x[i],
y[i],
x[i + 1] - x[i],
y[i + 1] - y[i],
shape="full",
lw=0,
length_includes_head=True,
head_width=arrow_head_width,
color=color,
alpha=alpha,
)
return ax
Pendulum¶
In [ ]:
Copied!
from ts_dl_utils.datasets.pendulum import Pendulum
from ts_dl_utils.datasets.pendulum import Pendulum
In [ ]:
Copied!
pen = Pendulum(length=20)
df_pen = pd.DataFrame(pen(3, 100, initial_angle=1, beta=0.01))
df_pen["theta_1"] = df_pen["theta"].shift()
df_pen["theta_diff"] = df_pen["theta"].diff()
df_pen
pen = Pendulum(length=20)
df_pen = pd.DataFrame(pen(3, 100, initial_angle=1, beta=0.01))
df_pen["theta_1"] = df_pen["theta"].shift()
df_pen["theta_diff"] = df_pen["theta"].diff()
df_pen
In [ ]:
Copied!
fig = plt.figure(figsize=(10, 8), layout="constrained")
spec = fig.add_gridspec(2, 2)
ax0 = fig.add_subplot(spec[0, :])
ax10 = fig.add_subplot(spec[1, 0])
ax11 = fig.add_subplot(spec[1, 1])
ax0.plot(
df_pen.t,
df_pen.theta,
marker=".",
linestyle="-",
color="k",
)
# Make x-ticks readable
ax0.xaxis.set_major_locator(plt.MaxNLocator(8))
# fig.autofmt_xdate(rotation=30)
ax0.set_title("Swing Angle")
ax10 = plot_arrow_chart(
df_pen, x_col="theta", y_col="theta_1", ax=ax10, arrow_head_width=0.00001
)
ax10.set_xlabel("Swing Angle")
ax10.set_ylabel("Swing Angle 0.05 seconds ago")
ax10.set_title("Swing Angle and Angle 0.05 seconds ago")
ax11 = plot_arrow_chart(
df_pen, x_col="theta", y_col="theta_diff", ax=ax11, arrow_head_width=0.00001
)
ax11.set_xlabel("Swing Angle")
ax11.set_ylabel("Swing Angle Change Rate")
ax11.set_title("Phase Portrait")
plt.tight_layout()
fig = plt.figure(figsize=(10, 8), layout="constrained")
spec = fig.add_gridspec(2, 2)
ax0 = fig.add_subplot(spec[0, :])
ax10 = fig.add_subplot(spec[1, 0])
ax11 = fig.add_subplot(spec[1, 1])
ax0.plot(
df_pen.t,
df_pen.theta,
marker=".",
linestyle="-",
color="k",
)
# Make x-ticks readable
ax0.xaxis.set_major_locator(plt.MaxNLocator(8))
# fig.autofmt_xdate(rotation=30)
ax0.set_title("Swing Angle")
ax10 = plot_arrow_chart(
df_pen, x_col="theta", y_col="theta_1", ax=ax10, arrow_head_width=0.00001
)
ax10.set_xlabel("Swing Angle")
ax10.set_ylabel("Swing Angle 0.05 seconds ago")
ax10.set_title("Swing Angle and Angle 0.05 seconds ago")
ax11 = plot_arrow_chart(
df_pen, x_col="theta", y_col="theta_diff", ax=ax11, arrow_head_width=0.00001
)
ax11.set_xlabel("Swing Angle")
ax11.set_ylabel("Swing Angle Change Rate")
ax11.set_title("Phase Portrait")
plt.tight_layout()
Covid¶
In [ ]:
Copied!
df_ecdc_covid = pd.read_csv(
"https://gist.githubusercontent.com/emptymalei/"
"90869e811b4aa118a7d28a5944587a64/raw"
"/1534670c8a3859ab3a6ae8e9ead6795248a3e664"
"/ecdc%2520covid%252019%2520data"
)
df_ecdc_covid = pd.read_csv(
"https://gist.githubusercontent.com/emptymalei/"
"90869e811b4aa118a7d28a5944587a64/raw"
"/1534670c8a3859ab3a6ae8e9ead6795248a3e664"
"/ecdc%2520covid%252019%2520data"
)
In [ ]:
Copied!
px.line(df_ecdc_covid, x="datetime", y="Total")
px.line(df_ecdc_covid, x="datetime", y="Total")
In [ ]:
Copied!
df_ecdc_covid
df_ecdc_covid
In [ ]:
Copied!
df_ecdc_covid_arrow_chart = df_ecdc_covid.loc[
pd.to_datetime(df_ecdc_covid.datetime).between("2020-08-01", "2020-12-01")
].copy()
df_ecdc_covid_arrow_chart["Total_1"] = df_ecdc_covid_arrow_chart["Total"].shift()
df_ecdc_covid_arrow_chart["Total_diff"] = df_ecdc_covid_arrow_chart["Total"].diff()
df_ecdc_covid_arrow_chart = df_ecdc_covid.loc[
pd.to_datetime(df_ecdc_covid.datetime).between("2020-08-01", "2020-12-01")
].copy()
df_ecdc_covid_arrow_chart["Total_1"] = df_ecdc_covid_arrow_chart["Total"].shift()
df_ecdc_covid_arrow_chart["Total_diff"] = df_ecdc_covid_arrow_chart["Total"].diff()
In [ ]:
Copied!
fig = plt.figure(figsize=(10, 8), layout="constrained")
spec = fig.add_gridspec(2, 2)
ax0 = fig.add_subplot(spec[0, :])
ax10 = fig.add_subplot(spec[1, 0])
ax11 = fig.add_subplot(spec[1, 1])
ax0.plot(
df_ecdc_covid_arrow_chart.datetime,
df_ecdc_covid_arrow_chart.Total,
marker=".",
linestyle="-",
color="k",
)
# Make x-ticks readable
ax0.xaxis.set_major_locator(plt.MaxNLocator(8))
# fig.autofmt_xdate(rotation=30)
ax0.set_title("Covid Cases in EU Over Time")
ax10 = plot_arrow_chart(
df_ecdc_covid_arrow_chart, x_col="Total", y_col="Total_1", ax=ax10
)
ax10.set_xlabel("Total Cases")
ax10.set_ylabel("Total Cases Lagged by 1 Day")
ax10.set_title("Covid Cases and Lagged Values")
ax11 = plot_arrow_chart(
df_ecdc_covid_arrow_chart, x_col="Total", y_col="Total_diff", ax=ax11
)
ax11.set_xlabel("Total Cases")
ax11.set_ylabel("Total Cases Change")
ax11.set_title("Covid Cases in EU Phase Portrait")
ax11.set_ylim(-100_000, 100_000)
plt.tight_layout()
fig = plt.figure(figsize=(10, 8), layout="constrained")
spec = fig.add_gridspec(2, 2)
ax0 = fig.add_subplot(spec[0, :])
ax10 = fig.add_subplot(spec[1, 0])
ax11 = fig.add_subplot(spec[1, 1])
ax0.plot(
df_ecdc_covid_arrow_chart.datetime,
df_ecdc_covid_arrow_chart.Total,
marker=".",
linestyle="-",
color="k",
)
# Make x-ticks readable
ax0.xaxis.set_major_locator(plt.MaxNLocator(8))
# fig.autofmt_xdate(rotation=30)
ax0.set_title("Covid Cases in EU Over Time")
ax10 = plot_arrow_chart(
df_ecdc_covid_arrow_chart, x_col="Total", y_col="Total_1", ax=ax10
)
ax10.set_xlabel("Total Cases")
ax10.set_ylabel("Total Cases Lagged by 1 Day")
ax10.set_title("Covid Cases and Lagged Values")
ax11 = plot_arrow_chart(
df_ecdc_covid_arrow_chart, x_col="Total", y_col="Total_diff", ax=ax11
)
ax11.set_xlabel("Total Cases")
ax11.set_ylabel("Total Cases Change")
ax11.set_title("Covid Cases in EU Phase Portrait")
ax11.set_ylim(-100_000, 100_000)
plt.tight_layout()
Walmart¶
In [ ]:
Copied!
df_walmart = pd.read_csv(
"https://raw.githubusercontent.com/datumorphism/"
"dataset-m5-simplified/refs/heads/main/dataset/"
"m5_store_sales.csv"
)
df_walmart = pd.read_csv(
"https://raw.githubusercontent.com/datumorphism/"
"dataset-m5-simplified/refs/heads/main/dataset/"
"m5_store_sales.csv"
)
In [ ]:
Copied!
df_walmart
df_walmart
In [ ]:
Copied!
px.line(df_walmart, x="date", y="CA")
px.line(df_walmart, x="date", y="CA")
In [ ]:
Copied!
df_walmart_total = df_walmart[["date", "CA", "TX", "WI"]].copy()
df_walmart_total = df_walmart[["date", "CA", "TX", "WI"]].copy()
In [ ]:
Copied!
df_walmart_total["total"] = (
df_walmart_total.CA + df_walmart_total.TX + df_walmart_total.WI
)
df_walmart_total["datetime"] = pd.to_datetime(df_walmart_total.date, format="%Y-%m-%d")
df_walmart_total["timestamp"] = df_walmart_total.datetime.astype(int) // 10**9
df_walmart_total["total"] = (
df_walmart_total.CA + df_walmart_total.TX + df_walmart_total.WI
)
df_walmart_total["datetime"] = pd.to_datetime(df_walmart_total.date, format="%Y-%m-%d")
df_walmart_total["timestamp"] = df_walmart_total.datetime.astype(int) // 10**9
In [ ]:
Copied!
df_walmart_total["total_1"] = df_walmart_total.total.shift()
df_walmart_total["total_diff"] = df_walmart_total.total.diff()
df_walmart_total["total_1"] = df_walmart_total.total.shift()
df_walmart_total["total_diff"] = df_walmart_total.total.diff()
In [ ]:
Copied!
px.scatter(
df_walmart_total.loc[pd.to_datetime(df_walmart_total.date).dt.year == 2016],
x="total",
y="total_1",
color="timestamp",
)
px.scatter(
df_walmart_total.loc[pd.to_datetime(df_walmart_total.date).dt.year == 2016],
x="total",
y="total_1",
color="timestamp",
)
In [ ]:
Copied!
df_walmart_arrow_chart = df_walmart_total.loc[
pd.to_datetime(df_walmart_total.date).between("2016-01-01", "2016-03-01")
].copy()
df_walmart_arrow_chart = df_walmart_total.loc[
pd.to_datetime(df_walmart_total.date).between("2016-01-01", "2016-03-01")
].copy()
In [ ]:
Copied!
fig = plt.figure(figsize=(10, 8), layout="constrained")
spec = fig.add_gridspec(2, 2)
ax0 = fig.add_subplot(spec[0, :])
ax10 = fig.add_subplot(spec[1, 0])
ax11 = fig.add_subplot(spec[1, 1])
ax0.plot(
df_walmart_arrow_chart.datetime,
df_walmart_arrow_chart.total,
marker=".",
linestyle="-",
color="k",
)
# Make x-ticks readable
ax0.xaxis.set_major_locator(plt.MaxNLocator(8))
# fig.autofmt_xdate(rotation=30)
ax0.set_title("Walmart Sales Over Time")
ax10 = plot_arrow_chart(
df_walmart_arrow_chart,
x_col="total",
y_col="total_1",
ax=ax10,
arrow_head_width=500,
)
ax10.set_xlabel("Total Sales")
ax10.set_ylabel("Total Sales Lagged by 1 Day")
ax10.set_title("Walmart Sales and Lagged Sales")
ax11 = plot_arrow_chart(
df_walmart_arrow_chart,
x_col="total",
y_col="total_diff",
ax=ax11,
arrow_head_width=500,
)
ax11.set_xlabel("Total Sales")
ax11.set_ylabel("Total Sales Change")
ax11.set_title("Walmart Sales Phase Portrait")
plt.tight_layout()
fig = plt.figure(figsize=(10, 8), layout="constrained")
spec = fig.add_gridspec(2, 2)
ax0 = fig.add_subplot(spec[0, :])
ax10 = fig.add_subplot(spec[1, 0])
ax11 = fig.add_subplot(spec[1, 1])
ax0.plot(
df_walmart_arrow_chart.datetime,
df_walmart_arrow_chart.total,
marker=".",
linestyle="-",
color="k",
)
# Make x-ticks readable
ax0.xaxis.set_major_locator(plt.MaxNLocator(8))
# fig.autofmt_xdate(rotation=30)
ax0.set_title("Walmart Sales Over Time")
ax10 = plot_arrow_chart(
df_walmart_arrow_chart,
x_col="total",
y_col="total_1",
ax=ax10,
arrow_head_width=500,
)
ax10.set_xlabel("Total Sales")
ax10.set_ylabel("Total Sales Lagged by 1 Day")
ax10.set_title("Walmart Sales and Lagged Sales")
ax11 = plot_arrow_chart(
df_walmart_arrow_chart,
x_col="total",
y_col="total_diff",
ax=ax11,
arrow_head_width=500,
)
ax11.set_xlabel("Total Sales")
ax11.set_ylabel("Total Sales Change")
ax11.set_title("Walmart Sales Phase Portrait")
plt.tight_layout()
Electricity Data¶
In [ ]:
Copied!
import io
import zipfile
import io
import zipfile
In [ ]:
Copied!
import pandas as pd
import pandas as pd
In [ ]:
Copied!
import requests
# Download from remote URL
data_uri = "https://archive.ics.uci.edu/ml/machine-learning-databases/00321/LD2011_2014.txt.zip"
r = requests.get(data_uri)
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall("tmp/data/uci_electricity/")
import requests
# Download from remote URL
data_uri = "https://archive.ics.uci.edu/ml/machine-learning-databases/00321/LD2011_2014.txt.zip"
r = requests.get(data_uri)
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall("tmp/data/uci_electricity/")
In [ ]:
Copied!
# Load as pandas dataframe
df_electricity = (
pd.read_csv("tmp/data/uci_electricity/LD2011_2014.txt", delimiter=";", decimal=",")
.rename(columns={"Unnamed: 0": "date"})
.set_index("date")
)
df_electricity.index = pd.to_datetime(df_electricity.index)
# Load as pandas dataframe
df_electricity = (
pd.read_csv("tmp/data/uci_electricity/LD2011_2014.txt", delimiter=";", decimal=",")
.rename(columns={"Unnamed: 0": "date"})
.set_index("date")
)
df_electricity.index = pd.to_datetime(df_electricity.index)
In [ ]:
Copied!
df_electricity
df_electricity
In [ ]:
Copied!
df_electricity.loc[
(df_electricity.index >= "2012-01-01") & (df_electricity.index < "2012-02-01")
][["MT_001"]].plot()
df_electricity.loc[
(df_electricity.index >= "2012-01-01") & (df_electricity.index < "2012-02-01")
][["MT_001"]].plot()
Contributors: