I have written a program that displays multiple time-series curves (about 50) in the same panel. Because each time series contains around 1,000,000 rows of data, I apply the `downsample1d` operation (`from holoviews.operation.downsample import downsample1d`) before plotting.
However, even with just 10 curves plotted on the same panel, the curves do not update promptly when I zoom in; I have to wait several minutes for the downsampling to finish. I also think the `width` parameter of `downsample1d` has no effect.
Please correct me if I am wrong. Are there any ways to speed up the loading of these curves? Thank you! The following is a simple reproducible example:
import numpy as np
import pandas as pd
import holoviews as hv
from bokeh.models import DatetimeTickFormatter
from bokeh.models import CrosshairTool, Span
from holoviews.operation.downsample import downsample1d
import panel as pn
import param
# Select Bokeh as the HoloViews plotting backend; the hooks and tools used
# below (DatetimeTickFormatter, CrosshairTool) are Bokeh models.
hv.extension('bokeh')
# Initialise Panel with its default settings before any panel objects are built.
pn.extension()
# Synthetic dataset: 1,000,000 rows at one-second spacing with a 'time' column,
# 55 random float columns named 'f0' to 'f54', and a boolean 'is_sample' column
# holding exactly 50 True values (indices are drawn without replacement).
np.random.seed(0)
n_rows = 1_000_000
n_features = 55
n_samples = 50
start_time = pd.Timestamp('2023-01-01')
# Lower-case 's' is the supported alias for seconds; upper-case 'S' is
# deprecated since pandas 2.2 and emits a FutureWarning.
time_values = pd.date_range(start_time, periods=n_rows, freq='s')
# The order of the random draws (values first, then sample indices) is kept so
# the seeded output stays reproducible.
data = np.random.rand(n_rows, n_features)
columns = [f'f{i}' for i in range(n_features)]
is_sample = np.zeros(n_rows, dtype=bool)
is_sample[np.random.choice(n_rows, n_samples, replace=False)] = True
# Columns are attached in place to avoid copying the large float block.
df = pd.DataFrame(data, columns=columns)
df['time'] = time_values
df['is_sample'] = is_sample
def datetime_formatter(plot, element):
    """Plot hook that replaces the x-axis tick formatter with a date-aware one.

    Args:
        plot: Plot object exposing the rendered axis under
            ``plot.handles['xaxis']``.
        element: Element being rendered; part of the hook signature but not
            used here.
    """
    x_axis = plot.handles['xaxis']

    # Finer zoom levels show more of the timestamp; coarser ones show less.
    to_second = "%Y-%m-%d %H:%M:%S"
    to_minute = "%Y-%m-%d %H:%M"
    formats_by_scale = {
        "seconds": to_second,
        "minsec": to_second,
        "minutes": to_minute,
        "hourmin": to_minute,
        "hours": to_minute,
        "days": "%Y-%m-%d",
        "months": "%Y-%m",
        "years": "%Y",
    }
    x_axis.formatter = DatetimeTickFormatter(**formats_by_scale)
# Frame size applied to every element and to the final overlay.
_PLOT_WIDTH = 1500
_PLOT_HEIGHT = 400


def _sample_segments(rows, name, line_length, line_alpha, cht, color, label):
    """Build vertical marker segments of ``line_length`` centred on ``rows[name]``."""
    half_length = line_length / 2
    return hv.Segments(
        (rows['time'], rows[name] - half_length, rows['time'], rows[name] + half_length),
        kdims=['Time', name, 'X End', 'Y End'],
        label=label,
    ).opts(
        tools=[cht or 'crosshair', 'box_select'],
        color=color,
        width=_PLOT_WIDTH, height=_PLOT_HEIGHT,
        line_alpha=line_alpha,
        hover_tooltips=[('Time', '@{Time}')],
    )


def draw_sampled_curve(df, line_length, line_alpha, name, extra_name, cht, show_sample=False):
    """Draw one time-series curve, optionally with markers at sampled rows.

    Args:
        df: DataFrame with a 'time' column, an 'is_sample' column and the
            columns named by ``name`` and ``extra_name``.
        line_length: Total vertical length of each sample marker.
        line_alpha: Line alpha applied to the sample markers.
        name: Column plotted as the curve's value dimension.
        extra_name: Column whose value splits sampled rows into
            'Type 1' (>= 0.5, green) and 'Type 2' (< 0.5, red).
        cht: Tool added to each element; the string 'crosshair' is used when
            this is falsy.
        show_sample: When true, overlay the sample markers on the curve.

    Returns:
        An ``hv.Overlay``. An overlay is returned even when it contains only
        the curve, because a DynamicMap must always yield the same type.
    """
    curve = hv.Curve(
        data=(df['time'], df[name]), kdims=['Time'], vdims=[name],
    ).opts(
        width=_PLOT_WIDTH, height=_PLOT_HEIGHT,
        tools=[cht or 'crosshair'],
        interpolation='steps-post',
        hover_tooltips=[('Time', '@{Time}'),
                        ('Value', f'@{{{name}}}')],
    )

    elements = [curve]
    if show_sample:
        # Narrow to the sampled rows first so the threshold comparisons run on
        # that subset instead of twice over the whole frame.
        sampled = df.loc[df['is_sample'] == 1]
        # Two explicit comparisons (not a negation) so rows where extra_name
        # is NaN stay in neither group.
        df_pos = sampled.loc[sampled[extra_name] >= 0.5]
        df_neg = sampled.loc[sampled[extra_name] < 0.5]
        elements = [
            _sample_segments(df_pos, name, line_length, line_alpha, cht, 'green', 'Type 1'),
            _sample_segments(df_neg, name, line_length, line_alpha, cht, 'red', 'Type 2'),
            curve,
        ]

    return hv.Overlay(elements).opts(
        hooks=[datetime_formatter],
        xformatter="%Y-%m-%d %H:%M:%S",
        width=_PLOT_WIDTH, height=_PLOT_HEIGHT,
        title=f"Time Series of {name}",
        legend_position='top_left',
    )
class SampledDashboard(pn.viewable.Viewer):
    """Dashboard showing one downsampled curve per selected column.

    ``plot()`` builds a DynamicMap per entry of ``names`` via
    ``draw_sampled_curve``, wraps each in ``downsample1d`` and returns either
    the single plot or a pair of tabs (combined overlay / stacked layout).
    """

    downsampling_method = param.Selector(default='minmax-lttb', objects=['lttb', 'nth', 'm4', 'minmax-lttb'], doc="""
        The algorithm to use for downsampling:
        - `lttb`: Largest Triangle Three Buckets downsample algorithm.
        - `nth`: Selects every n-th point (default: 400).
        - `m4`: Selects the min, max, first and last value in each bin (requires tsdownsample).
        - `minmax-lttb`: First selects n_out * minmax_ratio min and max values (default minmax_ratio = 4),
          then further reduces these to n_out values using the
          Largest Triangle Three Buckets algorithm (requires tsdownsample).""")
    sample_line_length = param.Number(default=0.04, bounds=(0, None), doc="Length of sampled lines")
    sample_line_alpha = param.Number(default=0.5, bounds=(0, 1), doc="Alpha of sampled lines")
    show_sample = param.Boolean(True)
    # Source data handed unchanged to draw_sampled_curve.
    df = param.DataFrame()
    # Tool object forwarded to every plot. param.Selector replaces the
    # deprecated param.ObjectSelector alias and matches downsampling_method.
    cht = param.Selector()
    # Columns of df to plot, one curve each.
    names = param.ListSelector()
    # Column used by draw_sampled_curve to split sample markers into two types.
    extra_name = param.String()

    def _downsampled_curve(self, name):
        """Return the downsampled DynamicMap for the column ``name``."""
        # Parameter objects (self.param.<x>) are passed for the marker
        # settings so the reactive expression re-evaluates when they change;
        # the remaining arguments are passed as plain values.
        reactive_plot = param.rx(draw_sampled_curve)(
            df=self.df,
            line_length=self.param.sample_line_length,
            line_alpha=self.param.sample_line_alpha,
            show_sample=self.param.show_sample,
            name=name,
            extra_name=self.extra_name,
            cht=self.cht,
        )
        # NOTE(review): downsample1d attaches a PlotSize stream by default,
        # which presumably overrides this explicit `width` at render time;
        # confirm against the HoloViews docs before relying on width=20.
        return downsample1d(
            hv.DynamicMap(reactive_plot),
            algorithm=self.downsampling_method,
            parallel=True,
            width=20,
        )

    @param.depends('downsampling_method')
    def plot(self):
        """Build the downsampled plot(s) for every column in ``names``.

        Returns:
            The single downsampled DynamicMap when exactly one name is given,
            otherwise a ``pn.Tabs`` with an 'Overlay' tab (all curves on one
            axis) and a 'Layout' tab (one curve per row).

        Raises:
            ValueError: If ``names`` is unset or empty.
        """
        if not self.names:
            raise ValueError("SampledDashboard.names must list at least one column to plot")

        curves = [self._downsampled_curve(name) for name in self.names]
        if len(curves) == 1:
            return curves[0]

        overlay = hv.Overlay(curves).collate()
        overlay.opts(
            hooks=[datetime_formatter],
            xformatter="%Y-%m-%d %H:%M:%S",
            title="Time Series of All features",
            legend_position='top_left',
            ylabel='Features',
        )
        layout = hv.Layout(curves).cols(1)
        return pn.Tabs(('Overlay', overlay),
                       ('Layout', layout))
# Crosshair tool whose overlay is a single Span along the "height" dimension;
# it is passed to the dashboard below.
cht = CrosshairTool(overlay=Span(dimension="height"))

# Plot every generated feature column; the first one also drives the
# Type 1 / Type 2 split of the sample markers.
dashboard = SampledDashboard(
    df=df,
    names=columns,
    extra_name=columns[0],
    cht=cht,
    sample_line_length=1,
    sample_line_alpha=1,
)

# Widgets are exposed for the sample-marker settings only.
param_panel = pn.Param(
    dashboard,
    parameters=['show_sample', 'sample_line_length', 'sample_line_alpha'],
    name="Plot Settings",
)

# plot() is evaluated once here and its result is served next to the widgets.
srv = pn.serve(
    pn.Column(param_panel, dashboard.plot()),
    start=True,
    threaded=True,
)