Accelerate loading multiple downsampled curves in Holoviews

ajsfgaksjfdb · April 24, 2025, 10:40am

I have written a program to display multiple time-series curves (about 50) in the same panel. Because each time series contains around 1,000,000 rows of data, I use the downsample1d function (from holoviews.operation.downsample import downsample1d) before plotting. However, even with just 10 curves plotted on the same panel, the curves do not update promptly when I zoom in; I have to wait several minutes for them to finish downsampling. Also, I think the width parameter of downsample1d does not work; please correct me if I am wrong. Are there any ways to accelerate the loading of these curves? Thank you! The following is a simple reproducible example:

import numpy as np
import pandas as pd
import holoviews as hv
from bokeh.models import DatetimeTickFormatter
from bokeh.models import CrosshairTool, Span
from holoviews.operation.downsample import downsample1d
import panel as pn
import param
# Activate the Bokeh plotting backend for HoloViews.
hv.extension('bokeh')
# Initialise Panel; no optional extensions are requested here.
pn.extension()


# Generate 1,000,000 rows of data with columns: 'time' (one timestamp per second),
# 55 columns named 'f0' to 'f54' holding random floats, and a boolean column
# 'is_sample' with exactly 50 True values (indices drawn without replacement).
np.random.seed(0)  # fixed seed so the example is reproducible
n_rows = 1_000_000
start_time = pd.Timestamp('2023-01-01')
# Use the lowercase 's' alias for seconds: the uppercase 'S' alias is deprecated
# since pandas 2.2 and emits a FutureWarning.
time_values = pd.date_range(start_time, periods=n_rows, freq='s')
data = np.random.rand(n_rows, 55)
columns = [f'f{i}' for i in range(55)]
is_sample = np.zeros(n_rows, dtype=bool)
is_sample[np.random.choice(n_rows, 50, replace=False)] = True
df = pd.DataFrame(data, columns=columns)
df['time'] = time_values
df['is_sample'] = is_sample


def datetime_formatter(plot, element):
    """Plot hook that installs a custom datetime tick formatter on the x-axis.

    Replaces the ``formatter`` of ``plot.handles['xaxis']`` with a
    ``DatetimeTickFormatter`` that uses full ISO-like date/time patterns at
    every listed resolution.

    Args:
        plot: Plot object exposing a ``handles`` mapping with an ``'xaxis'`` entry.
        element: Unused; present because hooks are called with ``(plot, element)``.
    """
    # One strftime pattern per tick resolution, from finest to coarsest.
    tick_formats = {
        "seconds": "%Y-%m-%d %H:%M:%S",
        "minsec": "%Y-%m-%d %H:%M:%S",
        "minutes": "%Y-%m-%d %H:%M",
        "hourmin": "%Y-%m-%d %H:%M",
        "hours": "%Y-%m-%d %H:%M",
        "days": "%Y-%m-%d",
        "months": "%Y-%m",
        "years": "%Y",
    }
    plot.handles['xaxis'].formatter = DatetimeTickFormatter(**tick_formats)

def draw_sampled_curve(df, line_length, line_alpha, name, extra_name, cht, show_sample=False):
    """Build an overlay with the step curve of one column and optional sample markers.

    Args:
        df: DataFrame with a ``'time'`` column, an ``'is_sample'`` column and the
            columns named by ``name`` and ``extra_name``.
        line_length: Total vertical length of each sample marker; the marker is
            centred on the value of ``name`` at the sampled row.
        line_alpha: ``line_alpha`` option applied to the sample markers.
        name: Column plotted as the curve (also used in the title).
        extra_name: Column that splits the sampled rows: values ``>= 0.5`` are
            drawn green with label 'Type 1', values ``< 0.5`` red with label 'Type 2'.
        cht: Tool object used in the ``tools`` list; when falsy the string
            ``'crosshair'`` is used instead.
        show_sample: When true, the sample markers are added to the overlay.

    Returns:
        An ``hv.Overlay`` (always, even when it only holds the curve) with the
        datetime hook, x formatter, size, title and legend position applied.
    """
    crosshair = cht if cht else 'crosshair'
    frame_size = dict(width=1500, height=400)

    def marker_segments(rows, color, label):
        # Vertical segment per row, spanning line_length around the curve value.
        half_length = line_length / 2
        return hv.Segments(
            (rows['time'], rows[name] - half_length, rows['time'], rows[name] + half_length),
            kdims=['Time', name, 'X End', 'Y End'],
            label=label,
        ).opts(
            tools=[crosshair, 'box_select'],
            color=color,
            line_alpha=line_alpha,
            hover_tooltips=[('Time', '@{Time}')],
            **frame_size,
        )

    curve = hv.Curve(
        data=(df['time'], df[name]),
        kdims=['Time'],
        vdims=[name],
    ).opts(
        tools=[crosshair],
        interpolation='steps-post',
        hover_tooltips=[('Time', '@{Time}'), ('Value', f'@{{{name}}}')],
        **frame_size,
    )

    # The curve is wrapped in an Overlay even on its own: a DynamicMap must only
    # contain one type of object, not both Curve and Overlay.
    layers = [curve]
    if show_sample:
        is_marked = df['is_sample'] == 1
        upper_rows = df.loc[(df[extra_name] >= 0.5) & is_marked]
        lower_rows = df.loc[(df[extra_name] < 0.5) & is_marked]
        layers = [
            marker_segments(upper_rows, 'green', 'Type 1'),
            marker_segments(lower_rows, 'red', 'Type 2'),
            curve,
        ]

    return hv.Overlay(layers).opts(
        hooks=[datetime_formatter],
        xformatter="%Y-%m-%d %H:%M:%S",
        title=f"Time Series of {name}",
        legend_position='top_left',
        **frame_size,
    )



class SampledDashboard(pn.viewable.Viewer):
    """Holds the plot settings and builds downsampled time-series plots.

    For every entry in ``names`` a dynamic overlay is created with
    ``draw_sampled_curve`` and wrapped in ``downsample1d``; ``plot`` combines
    the results.
    """

    # Algorithm handed to downsample1d.
    downsampling_method = param.Selector(
        default='minmax-lttb',
        objects=['lttb', 'nth', 'm4', 'minmax-lttb'],
        doc="""
        The algorithm to use for downsampling:

        - `lttb`: Largest Triangle Three Buckets downsample algorithm.
        - `nth`: Selects every n-th point (default: 400).
        - `m4`: Selects the min, max, first and last value in each bin (requires tsdownsample).
        - `minmax-lttb`: First selects n_out * minmax_ratio min and max values (default minmax_ratio = 4),
                         then further reduces these to n_out values using the
                         Largest Triangle Three Buckets algorithm (requires tsdownsample).""",
    )
    # Appearance of the sample markers drawn by draw_sampled_curve.
    sample_line_length = param.Number(default=0.04, bounds=(0, None), doc="Length of sampled lines")
    sample_line_alpha = param.Number(default=0.5, bounds=(0, 1), doc="Alpha of sampled lines")
    show_sample = param.Boolean(True)
    # Source data and the column names / tool forwarded to draw_sampled_curve.
    df = param.DataFrame()
    cht = param.ObjectSelector()
    names = param.ListSelector()
    extra_name = param.String()

    @param.depends('downsampling_method')
    def plot(self):
        """Build the downsampled plot(s) for every column in ``names``.

        Returns:
            The single downsampled plot when ``names`` has exactly one entry;
            otherwise a ``pn.Tabs`` with an 'Overlay' tab (all curves on one
            plot) and a 'Layout' tab (one plot per curve in a single column).
        """
        curves = []
        for name in self.names:
            # The three `self.param.*` arguments are passed as parameter objects
            # (not values) to the reactive wrapper around draw_sampled_curve.
            dynamic_overlay = hv.DynamicMap(param.rx(draw_sampled_curve)(
                df=self.df,
                line_length=self.param.sample_line_length,
                line_alpha=self.param.sample_line_alpha,
                show_sample=self.param.show_sample,
                name=name,
                extra_name=self.extra_name,
                cht=self.cht,
            ))
            # NOTE(review): `width=20` reportedly has no visible effect; confirm
            # how downsample1d resolves its width before relying on it.
            curves.append(downsample1d(
                dynamic_overlay,
                algorithm=self.downsampling_method,
                parallel=True,
                width=20,
            ))

        if len(curves) == 1:
            return curves[0]

        overlay_options = dict(
            hooks=[datetime_formatter],
            xformatter="%Y-%m-%d %H:%M:%S",
            title="Time Series of All features",
            legend_position='top_left',
            ylabel='Features',
        )
        overlay = hv.Overlay(curves).collate()
        overlay.opts(**overlay_options)

        layout = hv.Layout(curves).cols(1)

        return pn.Tabs(
            ('Overlay', overlay),
            ('Layout', layout),
        )


# Crosshair tool whose overlay is a Span with dimension "height"; the same tool
# object is handed to the dashboard for use in every plot it builds.
cht = CrosshairTool(overlay=Span(dimension="height"))

dashboard = SampledDashboard(
    df=df,
    names=columns,
    extra_name=columns[0],
    cht=cht,
    sample_line_length=1,
    sample_line_alpha=1,
)

# Widgets for the three marker-related parameters only.
param_panel = pn.Param(
    dashboard,
    parameters=['show_sample', 'sample_line_length', 'sample_line_alpha'],
    name="Plot Settings",
)

# plot() is evaluated once here and its result is placed under the widgets.
srv = pn.serve(
    pn.Column(param_panel, dashboard.plot()),
    start=True,
    threaded=True,
)

jbednar · April 26, 2025, 6:16pm

Have you tried the LTTB downsampling? See the large time series page in the hvplot docs for details.

ajsfgaksjfdb · April 28, 2025, 2:12am

Hi @jbednar, thank you for your reply! I’ve tried LTTB before, but I thought methods like minmax-lttb use tsdownsample, which is faster and more efficient. Correct me if I am wrong. Thank you!

jbednar · April 30, 2025, 4:03pm

Never mind me; I thought that downsample1d was a simpler decimation algorithm, but I just checked and it’s LTTB (via tsdownsample if available). So never mind that!

Anyway, please file a bug report with a fully reproducible example of the width bug, and a separate issue presenting timings for a reproducible downsample example that you’d expect to be faster.

ajsfgaksjfdb · April 30, 2025, 4:08pm

I will do that. Thank you!