Hi,
I am trying to visualize time-series data in parallel using Dask and HoloViews. When I call `rasterize`, the data is returned from memory to a single node rather than being processed in parallel.
import datashader as ds
import holoviews as hv
import xarray as xr
from dask import array as da
from dask.distributed import Client, progress
from dask_jobqueue import PBSCluster
from holoviews import opts
from holoviews.operation.datashader import rasterize
from bokeh.models import HoverTool
from natsort import natsorted
from pathlib import Path
import numpy as np
import shutil
import glob
import os
# Use the bokeh plotting backend for HoloViews, without the logo.
hv.extension('bokeh', config=dict(image_rtol=1000), logo=False)
hover = HoverTool(mode='vline')

# schedule compute resources
# NOTE(review): the PBSCluster / Client setup presumably belongs here;
# it is not shown in this snippet.

# Size of the synthetic data set: N_files blocks of N_steps_per_file
# time samples each, for N_channels channels.
N_channels = 1_200
N_steps_per_file = 25000
N_files = 500

# Lazy dask array of normally distributed samples (mean 10, std 0.1).
# Each chunk spans every channel and one file's worth of time steps, so
# there are N_files chunks along the time axis.
data = da.random.normal(
    10,
    0.1,
    size=(N_channels, N_files * N_steps_per_file),
    chunks=(N_channels, N_steps_per_file),
)

# Wrap the array in an xarray Dataset with integer coordinates on both axes.
dset = xr.Dataset(
    {"data": (("Channel", "time"), data)},
    coords={
        "Channel": np.arange(N_channels),
        "time": np.arange(N_steps_per_file * N_files),
    },
)
dset.time.attrs["units"] = 'seconds'

# Build the HoloViews image with time on x and Channel on y.
hv_ds = hv.Dataset(dset)
image = hv.Image(hv_ds, kdims=["time", "Channel"]).opts(
    colorbar=True,
    width=1200,
    height=500,
    cmap='jet',
    title='Data Field',
    invert_yaxis=True,
)

# NOTE(review): persist() is presumably meant to keep the dask-backed data
# in worker memory before rasterizing — confirm against the HoloViews docs.
image = image.persist()
rasterImage = rasterize(image)

# Bare expression: presumably displays the plot when run in a notebook cell.
rasterImage
If anyone could provide some insight as to why this is happening, it would be greatly appreciated.