How to plot a huge image that does not fit in memory?

I expanded a bit on your example from “Larger than memory plotting: how to set initial slice to plot” (#8 by ahuang11) and am very happy with the result :slight_smile:

Here, my zarr file contains two arrays that weigh ~50 GB each. Depending on the zoom level (controlled through max_size), only every i-th pixel is sampled for datashading.

One thing I haven’t managed to figure out yet is how I could make the cmap and cbar_limit options interactive.

import numpy as np
import holoviews as hv
import holoviews.streams
import xarray as xr
from holoviews.operation.datashader import rasterize
import param
import panel as pn
import numpy as np

hv.extension("bokeh")  # select the Bokeh plotting backend for HoloViews


class BigDataViz(param.Parameterized):
    """Interactively visualize larger-than-memory 2D arrays from an xarray Dataset.

    Based on the current zoom level (the viewed x/y ranges delivered by a
    ``RangeXY`` stream) only every i-th pixel is loaded, so that at most
    ``max_size`` points are handed to datashader's ``rasterize`` at a time.
    """

    # DynamicMaps holding the raw and the rasterized (datashaded) view.
    dm_view = param.ClassSelector(class_=hv.DynamicMap, precedence=0)
    dm_shaded_view = param.ClassSelector(class_=hv.DynamicMap, precedence=0)

    # Annotation fixed to xr.Dataset to match class_ (was xr.DataArray).
    ds: xr.Dataset = param.ClassSelector(class_=xr.Dataset, precedence=0)

    # Name of the 2D data variable currently displayed; objects are
    # populated in __init__ from the dataset's spatial variables.
    array: str = param.Selector()

    cmap: str = param.Selector(
        default="Viridis",
        objects=["Viridis", "gray", "PiYG", "flag", "Set1"],
        precedence=0,
    )
    # (vmin, vmax) colorbar limits passed to ``clim``.
    cbar_limit: tuple = param.Tuple(default=None, length=2, precedence=0)

    # Maximum number of points that may be loaded/rasterized at once.
    max_size: int = param.Integer(default=int(7e7))

    # Current subsampling stride and resulting number of loaded points.
    i: int = param.Integer(default=1)
    npoints: int = param.Integer(precedence=0)

    def __init__(self, **params):
        """Build the DynamicMap pipeline for the given ``ds`` Dataset."""
        # Sort coordinates ascending so label-based slicing (``.sel`` with
        # slices) works regardless of the on-disk coordinate order.
        params["ds"] = params["ds"].sortby(list(params["ds"].coords), ascending=True)

        super().__init__(**params)

        # BUG FIX: use self.ds rather than the module-level global ``ds`` —
        # the original only worked when a global named ``ds`` existed.
        spatial_vars = [x for x in self.ds.data_vars if len(self.ds[x].dims) == 2]
        self.param["array"].objects = spatial_vars
        self.param.update(array=spatial_vars[0])

        # Initial view: the coordinate ranges of a centered block.
        self.init_range = self.determine_middle()

        self.range_xy = holoviews.streams.RangeXY(
            x_range=self.init_range[0], y_range=self.init_range[1]
        )

        self.dm_view = hv.DynamicMap(
            self.load_data,
            streams=[
                self.range_xy,
                self.param["array"],
                self.param["max_size"],
            ],
        )

        self.dm_shaded_view = rasterize(self.dm_view).opts(
            cmap=self.cmap,
            clim=self.cbar_limit,
            colorbar=True,
            tools=["hover"],
            responsive=True,
            framewise=True,
        )

    def determine_middle(self):
        """Return [(min, max), ...] coordinate ranges of a centered block.

        The block is roughly ``sqrt(max_size)`` pixels per side, i.e. it
        holds about ``max_size`` points at full resolution.
        """
        blocksize = np.sqrt(self.max_size).astype(int)
        # BUG FIX: use self.ds / self.array rather than the global ``ds``.
        bb = {
            dim: int((self.ds[dim].size - blocksize) / 2)
            for dim in self.ds[self.array].dims
        }
        x = self.ds[self.array].isel(
            {dim: slice(idx, idx + blocksize) for dim, idx in bb.items()}
        )
        return [
            (float(x[dim].min().values), float(x[dim].max().values))
            for dim in self.ds[self.array].dims
        ]

    def load_data(self, x_range, y_range, array, max_size):
        """Load a subsampled slice of ``array`` covering the current view.

        Increases the stride ``i`` until the selected region holds at most
        ``max_size`` points, then loads it into memory and wraps it in an
        ``hv.Image``.
        """
        i = 0
        data: xr.DataArray = self.ds[array]
        dims = self.ds[array].dims
        # NOTE(review): if the full array already fits within max_size the
        # loop never runs and the x/y ranges are ignored — the whole array
        # is returned. Intentional for huge data; confirm for small inputs.
        while data.size > max_size:
            i += 1
            kwargs = {
                dim: slice(*range_, i) for dim, range_ in zip(dims, [x_range, y_range])
            }
            data = self.ds[array].sel(**kwargs)
        # Expose the stride and point count on the dashboard.
        self.param.update(i=i, npoints=data.size)
        image = hv.Image(data.compute(), kdims=list(dims))
        return image

    def serve(self):
        """Launch a Panel server showing the shaded view plus the widgets."""
        pn.serve(
            pn.Row(
                pn.pane.HoloViews(
                        self.dm_shaded_view, 
                        sizing_mode="stretch_both"
                    ),
                pn.Param(
                    self.param, 
                    display_threshold=2),
                sizing_mode="stretch_both",
                styles={"background-color": "yellow"}
            ),
        )


if __name__ == "__main__":

    file_path = "my_data.zarr3"

    # NOTE first dimension of variable will map to X-axis, second to Y-axis.
    ds = (
        xr.open_zarr(file_path)
        .transpose("x", "y")
        .drop_vars("spatial_ref")
    )

    viz = BigDataViz(ds=ds, cbar_limit=(-2, 1))
    viz.serve()

My dataset looks like this:

<xarray.Dataset> Size: 91GB
Dimensions:                     (x: 120960, y: 47040)
Coordinates:
  * x                           (x) float64 968kB -180.0 -180.0 ... 180.0 180.0
  * y                           (y) float64 376kB 80.0 80.0 ... -59.99 -60.0
Data variables:
    KGE  (x, y) float64 46GB dask.array<chunksize=(512, 512), meta=np.ndarray>
    NSE  (x, y) float64 46GB dask.array<chunksize=(512, 512), meta=np.ndarray>
Attributes:
    units:      -
    name:       r
    long_name:  Pearson Correlation Coefficient (WAPOR - CLMS, etlook)
1 Like