How can I use data uploaded through a FileInput widget in a param.Parameterized class, and handle the situation where no data has been uploaded yet?

I want the user to upload a file through the FileInput widget. This data needs to be accessed in a param.Parameterized class. The difficulty is handling the case in which no data has been uploaded yet.

As a minimal reproducible example, consider:

import panel as pn
import param
import pandas as pd

df = pd.read_excel("randomGeneratedData.xlsx")
pn.extension()

class dataSelector(param.Parameterized):
    """Single-choice selector over the column names of the module-level ``df``."""

    # The options are computed once, when the class body runs, so ``df`` must
    # already exist at that point. Columns are ordered case-insensitively.
    select = param.Selector(
        objects=sorted(df.columns.to_list(), key=str.lower),
    )

selector = dataSelector()
pn.WidgetBox(selector.param.select).servable()

I want to:

  • Not use a hardcoded df, but use a file uploaded by the user.
  • Prompt the user with a message “Please upload data” when no data has been uploaded yet.

This post is inspired by this one: How to handle the scenario is which a figure updates based on a selected data set, but no data has been chosen yet? - #15 by randomBloke. However, I believe the solution needed for a param.Parameterized class is very different and therefore made a separate post.

Hi @randomBloke,

This will need simplifying somewhat, as I haven’t stripped out everything that is unnecessary, but maybe it gives you an idea:

#Inspiration from
#@Hoxbro - https://discourse.holoviz.org/t/panel-table-doesnt-update/3137
#@Marc - https://discourse.holoviz.org/t/how-to-create-multiple-instances-of-object-and-display-methods/3157/2

from io import StringIO

import numpy as np
import pandas as pd
import panel as pn
import param
import holoviews as hv
from holoviews.operation import decimate
pn.extension("tabulator", sizing_mode="stretch_width")
import hvplot.pandas  # noqa must be called after pn.extension
ACCENT_COLOR="#0072B5"

from bokeh.core.enums import MarkerType
markers = list(MarkerType)

class Dataset(param.Parameterized):
    """Parameterized wrapper that holds a pandas DataFrame in ``value``."""
    # The wrapped table; no default is given, so it starts out unset.
    value = param.DataFrame()
    
class ReactiveTable(pn.viewable.Viewer):
    """Viewer that lets the user upload CSV files and shows the selected one.

    Every uploaded file is parsed into a ``Dataset`` and appended to the
    options of the ``dataset`` selector. The main area shows the currently
    selected dataset as a table, or a placeholder message while nothing has
    been uploaded yet.
    """

    # Column selectors kept from the original example; nothing in this class
    # reads them yet.
    x = param.Selector(label="x")
    y = param.Selector(label="y")

    # The upload widget is created in __init__ and is constant afterwards.
    new_dataset_input = param.ClassSelector(class_=pn.widgets.FileInput, constant=True)
    # Currently selected Dataset; None until the first upload.
    dataset = param.Selector()

    def __init__(self, **params):
        # A constant parameter can only be supplied through the constructor.
        params["new_dataset_input"] = pn.widgets.FileInput(accept=".csv")
        super().__init__(**params)
        # Give each instance its own (initially empty) list of datasets.
        self.param.dataset.objects = []

    @pn.depends("new_dataset_input.value", watch=True)
    def _parse_new_data_set(self):
        """Parse a freshly uploaded CSV file and make it the selected dataset."""
        value = self.new_dataset_input.value
        if not value:
            return

        string_io = StringIO(value.decode("utf8"))
        # Wrap the frame in a Dataset so it carries the file name as its label;
        # the frame itself is available as ``dataset.value``.
        dataset = Dataset(value=pd.read_csv(string_io), name=self.new_dataset_input.filename)

        # Assign a new list instead of appending in place, so the parameter
        # receives a fresh options list.
        self.param.dataset.objects = [*self.param.dataset.objects, dataset]
        self.dataset = dataset

    @pn.depends("dataset")  # re-rendered whenever another dataset is selected
    def output(self):
        """Return the selected dataset as a table, or a placeholder message."""
        if self.dataset is None:
            return "No Datasets Loaded"
        # The original returned None here, so an uploaded file was never shown.
        return pn.widgets.Tabulator(self.dataset.value)

    def __panel__(self):
        """Build the template: upload controls in the sidebar, table in main."""
        return pn.template.FastListTemplate(
            site="Holoviz",
            sidebar=[
                "Upload Dataset",
                self.new_dataset_input,
                self.param.dataset,
            ],
            main=[
                # loading_indicator=True is left out on purpose: it sometimes
                # gets stuck in the "on" position.
                pn.panel(self.output),
            ],
            title="Simply, An Underestimated Ecosystem of Tools",
            theme="dark",
        )

# Pick a pseudo-random port in the range [4000, 8000) for the local server.
# NOTE(review): nothing here checks that the chosen port is actually free.
from random import randrange
Port = randrange(4000, 8000)    

# Choose the launch mode from how the file is run, so the same file works
# both as a plain script (.show) and as a served app (.servable).
if __name__ == "__main__":
    app = ReactiveTable()
    app.show(port=Port)
# presumably the module name assigned when the file is served via Bokeh/Panel -- confirm
elif __name__.startswith("bokeh"):
    app = ReactiveTable()
    app.servable()

@carl

Thanks for the example. I feel like I am close, but my code still does not work. Since it also does not produce an error, I am having a bit of trouble debugging it.

Any idea why this does not work:

import panel as pn
import param
import pandas as pd
import io

pn.extension()

file_upload = pn.widgets.FileInput(accept=".xlsx")

class DataSelector(param.Parameterized):
    """Selector intended to offer the columns of the uploaded Excel file."""
    # Starts without any options.
    select = param.Selector()

    def refreshData(self):
        """Read the uploaded file and try to update ``select`` accordingly.

        NOTE(review): nothing in this snippet calls this method or registers
        it as a watcher, so it never runs.
        """
        if file_upload.value:
            # Copy the uploaded bytes into an in-memory buffer and rewind it
            # so pandas reads from the start.
            data = io.BytesIO()
            file_upload.save(data)
            data.seek(0)
            df = pd.read_excel(data)
            
            # NOTE(review): ``self.select`` is the selector's current value,
            # not the Parameter object; the options presumably live on
            # ``self.param.select.objects`` -- confirm.
            self.select.object = param.Selector(sorted(df.columns.to_list(), key=str.lower)) # (1)
        else:
            self.select.object = "Please upload data"

data_selector = DataSelector()

pn.serve(pn.Column(
    file_upload,
    data_selector.param.select
))

I also tried replacing the line with (1) with:

  • self.select.object = sorted(df.columns.to_list(), key=str.lower)
  • self.param['select'].set_param(objects=sorted(df.columns.to_list(), key=str.lower))

Hi @randomBloke,

I changed the code I posted above to at least load xlsx now and made it more minimal

from io import BytesIO
import numpy as np
import pandas as pd
import panel as pn
import param

pn.extension()

class Dataset(param.Parameterized):
    """Parameterized wrapper that holds a pandas DataFrame in ``value``."""
    # The wrapped table; no default is given, so it starts out unset.
    value = param.DataFrame()
    
class ReactiveTable(pn.viewable.Viewer):
    """Viewer that lets the user upload .xlsx files and shows the selected one.

    Every uploaded file is parsed into a ``Dataset`` and appended to the
    options of the ``dataset`` selector. While nothing has been uploaded, a
    placeholder message is shown instead of a table.
    """

    # The upload widget is created in __init__ and is constant afterwards.
    new_dataset_input = param.ClassSelector(class_=pn.widgets.FileInput, constant=True)
    # Currently selected Dataset; None until the first upload.
    dataset = param.Selector()

    def __init__(self, **params):
        # A constant parameter can only be supplied through the constructor.
        params["new_dataset_input"] = pn.widgets.FileInput(accept=".xlsx")
        super().__init__(**params)
        # Give each instance its own (initially empty) list of datasets.
        self.param.dataset.objects = []

    @pn.depends("new_dataset_input.value", watch=True)
    def _parse_new_data_set(self):
        """Parse a freshly uploaded workbook and make it the selected dataset."""
        raw = self.new_dataset_input.value
        if not raw:
            return

        # FileInput delivers the file content as bytes; pandas needs a
        # file-like object.
        buffer = BytesIO(raw)
        dataset = Dataset(value=pd.read_excel(buffer), name=self.new_dataset_input.filename)
        self.param.dataset.objects = [*self.param.dataset.objects, dataset]
        self.dataset = dataset

    @pn.depends("dataset")
    def output(self):
        """Return the selected dataset's DataFrame, or a placeholder message."""
        if self.dataset is None:
            return "No Datasets Loaded"
        # The original returned None here, so an uploaded file was never shown.
        return self.dataset.value

    def __panel__(self):
        """Lay out the upload widget, the dataset selector and the output."""
        return pn.Column(
            "Upload Dataset",
            self.new_dataset_input,
            self.param.dataset,
            self.output,
        )

    
app = ReactiveTable()
app.show()

Once I had made those changes, I think I partially saw why your code isn’t running. It won’t display the “no datasets loaded” message because the function never runs (at least when I use it in JupyterLab); as far as I can tell, that is because it needs to watch the value of the file input. Once you’re inside the function, you need to pass the data to BytesIO; at the moment nothing is being passed, as far as I can tell. As for the last line, I’m not sure, because I haven’t really looked at it, but you’ll be able to work on it now that you know you’re getting data in, with maybe something like the code below. Please excuse the multiple prints; I use them just to see where I get to and what errors out. I should really learn to make use of the debugger.

import panel as pn
import param
import pandas as pd
import io
import holoviews as hv

pn.extension()

class DataSelector(param.Parameterized):
    """Debugging variant that owns its upload widget and logs each step."""
    # Created once in the class body, so every instance shares this widget.
    file_upload = pn.widgets.FileInput()
    # Starts without any options.
    select = param.Selector()

    @param.depends('file_upload.value', watch=True)
    def refreshData(self):
        """Load the uploaded Excel file and print progress along the way."""
        print("I'm inside the def refreshdata")
        if self.file_upload.value:
            print("I have a value")
            # Wrap the uploaded bytes in a file-like object for pandas.
            data = io.BytesIO(self.file_upload.value)
            print("I have bytes IO")
            df = pd.read_excel(data)
            print("loaded data, dataframe below")
            print(df.head())
            print("now data is in it can be further processed")
        else:
            # This branch does not run: the method would have to be entered at
            # least once while no value is set.
            # NOTE(review): ``self.select`` is the selector's current value, so
            # ``.object`` is presumably not a valid attribute -- confirm.
            self.select.object = "Please upload data"

data_selector = DataSelector()

pn.serve(pn.Column(
    data_selector.file_upload,
    data_selector.param.select
))

I really need the file_upload section to not be part of a class. Furthermore, apparently select has no .object attribute. I guess these are my two issues.

I cannot call the function, because I cannot use @param.depends(...) on a variable that is outside the class. I also do not know how to replace the selector with a string while no data has been uploaded yet. For example, for data of type param.DataFrame() I can use param.DataFrame().object

1 Like

One step closer:


import panel as pn
import param
import pandas as pd
import io

pn.extension()

file_upload = pn.widgets.FileInput(accept=".xlsx")

class DataSelector(param.Parameterized):
    """Selector that shows a placeholder option until a file is uploaded."""
    # The placeholder is the only option at first.
    select = param.Selector(["Please upload data"])

    # NOTE(review): ``file_upload`` is a module-level variable, not an
    # attribute of this class, which is presumably why this dependency never
    # fires -- confirm.
    @param.depends('file_upload.value', watch=True)
    def refreshData(self):
        """Intended to rebuild the options from the uploaded Excel file."""
        print("This is never printed")
        if file_upload.value:
            # Copy the uploaded bytes into an in-memory buffer and rewind it
            # so pandas reads from the start.
            data = io.BytesIO()
            file_upload.save(data)
            data.seek(0)
            df = pd.read_excel(data)
            
            # NOTE(review): this assigns a new Selector object as the
            # parameter's value; it presumably does not change the list of
            # options -- confirm.
            self.select = param.Selector(sorted(df.columns.to_list(), key=str.lower))

data_selector = DataSelector()

pn.Column(
    file_upload,
    data_selector.param.select
).servable()

Interestingly the following does work when the test button is pressed:


import panel as pn
import param
import pandas as pd
import io

pn.extension()

file_upload = pn.widgets.FileInput(accept=".xlsx")

class DataSelector(param.Parameterized):
    """Variant that refreshes when the ``test`` event parameter fires."""
    # The placeholder is the only option at first.
    select = param.Selector(["Please upload data"])
    # Event parameter used to trigger refreshData manually.
    test = param.Event("test")
    
    @param.depends('test', watch=True)
    def refreshData(self):
        """Read the uploaded Excel file when the ``test`` event fires."""
        print("This is printed")
        if file_upload.value:
            # Copy the uploaded bytes into an in-memory buffer and rewind it
            # so pandas reads from the start.
            data = io.BytesIO()
            file_upload.save(data)
            data.seek(0)
            df = pd.read_excel(data)
            
            # NOTE(review): this assigns a new Selector object as the
            # parameter's value; it presumably does not change the list of
            # options -- confirm.
            self.select = param.Selector(sorted(df.columns.to_list(), key=str.lower))

data_selector = DataSelector()

pn.Column(
    file_upload,
    data_selector.param.select,
    data_selector.param.test
).servable()

So the problem must be in @param.depends('file_upload.value', watch=True).

UPDATE: Upon further inspection, the following code is close (with an additional unwanted button), but it claims the arguments of select cannot be overwritten:

import panel as pn
import param
import pandas as pd
import io

pn.extension()

file_upload = pn.widgets.FileInput(accept=".xlsx")

class DataSelector(param.Parameterized):
    """Variant that assigns the column list directly to ``select``."""
    # The placeholder is the only option at first.
    select = param.Selector(["Please upload data"])
    # Event parameter used to trigger refreshData manually.
    test = param.Event("test")
    
    @param.depends('test', watch=True)
    def refreshData(self):
        """Read the uploaded Excel file when the ``test`` event fires."""
        print("This is printed")
        if file_upload.value:
            # Copy the uploaded bytes into an in-memory buffer and rewind it
            # so pandas reads from the start.
            data = io.BytesIO()
            file_upload.save(data)
            data.seek(0)
            df = pd.read_excel(data)
            
            # This sets the selector's *value* to a list, which is not one of
            # its options; the options are changed through
            # ``self.param.select.objects`` instead.
            self.select = sorted(df.columns.to_list(), key=str.lower)

data_selector = DataSelector()

pn.Column(
    file_upload,
    data_selector.param.select,
    data_selector.param.test
).servable()

After pressing the test button, the following error is displayed: [a, b, c] not in parameter select's list of possible objects, valid options include [Please upload data] where [a, b, c] are the columns in my excel file

If you change

self.select for self.param.select.objects, I think that’ll help. You may also be able to bind the file input to the class; I don’t know if that is possible, but if it is, you might be able to remove the extra button.

Wow, replacing self.select with self.param.select.objects seems like a magic solution! Thank you so much for this.

Now I just need to find a way to get rid of the test button.

I decided to make a separate topic for how to watch for changes in the global variable file_upload, because this seems slightly off-topic. The solution was given here: Inside a class, how to watch for changes in a global variable? - #2 by carl. Special thanks to @carl

For documentation purposes, the final code needed to close this topic is:

import panel as pn
import param
import pandas as pd 
import io

pn.extension()

file_upload = pn.widgets.FileInput(accept=".xlsx")

class DataSelector(param.Parameterized):
    """Selector whose options are the columns of an uploaded Excel file.

    Until a file has been uploaded, the only option is the placeholder
    "Please upload data". ``file_callback`` must be registered as a watcher
    on the upload widget's ``value`` parameter.
    """

    # Placeholder shown while no data has been uploaded yet.
    select = param.Selector(objects=["Please upload data"])

    def __init__(self, file_upload, **params):
        """Store the upload widget whose content feeds this selector."""
        super().__init__(**params)
        self.file_upload = file_upload

    def file_callback(self, event):
        """Replace the selector options with the uploaded file's columns.

        Parameters
        ----------
        event
            Watcher event for the upload widget's ``value``; ``event.new``
            holds the uploaded file content as bytes, or a falsy value when
            nothing has been uploaded.
        """
        if not event.new:
            return

        df = pd.read_excel(io.BytesIO(event.new))
        # Excel headers are not always strings (numbers, dates), so convert
        # them before lower-casing for the case-insensitive sort.
        columns = sorted(df.columns.to_list(), key=lambda col: str(col).lower())
        if not columns:
            # Nothing to choose from: keep the placeholder.
            return

        self.param.select.objects = columns
        # The placeholder is no longer a valid option; move the selection to
        # the first column so the value and the options stay consistent.
        self.select = columns[0]
            
data_selector = DataSelector(file_upload=file_upload) 
file_upload.param.watch(data_selector.file_callback, 'value')

pn.Column(
    file_upload, 
    data_selector.param.select
).servable()
1 Like