I want the user to upload a file through the FileInput widget. This data needs to be accessed in a param.Parameterized class. The difficulty is handling the case in which no data has been uploaded yet.
As a minimal reproducible example, consider:
import panel as pn
import param
import pandas as pd
df = pd.read_excel("randomGeneratedData.xlsx")
pn.extension()
class dataSelector(param.Parameterized):
select = param.Selector(sorted(df.columns.to_list(), key=str.lower))
selector = dataSelector()
pn.WidgetBox(selector.param.select).servable()
I want to:
Not use a hardcoded df, but use a file uploaded by the user.
Prompt the user with a message “Please upload data” when no data has been uploaded yet.
Will need simplified somewhat as haven’t stripped out all the unecessary but maybe an idea
#Insperation from
#@Hoxbro - https://discourse.holoviz.org/t/panel-table-doesnt-update/3137
#@Marc - https://discourse.holoviz.org/t/how-to-create-multiple-instances-of-object-and-display-methods/3157/2
from io import StringIO
import numpy as np
import pandas as pd
import panel as pn
import param
import holoviews as hv
from holoviews.operation import decimate
pn.extension("tabulator", sizing_mode="stretch_width")
import hvplot.pandas # noqa must be called after pn.extension
ACCENT_COLOR="#0072B5"
from bokeh.core.enums import MarkerType
markers = list(MarkerType)
class Dataset(param.Parameterized):
value = param.DataFrame()
class ReactiveTable(pn.viewable.Viewer):
x = param.ObjectSelector(label="x")
y = param.ObjectSelector(label="y")
new_dataset_input = param.ClassSelector(class_=pn.widgets.FileInput, constant=True)##
dataset = param.Selector()
def __init__(self, **params):
params["new_dataset_input"]=pn.widgets.FileInput(accept = '.csv')
super().__init__(**params)
self.param.dataset.objects=[]
@pn.depends("new_dataset_input.value", watch = True)
def _parse_new_data_set(self):
value = self.new_dataset_input.value
if not value:
return
string_io = StringIO(value.decode("utf8"))
dataset = Dataset(value=pd.read_csv(string_io), name=self.new_dataset_input.filename) #instantiate Dataset class and utilise value, dataframe can now be called with dataset.value
existing_datasets = self.param.dataset.objects
self.param.dataset.objects=[*existing_datasets, dataset]
self.dataset = dataset
@pn.depends("dataset") #if dataset changes load new data to table
def output(self):
if self.dataset:
return
else:
return "No Datasets Loaded"
def __panel__(self):
return pn.template.FastListTemplate(
site="Holoviz",
sidebar = ["Upload Dataset",
self.new_dataset_input,
self.param.dataset,
],
main = [
pn.panel(self.output)#, loading_indicator=True) #disabled loading indicator, it's great but sometimes gets stuck in on position
],
title="Simply, An Underestimated Ecosystem of Tools",
theme="dark",
)
#Create random port automatically
from random import randrange
Port = randrange(4000, 8000)
#saves a lot of time changing between .servable and .show brilliant
if __name__ == "__main__":
app = ReactiveTable()
app.show(port=Port)
elif __name__.startswith("bokeh"):
app = ReactiveTable()
app.servable()
Thanks for the example. I feel like I am close, but my code still does not work. Since it also does not produce an error I am having a little bit trouble trying to debug it.
I changed the code I posted above to at least load xlsx now and made it more minimal
from io import BytesIO
import numpy as np
import pandas as pd
import panel as pn
import param
pn.extension()
class Dataset(param.Parameterized):
value = param.DataFrame()
class ReactiveTable(pn.viewable.Viewer):
new_dataset_input = param.ClassSelector(class_=pn.widgets.FileInput, constant=True)
dataset = param.Selector()
def __init__(self, **params):
params["new_dataset_input"]=pn.widgets.FileInput(accept = '.xlsx')
super().__init__(**params)
self.param.dataset.objects=[]
@pn.depends("new_dataset_input.value", watch=True)
def _parse_new_data_set(self):
if self.new_dataset_input.value:
string_io = BytesIO(self.new_dataset_input.value)
dataset = Dataset(value=pd.read_excel(string_io), name=self.new_dataset_input.filename)
existing_datasets = self.param.dataset.objects
self.param.dataset.objects=[*existing_datasets, dataset]
self.dataset = dataset
else:
return
@pn.depends("dataset")
def output(self):
if self.dataset:
return
else:
return "No Datasets Loaded"
def __panel__(self):
return pn.panel(pn.Column("Upload Dataset",
self.new_dataset_input,
self.param.dataset,
self.output
)
)
app = ReactiveTable()
app.show()
Once I had made those changes then I think I saw partially why your code isn’t running, it won’t display no datasets loaded because the function doesn’t run when I use it in jupyter lab, it also isn’t running as far as I can tell because it needs to watch for the value of the file. Once your inside you need to pass data to the BytesIO at the moment nothing is being passed as I can tell. Then the last line, I’m not sure because I’ve not really looked at it at all but you’ll be able to work on it now you know your getting data in with maybe something like, please excuse the multiple prints, I use it just to see where I get to and what errors out I should really learn to make use of the debugger.
import panel as pn
import param
import pandas as pd
import io
import holoviews as hv
pn.extension()
class DataSelector(param.Parameterized):
file_upload = pn.widgets.FileInput()
select = param.Selector()
@param.depends('file_upload.value', watch=True)
def refreshData(self):
print("I'm inside the def refreshdata")
if self.file_upload.value:
print("I have a value")
data = io.BytesIO(self.file_upload.value)
print("I have bytes IO")
df = pd.read_excel(data)
print("loaded data, dataframe below")
print(df.head())
print("now data is in it can be further processed")
else:
#This doesn't run because it needs to enter this def at least once without a value
self.select.object = "Please upload data"
data_selector = DataSelector()
pn.serve(pn.Column(
data_selector.file_upload,
data_selector.param.select
))
I really need the file_upload section to not be part of a class. Furthermore, apparently select has no .object attribute. I guess these are my two issues.
I cannot call the function, because I cannot use @param.depends(...) on a variable that is outside the class. I am do not know how to replace the selector with a string while no data has been uploaded yet. For example, for data of type param.DataFrame() I can use param.DataFrame().object
import panel as pn
import param
import pandas as pd
import io
pn.extension()
file_upload = pn.widgets.FileInput(accept=".xlsx")
class DataSelector(param.Parameterized):
select = param.Selector(["Please upload data"])
@param.depends('file_upload.value', watch=True)
def refreshData(self):
print("This is never printed")
if file_upload.value:
data = io.BytesIO()
file_upload.save(data)
data.seek(0)
df = pd.read_excel(data)
self.select = param.Selector(sorted(df.columns.to_list(), key=str.lower))
data_selector = DataSelector()
pn.Column(
file_upload,
data_selector.param.select
).servable()
Interestingly the following does work when the test button is pressed:
import panel as pn
import param
import pandas as pd
import io
pn.extension()
file_upload = pn.widgets.FileInput(accept=".xlsx")
class DataSelector(param.Parameterized):
select = param.Selector(["Please upload data"])
test = param.Event("test")
@param.depends('test', watch=True)
def refreshData(self):
print("This is printed")
if file_upload.value:
data = io.BytesIO()
file_upload.save(data)
data.seek(0)
df = pd.read_excel(data)
self.select = param.Selector(sorted(df.columns.to_list(), key=str.lower))
data_selector = DataSelector()
pn.Column(
file_upload,
data_selector.param.select,
data_selector.param.test
).servable()
So the problem must be in @param.depends('file_upload.value', watch=True).
UPDATE: Upon further inspection, the following code is close (with an additional unwanted button), but it claims the arguments of select cannot be overwritten:
import panel as pn
import param
import pandas as pd
import io
pn.extension()
file_upload = pn.widgets.FileInput(accept=".xlsx")
class DataSelector(param.Parameterized):
select = param.Selector(["Please upload data"])
test = param.Event("test")
@param.depends('test', watch=True)
def refreshData(self):
print("This is printed")
if file_upload.value:
data = io.BytesIO()
file_upload.save(data)
data.seek(0)
df = pd.read_excel(data)
self.select = sorted(df.columns.to_list(), key=str.lower)
data_selector = DataSelector()
pn.Column(
file_upload,
data_selector.param.select,
data_selector.param.test
).servable()
After pressing the test button, the following error is displayed: [a, b, c] not in parameter select's list of possible objects, valid options include [Please upload data] where [a, b, c] are the columns in my excel file
self.select for self.param.select.objects think that’ll help. Maybe be able to bind the file input to the class, I don’t know if possible though but if it is might be able to remove the extra button.