How to filter on Panel dataframe output

Hello team,
I’m new to Panel and I love this amazing tool. Recently, I’m running into an issue with a use case and hope I can get some directions from experts in this forum. My use case is to build a Panel app and query the database. Then I need to split dataframe output to several smaller ones based on column filter condition. I’m able to query the database but unable to filter the dataframe output . Below is my code for your reference. Much appreciate if anyone can share some lights on this topic!

Error message I have:

Below is the script for your reference:
import panel as pn
import param
import datetime
pn.extension()
DATE_BOUNDS = (datetime.date(2000, 1, 1), datetime.datetime.now().date())

class query_db(param.Parameterized):
account_id = param.String(default=str(‘12345’), doc=‘Account ID’)
start_date = param.Date(default=(datetime.date.today() - datetime.timedelta(days=365)), bounds=DATE_BOUNDS)
end_date = param.Date(default=datetime.date.today(), bounds=DATE_BOUNDS)
button = param.Action(default=lambda x: x.param.trigger(‘button’), label=‘Run db query!’)

@param.depends("button")
def df_return(self): 
    #df=snowflake_run(self.account_id,cs,self.start_date.strftime('%Y-%m-%d'),self.end_date.strftime('%Y-%m-%d')) 
    # create a dummy dataset below for illustration 
    df = pd.DataFrame({
                        'int': [1, 2, 3],
                        'float': [3.14, 6.28, 9.42],
                        'type': ['A', 'B', 'C'],
                        'bool': [True, False, True],
                        'date': [dt.date(2019, 1, 1), dt.date(2020, 1, 1), dt.date(2020, 1, 10)],
                        'datetime': [dt.datetime(2019, 1, 1, 10), dt.datetime(2020, 1, 1, 12), dt.datetime(2020, 1, 10, 13)]
                    }, index=[1, 2, 3])
    return df

class Athlete(param.Parameterized):
db = param.ClassSelector(class_=query_db, default=query_db())

athlete = Athlete()

def new_class(cls, **kwargs):
“Creates a new class which overrides parameter defaults.”
return type(type(cls).name, (cls,), kwargs)

query_condition = pn.Param(
athlete.db,
default_layout=new_class(pn.GridBox, ncols=3),
show_name=False,
widgets = {
“button”: {“button_type”: “primary”},
“start_date”: pn.widgets.DatePicker,
“end_date”: pn.widgets.DatePicker } )

def return_tab():
return pn.panel(athlete.db.df_return, loading_indicator=True)

df1=return_tab()

df_A=df1._pane[df1._pane.type==‘A’]

pn.Column(query_condition,df1,df_A)

I don’t think you should be referencing an internal variable, _pane.

I believe what you want is df1.object which (if I scanned this correctly) returns a pd.DataFrame.

@ahuang11 Andrew, thanks for your reply! However, I tried df1.object but it did not work. It doesn’t recognize the column “type” from the dataframe. In my use case, I need to split the dataframe output into separate ones based on column “type”. Thanks.


AttributeError Traceback (most recent call last)
Cell In[11], line 1
----> 1 df1.object[df1.object.type==‘A’]

AttributeError: ‘function’ object has no attribute ‘type’

I see. Can you fix the formatting of your code so that I can copy/paste it and test?

Use three backticks on a new line, like this

`` <-- one more
code here on a new line
`` <-- one more backtick

Here is the sample code below. Thanks for looking into it

import panel as pn
import param
import datetime 
import pandas as pd
pn.extension()
DATE_BOUNDS = (datetime.date(2000, 1, 1), datetime.datetime.now().date())

class query_db(param.Parameterized):
    account_id = param.String(default=str('12345'), doc='Account ID')
    start_date = param.Date(default=(datetime.date.today() - datetime.timedelta(days=365)), bounds=DATE_BOUNDS)
    end_date = param.Date(default=datetime.date.today(), bounds=DATE_BOUNDS) 
    button = param.Action(default=lambda x: x.param.trigger('button'), label='Run db query!')  
    
    @param.depends("button")
    def df_return(self): 
        #df=snowflake_run(self.account_id,cs,self.start_date.strftime('%Y-%m-%d'),self.end_date.strftime('%Y-%m-%d')) 
        # create a dummy dataset below for illustration 
        df = pd.DataFrame({
                            'int': [1, 2, 3],
                            'float': [3.14, 6.28, 9.42],
                            'type': ['A', 'B', 'C'],
                            'bool': [True, False, True],
                            'date': [dt.date(2019, 1, 1), dt.date(2020, 1, 1), dt.date(2020, 1, 10)],
                            'datetime': [dt.datetime(2019, 1, 1, 10), dt.datetime(2020, 1, 1, 12), dt.datetime(2020, 1, 10, 13)]
                        }, index=[1, 2, 3])
        return df
        
class Athlete(param.Parameterized): 
    db = param.ClassSelector(class_=query_db, default=query_db())
    
athlete = Athlete()

def new_class(cls, **kwargs):
    "Creates a new class which overrides parameter defaults."
    return type(type(cls).__name__, (cls,), kwargs)

query_condition = pn.Param(
    athlete.db,
    default_layout=new_class(pn.GridBox, ncols=3),
    show_name=False,
    widgets = {
        "button": {"button_type": "primary"},
        "start_date": pn.widgets.DatePicker, 
        "end_date": pn.widgets.DatePicker  } )

def return_tab():
    return pn.panel(athlete.db.df_return, loading_indicator=True)  
 
df1=return_tab()
 
df_A=df1._pane[df1._pane.type=='A']

pn.Column(query_condition,df1,df_A)

I think I figure it out the solution below to split the dataframe based on column “string” value. However, I noticed I’m only able to get the static value for 2nd dataframe “df_A”. How do I fix this so I can desired output below for “df_A” if I change the Input_string value to be ‘True’ ? I tried to use callback but failed with Prama.Action. Thanks!

import panel as pn
import param
import datetime 
import pandas as pd
pn.extension()
pn.extension('tabulator')
DATE_BOUNDS = (datetime.date(2000, 1, 1), datetime.datetime.now().date())

class query_db(param.Parameterized):
    input_string = param.String(default=str('False'), doc='Account ID')
    start_date = param.Date(default=(datetime.date.today() - datetime.timedelta(days=365)), bounds=DATE_BOUNDS)
    end_date = param.Date(default=datetime.date.today(), bounds=DATE_BOUNDS) 
    button = param.Action(default=lambda x: x.param.trigger('button'), label='Run db query!')  
    
    @param.depends("button")
    def df_return(self): 
        #df=snowflake_run(self.account_id,cs,self.start_date.strftime('%Y-%m-%d'),self.end_date.strftime('%Y-%m-%d')) 
        # create a dummy dataset below for illustration 
        df = pd.DataFrame({
                            'int': [1, 2, 3], 
                            'type': ['B','A', 'C'],
                            'string': [self.input_string, self.input_string, 'True'],
                            'date': [dt.date(2019, 1, 1), dt.date(2020, 1, 1), dt.date(2020, 1, 10)] 
                        }, index=[1, 2, 3])
        return pn.widgets.Tabulator( df,pagination='local',page_size=6,show_index=False ) 
        
class Athlete(param.Parameterized): 
    db = param.ClassSelector(class_=query_db, default=query_db())
    
athlete = Athlete()

def new_class(cls, **kwargs):
    "Creates a new class which overrides parameter defaults."
    return type(type(cls).__name__, (cls,), kwargs)

query_condition = pn.Param(
    athlete.db,
    default_layout=new_class(pn.GridBox, ncols=3),
    show_name=False,
    widgets = {
        "button": {"button_type": "primary"},
        "start_date": pn.widgets.DatePicker, 
        "end_date": pn.widgets.DatePicker  } )

def return_tab():
    return pn.panel(athlete.db.df_return, loading_indicator=True)  
 
df1=return_tab()
 
df_A=df1._pane.value[df1._pane.value.string=='True']

pn.Column(query_condition,df1,df_A)

There’s probably a more elegant way to do this, but without diving too deep, here’s how you can get the original pd.DataFrame object:

import panel as pn
import param
import datetime as dt
import datetime 
import pandas as pd
pn.extension()
DATE_BOUNDS = (datetime.date(2000, 1, 1), datetime.datetime.now().date())

class query_db(param.Parameterized):
    account_id = param.String(default=str('12345'), doc='Account ID')
    start_date = param.Date(default=(datetime.date.today() - datetime.timedelta(days=365)), bounds=DATE_BOUNDS)
    end_date = param.Date(default=datetime.date.today(), bounds=DATE_BOUNDS) 
    button = param.Action(default=lambda x: x.param.trigger('button'), label='Run db query!')  
    
    @param.depends("button")
    def df_return(self): 
        #df=snowflake_run(self.account_id,cs,self.start_date.strftime('%Y-%m-%d'),self.end_date.strftime('%Y-%m-%d')) 
        # create a dummy dataset below for illustration 
        df = pd.DataFrame({
                            'int': [1, 2, 3],
                            'float': [3.14, 6.28, 9.42],
                            'type': ['A', 'B', 'C'],
                            'bool': [True, False, True],
                            'date': [dt.date(2019, 1, 1), dt.date(2020, 1, 1), dt.date(2020, 1, 10)],
                            'datetime': [dt.datetime(2019, 1, 1, 10), dt.datetime(2020, 1, 1, 12), dt.datetime(2020, 1, 10, 13)]
                        }, index=[1, 2, 3])
        return df
        
class Athlete(param.Parameterized): 
    db = param.ClassSelector(class_=query_db, default=query_db())
    
athlete = Athlete()

def new_class(cls, **kwargs):
    "Creates a new class which overrides parameter defaults."
    return type(type(cls).__name__, (cls,), kwargs)

query_condition = pn.Param(
    athlete.db,
    default_layout=new_class(pn.GridBox, ncols=3),
    show_name=False,
    widgets = {
        "button": {"button_type": "primary"},
        "start_date": pn.widgets.DatePicker, 
        "end_date": pn.widgets.DatePicker  } )

def return_tab():
    return pn.panel(athlete.db.df_return, loading_indicator=True)  
 
df1=return_tab()
print(type(df1.object()))
df_A = df1.object()[df1.object().type == "A"]

pn.Column(query_condition,df1,df_A)

ahuang11, thank you for your suggestion. I tested and it works. However, I got stuck now on how to refresh the 2nd dataframe df_A whenever I change the inputbox value and click the run button. I tried to create a callback to param.Action but failed to make it work. Any suggestions? Much appreciated! Refer to code below for your reference.

Hi,
If the main thing you’re after is that the Tabulator stuff gets automatically updated when you press the Button, the key thing missing is the Tabulator don’t get informed automatically about the update.
Reason is because you’re creating the Tabulator widget off a standard variable “DF”, not via pn.widgets.Tabulator.from_param(…), or via pn.Param, …

As you’re already using param.Parameterized classes anyway, you can also let Panel/Param do that automatic linkage on your behalf.

Here a slightly restructured example that hopefully does what you’re looking for:

import panel as pn
import param
import datetime 
import pandas as pd
pn.extension()
pn.extension('tabulator')
DATE_BOUNDS = (datetime.date(2000, 1, 1), datetime.datetime.now().date())

class query_db(param.Parameterized):
    # input widgets
    input_string = param.String(default=str('False'), doc='Account ID')
    start_date = param.Date(default=(datetime.date.today() - datetime.timedelta(days=365)), bounds=DATE_BOUNDS)
    end_date = param.Date(default=datetime.date.today(), bounds=DATE_BOUNDS) 

    # Trigger DB query 
    run_db_query = param.Action(default=lambda self: self.df_return())

    # output data (raw)
    df = param.DataFrame(default=None, precedence=-1)
    
    def df_return(self): 
        print('df_return()')
        #df=snowflake_run(self.account_id,cs,self.start_date.strftime('%Y-%m-%d'),self.end_date.strftime('%Y-%m-%d')) 
        # create a dummy dataset below for illustration 
        self.df = pd.DataFrame({
                            'int': [11, 22, 33], 
                            'type': ['BB','AA', 'CC'],
                            'string': [self.input_string, self.input_string, 'True'],
                            'date': [datetime.date(2019, 1, 1), datetime.date(2020, 1, 1), 
                                     datetime.date(2020, 1, 10)] 
                        }, index=[1, 2, 3])
        

        
class Athlete(param.Parameterized): 
    db = param.ClassSelector(class_=query_db, default=query_db())

    filtered_df = param.Parameter(default=None)

    @param.depends('db.df', watch=True)
    def filter_df(self):
        print('filter_df()')
        if self.db.df is not None:
            self.filtered_df = self.db.df[self.db.df['string'] == 'False']
    
    def view(self):
    
        ui = pn.Column(
            pn.Param(
                athlete.db,
                default_layout=new_class(pn.GridBox, ncols=3),
                show_name=False,
                widgets = {
                    "button": {"button_type": "primary"},
                    "start_date": pn.widgets.DatePicker, 
                    "end_date": pn.widgets.DatePicker  } ), 
            pn.widgets.Tabulator.from_param(self.db.param.df,pagination='local',page_size=6,show_index=False ), 
            pn.widgets.Tabulator.from_param(self.param.filtered_df,pagination='local',page_size=6,show_index=False ),
        )
        return ui
    
athlete = Athlete()

def new_class(cls, **kwargs):
    "Creates a new class which overrides parameter defaults."
    return type(type(cls).__name__, (cls,), kwargs)


athlete.view()