Bad code structure leading to poor performance

panelnoob · February 8, 2024, 4:23pm

I created this Panel script, but it responds very slowly whenever I change one of the widgets. I wonder whether the problem is caused by my code structure. Unfortunately, it seems I can’t upload an example of my data frame, so I will just paste a snippet of my code here. Thanks for any help!

import pandas as pd
import numpy as np
from datetime import datetime
from datetime import timedelta
import panel as pn
import hvplot.pandas as hvplot

pn.extension()

########################################################################################## Preprocessing ###########################################################################################

# Load the issue export; every chart below is derived from this frame.
all_issues = pd.read_csv('example.csv')

# The first space-separated token of 'Created' is a date such as '08/Feb/24'.
# Parse the whole column in one vectorised call instead of running
# datetime.strptime once per row in a Python loop.  Rows with a missing
# 'Created' value become NaT rather than aborting the script.
all_issues['start_date'] = pd.to_datetime(
    all_issues['Created'].str.split(' ').str[0],
    format='%d/%b/%y',
)

# Month-resolution period; the charts group by this column.
all_issues['start_date_period'] = all_issues['start_date'].dt.to_period('M')


############################################################################################## Label Filter ########################################################################################

# Every column whose name contains 'Label' carries label values
label_columns = [column for column in all_issues.columns if 'Label' in column]

# Distinct, non-null label values found in any of those columns, sorted
labels = sorted({
    value
    for column in label_columns
    for value in all_issues[column].dropna().unique()
})

# Sidebar widgets: a preset chooser and the detailed multi-selection
# (which starts with every label selected)
pre_labels_selector = pn.widgets.Select(
    name='Labels',
    options=['All', 'brain', 'prostate', 'Select'],
)
labels_selector = pn.widgets.MultiSelect(
    name='Select Labels',
    options=labels,
    value=labels,
    height=200,
)

# Function to filter labels
# Preset label groups offered by pre_labels_selector besides 'All' and 'Select'.
_LABEL_PRESETS = {
    'brain': [
        'aneurysm-segmentation',
        'volumetry-assessment',
        'longi-lesion-segmentation',
        'lesion-segmentation',
        'lesion-classification',
        'lesion-assessment',
        'longi-aneurysm-segmentation',
        'brain-segmentation',
        'tumor-segmentation',
    ],
    'prostate': [
        'prostate-segmentation',
        'prostate-classification_pirads',
        'prostate-classification_other',
    ],
}


def filter_labels(pre_labels_selected, labels_selected):
    """Return a boolean mask over ``all_issues`` for the active label choice.

    Args:
        pre_labels_selected: Value of ``pre_labels_selector``: 'All', 'brain',
            'prostate' or 'Select'.
        labels_selected: Value of ``labels_selector``; only used when the
            preset is 'Select'.

    Returns:
        A boolean Series aligned with ``all_issues`` that is True for rows
        carrying at least one of the effective labels in a label column.

    Raises:
        ValueError: If ``pre_labels_selected`` is not a known preset.

    Side effects:
        Writes the effective label list to ``labels_selector.value`` so the
        multi-select mirrors the preset.

    NOTE(review): the charts are bound to ``labels_selector``, so this
    assignment can re-trigger them while they are already rendering.  Consider
    moving the preset -> widget sync into a watcher on ``pre_labels_selector``.
    """
    if pre_labels_selected == 'All':
        selected = labels
    elif pre_labels_selected in _LABEL_PRESETS:
        # Keep only preset labels that occur in the data, so the widget never
        # holds a value for which no indicator column exists.
        known = set(labels)
        selected = [
            label for label in _LABEL_PRESETS[pre_labels_selected]
            if label in known
        ]
    elif pre_labels_selected == 'Select':
        selected = labels_selected
    else:
        raise ValueError(f"Unknown label preset: {pre_labels_selected!r}")

    labels_selector.value = selected

    # Vectorised membership test restricted to the label columns.  The
    # previous row-wise apply() scanned every column of every row in Python
    # on each widget event.
    return all_issues[label_columns].isin(selected).any(axis=1)



# ############################################################################################### Plot 6 ############################################################################################

# Add one boolean column per label: True where any label column of the row
# holds that label.  The label columns are snapshotted once and compared with
# a vectorised isin(), replacing one row-wise apply() pass per label.
label_values = all_issues[label_columns]
for label in labels:
    all_issues[label] = label_values.isin([label]).any(axis=1)

# Function for plot
def chart_6(pre_labels_selected, labels_selected):
    """Build a line chart of monthly ticket counts per active label.

    Args:
        pre_labels_selected: Current value of ``pre_labels_selector``.
        labels_selected: Current value of ``labels_selector``.

    Returns:
        The hvplot line plot, one line per active label.
    """
    label_filter = filter_labels(pre_labels_selected, labels_selected)

    # filter_labels() writes the effective selection to the widget, so read
    # it once, after that call, and use the same list for grouping and plotting.
    active_labels = labels_selector.value

    # Boolean-mask indexing already returns a new frame and the result is only
    # read, so no extra .copy() is made on each widget event.
    monthly_counts = (
        all_issues[label_filter]
        .groupby('start_date_period')[active_labels]
        .sum()
        .reset_index()
    )

    return monthly_counts.hvplot.line(
        x='start_date_period', y=active_labels,
        xlabel='Months', ylabel='Tickets',
        rot=45, height=400, width=600, legend=False, shared_axes=False,
    )

# Bind chart_6 to the two sidebar widgets; the bound object is placed in the
# 'Labels' card of the dashboard layout.
bound_plot_6 = pn.bind(
    chart_6, pre_labels_selected=pre_labels_selector, labels_selected=labels_selector
)

# ############################################################################################### Plot 7 ############################################################################################

# 'Time Spent' divided by 60; rows without a value are dropped from here on.
# NOTE(review): the earlier comment said "hours" while the charts label this
# quantity 'Minutes'. Confirm the unit of the 'Time Spent' column.
all_issues["time_spent"] = all_issues["Time Spent"] / 60
all_issues = all_issues.dropna(subset=["time_spent"])

# One '<label>_time_spent' column per label: the ticket's time where the
# label indicator is True, 0 where it is False.
for label in labels:
    has_label = all_issues[label].astype(float)
    all_issues[label + '_time_spent'] = has_label * all_issues['time_spent']

# Function for plot
def chart_7(pre_labels_selected, labels_selected):
    """Build a line chart of the monthly mean time spent per selected label.

    Args:
        pre_labels_selected: Current value of ``pre_labels_selector``.
        labels_selected: Current value of ``labels_selector``; determines
            which '<label>_time_spent' columns are plotted.

    Returns:
        The hvplot line plot, one line per selected label.

    NOTE(review): the '<label>_time_spent' columns hold 0 for tickets that do
    not carry the label, so those tickets lower the mean.  Confirm that this
    is the intended "average".
    """
    label_filter = filter_labels(pre_labels_selected, labels_selected)
    time_columns = [label + '_time_spent' for label in labels_selected]

    # Boolean-mask indexing already returns a new frame and the result is only
    # read, so no extra .copy() is made on each widget event.
    monthly_mean = (
        all_issues[label_filter]
        .groupby('start_date_period')[time_columns]
        .mean()
        .reset_index()
    )

    return monthly_mean.hvplot.line(
        x='start_date_period', y=time_columns,
        xlabel='Months', ylabel='Minutes',
        rot=45, height=400, width=600, legend=False, shared_axes=False,
    )

# Bind chart_7 to the two sidebar widgets; the bound object is placed in the
# 'Time spent per Label (average)' card of the dashboard layout.
bound_plot_7 = pn.bind(
    chart_7, pre_labels_selected=pre_labels_selector, labels_selected=labels_selector
)

# ############################################################################################### Plot 8 ############################################################################################

# 'Time Spent' divided by 60, stored under a separate name for the cumulative
# chart; rows without a value are dropped.
# NOTE(review): the earlier comment said "hours" while the chart labels this
# quantity 'Minutes'. Confirm the unit of the 'Time Spent' column.
all_issues["time_spent_cum"] = all_issues["Time Spent"] / 60
all_issues = all_issues.dropna(subset=["time_spent_cum"])

# One '<label>_time_spent_cum' column per label: the ticket's time where the
# label indicator is True, 0 where it is False.
for label in labels:
    has_label = all_issues[label].astype(float)
    all_issues[label + '_time_spent_cum'] = has_label * all_issues['time_spent_cum']

# Function for plot
def chart_8(pre_labels_selected, labels_selected):
    """Build a line chart of the cumulative time spent per selected label.

    Args:
        pre_labels_selected: Current value of ``pre_labels_selector``.
        labels_selected: Current value of ``labels_selector``; determines
            which '<label>_time_spent_cum' columns are plotted.

    Returns:
        The hvplot line plot, one line per selected label, showing the running
        total of the monthly sums.
    """
    label_filter = filter_labels(pre_labels_selected, labels_selected)
    time_columns = [label + '_time_spent_cum' for label in labels_selected]

    # Boolean-mask indexing already returns a new frame and the result is only
    # read, so no extra .copy() is made on each widget event.
    running_total = (
        all_issues[label_filter]
        .groupby('start_date_period')[time_columns]
        .sum()
        .cumsum()
        .reset_index()
    )

    return running_total.hvplot.line(
        x='start_date_period', y=time_columns,
        xlabel='Months', ylabel='Minutes',
        rot=45, height=400, width=600, legend=False, shared_axes=False,
    )

# Bind chart_8 to the two sidebar widgets; the bound object is placed in the
# 'Time spent per Label (cumulative)' card of the dashboard layout.
bound_plot_8 = pn.bind(
    chart_8, pre_labels_selected=pre_labels_selector, labels_selected=labels_selector
)




############################################################################################### Panel Template #####################################################################################

# Dashboard layout: the label-count chart on the first row, the two
# time-spent charts side by side on the second row.
page_1 = pn.Column(
    pn.Row(
        pn.Card(bound_plot_6, title='Labels'),
    ),
    pn.Row(
        pn.Card(bound_plot_7, title='Time spent per Label (average)'),
        pn.Card(bound_plot_8, title='Time spent per Label (cumulative)'),
    ),
)

# A single 'Overview' tab wraps the page
tabs = pn.Tabs(('Overview', page_1))

# Widgets go into the sidebar, the tabs into the main area
template = pn.template.BootstrapTemplate(
    title='Support Dashboard',
    sidebar=[pre_labels_selector, labels_selector],
    main=[tabs],
)

# Make dashboard servable
template.servable();

Marc · February 8, 2024, 5:09pm

Hi @panelnoob

Could you try posting the CSV file in a “private” message to me here on Discourse? Then I will upload it. It can be hard to solve a performance problem without being able to reproduce it.

panelnoob · February 13, 2024, 8:51am

Thanks, I did so on Discord! @Marc