Is there any way to fix an attribute of a param? For example, let's say I create a param.ClassSelector called x which holds an empty pd.Series with dtype=int. I know that x will always be a pd.Series (otherwise param raises an error), but I also want to be sure that the dtype of x is always integer:
import pandas as pd
import param
class A(param.Parameterized):
    """Demonstrates that ClassSelector validates the class but not the dtype."""

    x = param.ClassSelector(default=pd.Series(dtype=int), class_=pd.Series)

    def __init__(self, **params):
        super().__init__(**params)

    def change_dtype(self):
        # Re-assigning x with a float Series passes ClassSelector's
        # isinstance check, so the dtype silently flips int64 -> float64.
        print(self.x.dtype)
        self.x = pd.Series([1.2])
        print(self.x.dtype)
# Demonstrate the issue: the second print shows float64, not int64.
a = A()
a.change_dtype()
>>> int64
>>> float64
So, how to make sure the dtype attribute of x cannot change?
Thanks @ahuang11! With your example and the explanation in the docs over here, I managed to build something similar for param.DataFrame.
With this class I can now either pass columns a dictionary mapping column names to their expected dtypes, or pass such a dictionary to dtypes directly:
class SuperDataFrame(param.DataFrame):
    """A param.DataFrame that additionally validates per-column dtypes.

    The expected dtypes can be supplied in two ways:
      * pass a dict to ``dtypes`` directly (column name -> expected dtype), or
      * pass a dict as ``columns``; its keys become the usual column-name
        constraint and its values the expected dtypes.
    """

    # Extra slot on top of param.DataFrame's, so param treats ``dtypes``
    # as parameter metadata rather than an instance attribute.
    __slots__ = ['dtypes']

    _slot_defaults = param._utils._dict_update(
        param.DataFrame._slot_defaults, dtypes=None
    )

    def __init__(self, **params):
        self.dtypes = params.pop("dtypes", {})
        # A dict-valued ``columns`` carries dtypes in its values; reduce it
        # to the plain set of names that param.DataFrame expects.
        if isinstance(params.get("columns", None), dict):
            self.dtypes = params["columns"]
            params["columns"] = set(params["columns"].keys())
        super().__init__(**params)

    def _validate(self, val):
        """Run param.DataFrame's checks, then enforce the expected dtypes.

        Raises ValueError listing every column whose dtype differs from
        the expected one.
        """
        super()._validate(val)
        # Guard: the slot default is None, so ``dtypes`` may be unset.
        if not self.dtypes:
            return
        invalid_dtypes = [
            (name, val.dtypes[name], expected)
            for name, expected in self.dtypes.items()
            if val.dtypes[name] != expected
        ]
        if invalid_dtypes:
            # BUG FIX: the original nested double quotes inside a
            # double-quoted f-string (a SyntaxError before Python 3.12).
            # ``getattr`` now falls back to the spec itself so non-type
            # specs (e.g. the string "int64") don't raise AttributeError.
            errors = [
                f"column `{name}` should be of type "
                f"`{getattr(expected, '__name__', expected)}` but is `{actual}`."
                for name, actual, expected in invalid_dtypes
            ]
            raise ValueError(
                f'{param._utils._validate_error_prefix(self)}: {" and, ".join(errors)} '
            )
class A(param.Parameterized):
    """Holds a DataFrame whose column names and dtypes are both constrained."""

    df = SuperDataFrame(
        default=pd.DataFrame(dict(int_data=[1, 2, 3], float_data=[1.2, 3.4, 5.6])),
        columns=dict(int_data=int, float_data=float),
    )

    def __init__(self, **params):
        super().__init__(**params)

    def invalid_data_1(self):
        # Re-assigning the whole DataFrame runs SuperDataFrame._validate.
        self.df = pd.DataFrame(dict(int_data=[1.1, 2.1, 3.1], float_data=[1.2, 3.4, 5.6]))

    def invalid_data_2(self):
        # Mutating a single column in place does NOT re-run validation.
        self.df["int_data"] = pd.Series(["1", "2", "3"], dtype=str)
        # self.param.trigger("df")
a = A()
# In-place column mutation: no error raised, validation is bypassed.
a.invalid_data_2()
# Whole-DataFrame assignment: _validate runs and raises ValueError.
a.invalid_data_1()
>>> ValueError: SuperDataFrame parameter 'A.df': column `int_data` should be of type `int` but is `float64`.
I did notice however that the _validate method only gets called when I change the df entirely and NOT when I change only 1 column (as is done in .invalid_data_2()).
Is there any simple way to also trigger the check when a single column gets changed, besides adding self.param.trigger("df") everywhere?