def generate_classification_target(
    dataframe: pd.DataFrame,
    column_names: str | list[str],
) -> "ClassificationTarget":
    """Build a single classification target from one or more label columns.

    The values of the selected columns are combined row by row into one
    string label, joined with ``"__"``, and the labels are then integer
    encoded with ``pd.factorize``.

    Args:
        dataframe: Frame containing the label column(s).
        column_names: Name of one column, or a list of column names, whose
            values together make up the class label.

    Returns:
        A ``ClassificationTarget`` constructed with:

        * ``label``: the combined string label per row, indexed like
          ``dataframe``;
        * ``value``: the integer code of each row's label (Series named
          ``"encoded"``);
        * ``encoding``: the unique labels, positioned so that code ``i``
          refers to ``encoding[i]`` (Series named ``"encoding"``).

    Raises:
        KeyError: If a requested column is not present in ``dataframe``.
    """
    from .schemas import (
        ClassificationTarget,  # Local import to avoid circular dependency
    )

    if isinstance(column_names, str):
        column_names = [column_names]

    # Collapse the selected columns into one label per row. Every value is
    # passed through str() first: str.join() only accepts strings, so numeric,
    # boolean or missing values would otherwise raise TypeError.
    label_columns = dataframe[column_names]
    label = pd.Series(
        [
            "__".join(map(str, row))
            for row in label_columns.itertuples(index=False, name=None)
        ],
        index=dataframe.index,
    )

    # Encode labels as integer codes; uniques come back in order of first
    # appearance.
    encoded_np, encoding_np = pd.factorize(label)

    # NOTE(review): `value` gets a fresh RangeIndex while `label` keeps the
    # dataframe's index - kept as-is for compatibility; confirm that callers
    # do not rely on index alignment between the two.
    encoded = pd.Series(encoded_np, name="encoded")
    encoding = pd.Series(encoding_np, name="encoding")
    return ClassificationTarget(label=label, value=encoded, encoding=encoding)