Skip to content

Helpers

siapy.datasets.helpers

generate_classification_target

generate_classification_target(
    dataframe: DataFrame, column_names: str | list[str]
) -> ClassificationTarget

Generate a classification target from DataFrame columns.

Creates a classification target by combining one or more DataFrame columns into encoded labels suitable for machine learning classification tasks. Multiple columns are combined using a '__' delimiter and then factorized into numeric values.

PARAMETER DESCRIPTION
dataframe

The input DataFrame containing the target data.

TYPE: DataFrame

column_names

Name(s) of the column(s) to use for generating the classification target. Can be a single column name as string or multiple column names as list.

TYPE: str | list[str]

RETURNS DESCRIPTION
ClassificationTarget

A ClassificationTarget object containing the original labels, encoded numeric values, and the encoding mapping.

Example
import pandas as pd
from siapy.datasets.helpers import generate_classification_target

df = pd.DataFrame({
    'category': ['A', 'B', 'A', 'C'],
    'subcategory': ['X', 'Y', 'X', 'Z']
})

# Single column
target = generate_classification_target(df, 'category')

# Multiple columns
target = generate_classification_target(df, ['category', 'subcategory'])
Source code in siapy/datasets/helpers.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
def generate_classification_target(
    dataframe: pd.DataFrame,
    column_names: str | list[str],
) -> "ClassificationTarget":
    """Generate a classification target from DataFrame columns.

    Creates a classification target by combining one or more DataFrame columns into
    encoded labels suitable for machine learning classification tasks. The values of
    the selected columns are converted to strings, combined row by row using a '__'
    delimiter, and then factorized into numeric values.

    Args:
        dataframe: The input DataFrame containing the target data.
        column_names: Name(s) of the column(s) to use for generating the classification target.
            Can be a single column name as string or multiple column names as list.
            Columns may hold non-string values (e.g. integer class ids); each value is
            passed through `str()` before being joined.

    Returns:
        A ClassificationTarget object containing the original labels, encoded numeric values, and the encoding mapping.

    Example:
        ```python
        import pandas as pd
        from siapy.datasets.helpers import generate_classification_target

        df = pd.DataFrame({
            'category': ['A', 'B', 'A', 'C'],
            'subcategory': ['X', 'Y', 'X', 'Z']
        })

        # Single column
        target = generate_classification_target(df, 'category')

        # Multiple columns
        target = generate_classification_target(df, ['category', 'subcategory'])
        ```
    """
    from .schemas import (
        ClassificationTarget,  # Local import to avoid circular dependency
    )

    # Normalise the single-name form so the column selection below always
    # yields a DataFrame (never a Series).
    if isinstance(column_names, str):
        column_names = [column_names]
    # Collapse the selected columns into one tuple per row.
    label = dataframe[column_names].apply(tuple, axis=1)
    # Join each tuple with the '__' delimiter. Every item goes through str()
    # first because str.join raises TypeError on non-string items, which would
    # otherwise make numeric or boolean label columns unusable.
    label = label.apply(lambda parts: "__".join(map(str, parts)))
    # Encode the combined labels as integer codes plus the array of unique labels.
    encoded_np, encoding_np = pd.factorize(label)
    encoded = pd.Series(encoded_np, name="encoded")
    encoding = pd.Series(encoding_np, name="encoding")
    return ClassificationTarget(label=label, value=encoded, encoding=encoding)

generate_regression_target

generate_regression_target(
    dataframe: DataFrame, column_name: str
) -> RegressionTarget

Generate a regression target from a DataFrame column.

Creates a regression target from a single DataFrame column for use in machine learning regression tasks.

PARAMETER DESCRIPTION
dataframe

The input DataFrame containing the target data.

TYPE: DataFrame

column_name

Name of the column to use for generating the regression target.

TYPE: str

RETURNS DESCRIPTION
RegressionTarget

A RegressionTarget object containing the column name and values.

Example
import pandas as pd
from siapy.datasets.helpers import generate_regression_target

df = pd.DataFrame({
    'temperature': [20.1, 25.3, 18.7, 22.9],
    'humidity': [45.2, 60.8, 38.1, 52.3]
})

target = generate_regression_target(df, 'temperature')
Source code in siapy/datasets/helpers.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
def generate_regression_target(
    dataframe: pd.DataFrame,
    column_name: str,
) -> "RegressionTarget":
    """Build a regression target from one column of a DataFrame.

    The named column is looked up in the DataFrame and wrapped, together with
    its name, in a RegressionTarget.

    Args:
        dataframe: DataFrame that holds the target column.
        column_name: Name of the column whose values become the regression target.

    Returns:
        A RegressionTarget whose `name` is `column_name` and whose `value` is
        the selected column.

    Example:
        ```python
        import pandas as pd
        from siapy.datasets.helpers import generate_regression_target

        df = pd.DataFrame({
            'temperature': [20.1, 25.3, 18.7, 22.9],
            'humidity': [45.2, 60.8, 38.1, 52.3]
        })

        target = generate_regression_target(df, 'temperature')
        ```
    """
    # Imported here rather than at module level to avoid a circular dependency.
    from .schemas import RegressionTarget

    target_values = dataframe[column_name]
    return RegressionTarget(name=column_name, value=target_values)

merge_signals_from_multiple_cameras

merge_signals_from_multiple_cameras(
    data: TabularDatasetData,
) -> None

Merge signals from multiple cameras into a unified dataset.

This function combines spectral or imaging data collected from multiple camera sources into a single coherent dataset structure. The implementation details depend on the specific camera configuration and data format requirements.

PARAMETER DESCRIPTION
data

The tabular dataset data containing signals from multiple cameras that need to be merged.

TYPE: TabularDatasetData

RETURNS DESCRIPTION
None

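Nothing is returned. The function is intended to modify the input data in-place once implemented; at present it is a placeholder and leaves the data unchanged.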

TYPE: None

Note

This function is currently not implemented and serves as a placeholder for future development of multi-camera signal merging capabilities.

Todo

Implement the actual merging logic based on camera specifications and data alignment requirements.

Source code in siapy/datasets/helpers.py
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
def merge_signals_from_multiple_cameras(data: "TabularDatasetData") -> None:
    """Placeholder for merging signals from multiple cameras into one dataset.

    The intended purpose is to combine spectral or imaging data collected from
    several camera sources into a single coherent dataset structure. How that
    is done will depend on the camera configuration and data format.

    Args:
        data: The tabular dataset data containing signals from multiple cameras
            that need to be merged.

    Returns:
        None. The merge is meant to be applied to `data` in-place once it is
        implemented.

    Note:
        This function is not implemented yet: calling it does nothing and
        leaves `data` untouched.

    Todo:
        Implement the actual merging logic based on camera specifications
        and data alignment requirements.
    """
    # Intentional no-op until the merging logic is implemented.
    return None