Skip to content

Schemas

siapy.datasets.schemas

Target

Bases: BaseModel, ABC

model_config class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True)

value instance-attribute

value: Series

from_dict abstractmethod classmethod

from_dict(data: dict[str, Any]) -> Target
Source code in siapy/datasets/schemas.py
30
31
32
# Abstract alternate constructor: concrete Target subclasses must implement
# building an instance from a plain dictionary.
@classmethod
@abstractmethod
def from_dict(cls, data: dict[str, Any]) -> "Target": ...

from_iterable abstractmethod classmethod

from_iterable(data: Iterable[Any]) -> Target
Source code in siapy/datasets/schemas.py
34
35
36
# Abstract alternate constructor: concrete Target subclasses must implement
# building an instance from an iterable of items.
@classmethod
@abstractmethod
def from_iterable(cls, data: Iterable[Any]) -> "Target": ...

to_dict abstractmethod

to_dict() -> dict[str, Any]
Source code in siapy/datasets/schemas.py
38
39
# Abstract: concrete Target subclasses must implement returning their content
# as a dictionary with string keys.
@abstractmethod
def to_dict(self) -> dict[str, Any]: ...

to_dataframe abstractmethod

to_dataframe() -> DataFrame
Source code in siapy/datasets/schemas.py
41
42
# Abstract: concrete Target subclasses must implement returning their content
# as a pandas DataFrame.
@abstractmethod
def to_dataframe(self) -> pd.DataFrame: ...

reset_index abstractmethod

reset_index() -> Target
Source code in siapy/datasets/schemas.py
44
45
# Abstract: concrete Target subclasses must implement returning a Target
# after resetting the index (see the subclass implementations for details).
@abstractmethod
def reset_index(self) -> "Target": ...

ClassificationTarget

Bases: Target

label instance-attribute

label: Series

value instance-attribute

value: Series

encoding instance-attribute

encoding: Series

model_config class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True)

from_iterable classmethod

from_iterable(data: Iterable[Any]) -> ClassificationTarget
Source code in siapy/datasets/schemas.py
61
62
63
64
@classmethod
def from_iterable(cls, data: Iterable[Any]) -> "ClassificationTarget":
    """Create a classification target from an iterable of labels.

    The items are placed in a one-column DataFrame named "label", and that
    frame is handed to ``generate_classification_target`` together with the
    column name.
    """
    frame = pd.DataFrame(data, columns=["label"])
    target = generate_classification_target(frame, "label")
    return target

from_dict classmethod

from_dict(data: dict[str, Any]) -> ClassificationTarget
Source code in siapy/datasets/schemas.py
66
67
68
69
70
71
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "ClassificationTarget":
    """Create a classification target from a dictionary.

    Args:
        data: Mapping that must contain the keys "label", "value" and
            "encoding"; each entry is wrapped in a Series named after its key.

    Raises:
        KeyError: If one of the three keys is missing.
    """
    # Keys are read in the order label -> value -> encoding.
    series_by_field = {
        field: pd.Series(data[field], name=field)
        for field in ("label", "value", "encoding")
    }
    return cls(**series_by_field)

to_dict

to_dict() -> dict[str, Any]
Source code in siapy/datasets/schemas.py
73
74
75
76
77
78
def to_dict(self) -> dict[str, Any]:
    """Return the label, value and encoding Series as plain lists.

    Returns:
        A dict with the keys "label", "value" and "encoding", in that order.
    """
    labels = self.label.to_list()
    values = self.value.to_list()
    encodings = self.encoding.to_list()
    return {"label": labels, "value": values, "encoding": encodings}

to_dataframe

to_dataframe() -> DataFrame
Source code in siapy/datasets/schemas.py
80
81
def to_dataframe(self) -> pd.DataFrame:
    """Return a DataFrame with the value Series followed by the label Series.

    The encoding Series is not part of the result.
    """
    columns = [self.value, self.label]
    return pd.concat(columns, axis=1)

reset_index

reset_index() -> ClassificationTarget
Source code in siapy/datasets/schemas.py
83
84
85
86
87
88
def reset_index(self) -> "ClassificationTarget":
    """Return a new ClassificationTarget with label and value re-indexed.

    Both Series get their index reset with ``drop=True``; the encoding Series
    is passed through unchanged.
    """
    fresh_label = self.label.reset_index(drop=True)
    fresh_value = self.value.reset_index(drop=True)
    return ClassificationTarget(label=fresh_label, value=fresh_value, encoding=self.encoding)

RegressionTarget

Bases: Target

value instance-attribute

value: Series

name class-attribute instance-attribute

name: str = 'value'

model_config class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True)

from_iterable classmethod

from_iterable(data: Iterable[Any]) -> RegressionTarget
Source code in siapy/datasets/schemas.py
102
103
104
105
@classmethod
def from_iterable(cls, data: Iterable[Any]) -> "RegressionTarget":
    """Create a regression target from an iterable of values.

    The items are placed in a one-column DataFrame named "value", and that
    frame is handed to ``generate_regression_target`` together with the
    column name.
    """
    frame = pd.DataFrame(data, columns=["value"])
    target = generate_regression_target(frame, "value")
    return target

from_dict classmethod

from_dict(data: dict[str, Any]) -> RegressionTarget
Source code in siapy/datasets/schemas.py
107
108
109
110
111
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "RegressionTarget":
    """Create a regression target from a dictionary.

    Args:
        data: Mapping with a required "value" entry and an optional "name"
            entry; when "name" is absent, "value" is used.

    Raises:
        KeyError: If "value" is missing.
    """
    series = pd.Series(data["value"], name="value")
    return cls(value=series, name=data.get("name", "value"))

to_dict

to_dict() -> dict[str, Any]
Source code in siapy/datasets/schemas.py
113
114
115
116
117
def to_dict(self) -> dict[str, Any]:
    """Return the values as a plain list together with the target name.

    Returns:
        A dict with the keys "value" and "name", in that order.
    """
    values = self.value.to_list()
    return {"value": values, "name": self.name}

to_dataframe

to_dataframe() -> DataFrame
Source code in siapy/datasets/schemas.py
119
120
def to_dataframe(self) -> pd.DataFrame:
    """Return the value Series wrapped in a single-column DataFrame."""
    # NOTE(review): the frame is built from the Series alone; self.name is not
    # consulted here -- confirm that is the intended column label.
    frame = pd.DataFrame(self.value)
    return frame

reset_index

reset_index() -> RegressionTarget
Source code in siapy/datasets/schemas.py
122
123
def reset_index(self) -> "RegressionTarget":
    """Return a new RegressionTarget whose value Series is re-indexed.

    The index is reset with ``drop=True``; the name is carried over.
    """
    fresh_value = self.value.reset_index(drop=True)
    return RegressionTarget(value=fresh_value, name=self.name)

TabularDatasetData dataclass

TabularDatasetData(
    signatures: Signatures,
    metadata: DataFrame,
    target: Target | None = None,
)

signatures instance-attribute

signatures: Signatures

metadata instance-attribute

metadata: DataFrame

target class-attribute instance-attribute

target: Target | None = None

from_dict classmethod

from_dict(data: dict[str, Any]) -> TabularDatasetData
Source code in siapy/datasets/schemas.py
149
150
151
152
153
154
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "TabularDatasetData":
    """Create a dataset from a dictionary.

    Args:
        data: Mapping with the required keys "pixels", "signals" and
            "metadata", plus an optional "target" entry.

    Raises:
        KeyError: If "pixels", "signals" or "metadata" is missing.
    """
    spectral_part = {"pixels": data["pixels"], "signals": data["signals"]}
    return cls(
        signatures=Signatures.from_dict(spectral_part),
        metadata=pd.DataFrame(data["metadata"]),
        # A missing "target" key yields None, which target_from_dict accepts.
        target=TabularDatasetData.target_from_dict(data.get("target")),
    )

target_from_dict staticmethod

target_from_dict(
    data: dict[str, Any] | None = None,
) -> Optional[Target]
Source code in siapy/datasets/schemas.py
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
@staticmethod
def target_from_dict(data: dict[str, Any] | None = None) -> Optional[Target]:
    """Build the matching Target from a dictionary, chosen by its keys.

    Args:
        data: Target dictionary, or None.

    Returns:
        None when ``data`` is None; otherwise a RegressionTarget or a
        ClassificationTarget built via the respective ``from_dict``.

    Raises:
        InvalidInputError: If the keys fit neither model's field names.
    """
    if data is None:
        return None

    regression_fields = set(RegressionTarget.model_fields)
    classification_fields = set(ClassificationTarget.model_fields)
    supplied = set(data)

    # Regression is checked first, so a key set that fits both models
    # (an empty dict included) is routed to RegressionTarget.
    if supplied <= regression_fields:
        return RegressionTarget.from_dict(data)
    if supplied <= classification_fields:
        return ClassificationTarget.from_dict(data)
    raise InvalidInputError(data, "Invalid target dict.")

set_attributes

set_attributes(
    *,
    signatures: Signatures | None = None,
    metadata: DataFrame | None = None,
    target: Target | None = None,
) -> TabularDatasetData
Source code in siapy/datasets/schemas.py
190
191
192
193
194
195
196
197
198
199
200
201
def set_attributes(
    self,
    *,
    signatures: Signatures | None = None,
    metadata: pd.DataFrame | None = None,
    target: Target | None = None,
) -> "TabularDatasetData":
    """Return a new dataset with the given attributes replaced.

    Any argument left as None is filled from a copy of this dataset, so
    passing ``target=None`` keeps the current target rather than clearing it.
    """
    snapshot = self.copy()
    if signatures is None:
        signatures = snapshot.signatures
    if metadata is None:
        metadata = snapshot.metadata
    if target is None:
        target = snapshot.target
    return TabularDatasetData(signatures=signatures, metadata=metadata, target=target)

to_dict

to_dict() -> dict[str, Any]
Source code in siapy/datasets/schemas.py
203
204
205
206
207
208
209
210
def to_dict(self) -> dict[str, Any]:
    """Return the dataset as a dictionary.

    Returns:
        A dict with the keys "pixels", "signals", "metadata" and "target", in
        that order; "target" is None when the dataset has no target.
    """
    spectral = self.signatures.to_dict()
    payload: dict[str, Any] = {key: spectral[key] for key in ("pixels", "signals")}
    payload["metadata"] = self.metadata.to_dict()
    payload["target"] = None if self.target is None else self.target.to_dict()
    return payload

to_dataframe

to_dataframe() -> DataFrame
Source code in siapy/datasets/schemas.py
212
213
214
215
216
217
def to_dataframe(self) -> pd.DataFrame:
    """Return signatures, metadata and (if present) target side by side.

    The signatures frame and the metadata are concatenated column-wise first;
    the target frame is then appended column-wise when a target is set.
    """
    merged = pd.concat([self.signatures.to_dataframe(), self.metadata], axis=1)
    if self.target is None:
        return merged
    return pd.concat([merged, self.target.to_dataframe()], axis=1)

to_dataframe_multiindex

to_dataframe_multiindex() -> DataFrame
Source code in siapy/datasets/schemas.py
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
def to_dataframe_multiindex(self) -> pd.DataFrame:
    """Return the dataset as one DataFrame with two-level column labels.

    Column levels are named "category" and "field". Metadata columns are
    placed under the "metadata" category and target columns under "target";
    the signatures part comes from ``signatures.to_dataframe_multiindex()``.

    Raises:
        InvalidInputError: If the target is neither a ClassificationTarget
            nor a RegressionTarget.
    """
    level_names = ["category", "field"]
    signatures_part = self.signatures.to_dataframe_multiindex()

    metadata_header = pd.MultiIndex.from_tuples(
        [("metadata", column) for column in self.metadata.columns],
        names=level_names,
    )
    # Only the raw values are reused; the frame gets a fresh default index.
    metadata_part = pd.DataFrame(self.metadata.values, columns=metadata_header)
    wide = pd.concat([signatures_part, metadata_part], axis=1)

    target = self.target
    if target is None:
        return wide

    flat_target = target.to_dataframe()
    if isinstance(target, ClassificationTarget):
        target_fields = [("target", column) for column in flat_target.columns]
    elif isinstance(target, RegressionTarget):
        # Regression targets are labelled with the target's name attribute.
        target_fields = [("target", target.name)]
    else:
        raise InvalidInputError(
            target,
            "Invalid target type. Expected ClassificationTarget or RegressionTarget.",
        )
    target_header = pd.MultiIndex.from_tuples(target_fields, names=level_names)
    target_part = pd.DataFrame(flat_target.values, columns=target_header)
    return pd.concat([wide, target_part], axis=1)

reset_index

reset_index() -> TabularDatasetData
Source code in siapy/datasets/schemas.py
251
252
253
254
255
256
def reset_index(self) -> "TabularDatasetData":
    """Return a new dataset with every component's index reset.

    Signatures and target use their own ``reset_index()``; metadata is reset
    with ``drop=True``. A missing target stays None.
    """
    fresh_signatures = self.signatures.reset_index()
    fresh_metadata = self.metadata.reset_index(drop=True)
    fresh_target = None if self.target is None else self.target.reset_index()
    return TabularDatasetData(
        signatures=fresh_signatures,
        metadata=fresh_metadata,
        target=fresh_target,
    )

copy

copy() -> TabularDatasetData
Source code in siapy/datasets/schemas.py
258
259
260
261
262
263
def copy(self) -> "TabularDatasetData":
    """Return a copy of the dataset that does not share its target data.

    Signatures and metadata are duplicated through their own ``copy()``
    methods. The target is duplicated with ``model_copy(deep=True)``: a plain
    ``model_copy()`` is shallow, which would leave the copied target pointing
    at the same Series objects as the original.

    Returns:
        A new TabularDatasetData; ``target`` is None when this dataset has
        no target.
    """
    signatures_clone = self.signatures.copy()
    metadata_clone = self.metadata.copy()
    target_clone = None if self.target is None else self.target.model_copy(deep=True)
    return TabularDatasetData(
        signatures=signatures_clone,
        metadata=metadata_clone,
        target=target_clone,
    )