Skip to content

Tabular

siapy.datasets.tabular

MetaDataEntity

Bases: BaseModel

image_idx instance-attribute

image_idx: int

image_filepath instance-attribute

image_filepath: Path

camera_id instance-attribute

camera_id: str

shape_idx instance-attribute

shape_idx: int

shape_type instance-attribute

shape_type: str

shape_label instance-attribute

shape_label: str | None

geometry_idx instance-attribute

geometry_idx: int

TabularDataEntity

Bases: MetaDataEntity

model_config class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True)

signatures instance-attribute

signatures: Signatures

image_idx instance-attribute

image_idx: int

image_filepath instance-attribute

image_filepath: Path

camera_id instance-attribute

camera_id: str

shape_idx instance-attribute

shape_idx: int

shape_type instance-attribute

shape_type: str

shape_label instance-attribute

shape_label: str | None

geometry_idx instance-attribute

geometry_idx: int

TabularDataset dataclass

TabularDataset(container: ImageContainerType)
Source code in siapy/datasets/tabular.py (lines 36–38)
def __init__(self, container: ImageContainerType):
    """Store the image container and start with an empty entity list.

    Args:
        container: Either a single ``SpectralImage`` or an already-built
            image collection. A single image is wrapped in a one-element
            ``SpectralImageSet``; anything else is stored as given.
    """
    if isinstance(container, SpectralImage):
        # Normalise the single-image case so the rest of the class can
        # always iterate over a set.
        container = SpectralImageSet([container])
    self._image_set = container
    # Filled by process_image_data().
    self._data_entities: list[TabularDataEntity] = []

image_set property

image_set: SpectralImageSet

data_entities property

data_entities: list[TabularDataEntity]

process_image_data

process_image_data() -> None
Source code in siapy/datasets/tabular.py (lines 62–78)
def process_image_data(self) -> None:
    """Rebuild ``data_entities`` from the shapes of every image in the set.

    Any previously collected entities are discarded first. Then, for each
    image, each shape in ``image.geometric_shapes.shapes`` and each item
    produced by ``get_signatures_within_convex_hull(image, shape)``, one
    ``TabularDataEntity`` is appended. The three enumeration positions are
    recorded as ``image_idx``, ``shape_idx`` and ``geometry_idx``.
    """
    self.data_entities.clear()
    for img_pos, img in enumerate(self.image_set):
        shapes = img.geometric_shapes.shapes
        for shp_pos, shp in enumerate(shapes):
            # One shape may produce several signature groups; each group
            # becomes its own entity.
            hull_signatures = get_signatures_within_convex_hull(img, shp)
            for geom_pos, geom_signatures in enumerate(hull_signatures):
                self.data_entities.append(
                    TabularDataEntity(
                        image_idx=img_pos,
                        image_filepath=img.filepath,
                        camera_id=img.camera_id,
                        shape_idx=shp_pos,
                        shape_type=shp.shape_type,
                        shape_label=shp.label,
                        geometry_idx=geom_pos,
                        signatures=geom_signatures,
                    )
                )

generate_dataset_data

generate_dataset_data(
    mean_signatures: bool = True,
) -> TabularDatasetData
Source code in siapy/datasets/tabular.py (lines 80–112)
def generate_dataset_data(self, mean_signatures: bool = True) -> TabularDatasetData:
    """Combine all data entities into one signatures table plus metadata.

    ``self._check_data_entities()`` is called first (defined elsewhere;
    presumably it rejects an unprocessed or empty dataset -- confirm).

    For every entity the signatures are converted to a dataframe and passed
    through ``dropna()``. A metadata frame with the same number of rows is
    built alongside it, repeating the entity's metadata on every row. The
    index fields and the file path are stored as strings.

    Args:
        mean_signatures: When true, each entity's signatures are reduced to
            a single row holding the per-column mean; when false, every
            remaining row is kept.

    Returns:
        A ``TabularDatasetData`` holding the concatenated signatures and the
        row-aligned concatenated metadata.

    Raises:
        AssertionError: If the metadata columns do not match the fields of
            ``MetaDataEntity`` in name and order.
    """
    self._check_data_entities()

    signature_frames = []
    metadata_frames = []
    for entity in self.data_entities:
        frame = entity.signatures.to_dataframe()
        frame = frame.dropna()
        if mean_signatures:
            # Collapse to a single row: mean() yields a Series, which is
            # turned back into a one-row frame.
            column_means = frame.mean()
            frame = column_means.to_frame().T

        row_count = len(frame)
        # One value per metadata column, in MetaDataEntity field order.
        row_values = {
            "image_idx": str(entity.image_idx),
            "image_filepath": str(entity.image_filepath),
            "camera_id": entity.camera_id,
            "shape_idx": str(entity.shape_idx),
            "shape_type": entity.shape_type,
            "shape_label": entity.shape_label,
            "geometry_idx": str(entity.geometry_idx),
        }
        # Repeat each value so metadata stays row-aligned with the signatures.
        metadata = pd.DataFrame(
            {column: [value] * row_count for column, value in row_values.items()}
        )

        # Guards against the hand-written column list drifting from the model.
        assert list(metadata.columns) == list(MetaDataEntity.model_fields), (
            "Sanity check failed! The columns in metadata_df do not match MetaDataEntity fields."
        )

        signature_frames.append(frame)
        metadata_frames.append(metadata)

    all_signatures = pd.concat(signature_frames, ignore_index=True)
    all_metadata = pd.concat(metadata_frames, ignore_index=True)
    signatures = Signatures.from_dataframe(all_signatures)
    return TabularDatasetData(signatures=signatures, metadata=all_metadata)