def generate_dataset_data(self, mean_signatures: bool = True) -> TabularDatasetData:
    """Assemble the pixels, signals and metadata tables for all data entities.

    For every entity in ``self.data_entities`` the signatures are converted
    to a DataFrame and rows containing missing values are dropped.  The
    resulting frame is passed through ``Signatures.from_dataframe`` and its
    ``pixels`` and ``signals`` frames are collected.  A metadata frame with
    one row per signature row is built from the entity's attributes.

    Args:
        mean_signatures: When true, the signature rows of each entity are
            replaced by a single row holding the column-wise mean, so each
            entity contributes exactly one row to every output table.

    Returns:
        A ``TabularDatasetData`` whose ``pixels``, ``signals`` and
        ``metadata`` frames are the per-entity frames concatenated in
        iteration order with a freshly numbered index.

    Raises:
        AssertionError: If the metadata column names (including their
            order) differ from the fields declared on ``MetaDataEntity``.
    """
    # NOTE(review): presumably validates ``self.data_entities`` and raises
    # when they are unusable -- confirm against its definition.
    self._check_data_entities()

    # Loop-invariant: the ordered field names every metadata frame must have.
    expected_columns = list(MetaDataEntity.model_fields.keys())

    pixels_dfs = []
    signals_dfs = []
    metadata_dfs = []
    for entity in self.data_entities:
        signatures_df = entity.signatures.to_dataframe().dropna()
        if mean_signatures:
            # Series of column means -> one-row DataFrame with the same columns.
            signatures_df = signatures_df.mean().to_frame().T
        signatures_len = len(signatures_df)

        # One scalar per metadata column; repeated below for every row.
        metadata_values = {
            "image_idx": str(entity.image_idx),
            "image_filepath": str(entity.image_filepath),
            "camera_id": entity.camera_id,
            "shape_idx": str(entity.shape_idx),
            "shape_type": entity.shape_type,
            "shape_label": entity.shape_label,
        }
        # Raised explicitly instead of using an ``assert`` statement so the
        # check is not stripped when Python runs with ``-O``.
        if list(metadata_values) != expected_columns:
            raise AssertionError(
                "Sanity check failed! The columns in metadata_df do not match MetaDataEntity fields."
            )
        metadata_df = pd.DataFrame(
            {
                column: [value] * signatures_len
                for column, value in metadata_values.items()
            }
        )

        signatures = Signatures.from_dataframe(signatures_df)
        pixels_dfs.append(signatures.pixels.df)
        signals_dfs.append(signatures.signals.df)
        metadata_dfs.append(metadata_df)

    return TabularDatasetData(
        pixels=pd.concat(pixels_dfs, ignore_index=True),
        signals=pd.concat(signals_dfs, ignore_index=True),
        metadata=pd.concat(metadata_dfs, ignore_index=True),
    )