# siapy.features.helpers — API reference

Helpers

siapy.features.helpers

FeatureSelectorConfig

Bases: BaseModel

k_features class-attribute instance-attribute

k_features: int | str | tuple[int, ...] = (1, 20)

cv class-attribute instance-attribute

cv: int = 3

forward class-attribute instance-attribute

forward: bool = True

floating class-attribute instance-attribute

floating: bool = True

verbose class-attribute instance-attribute

verbose: int = 2

n_jobs class-attribute instance-attribute

n_jobs: int = 1

pre_dispatch class-attribute instance-attribute

pre_dispatch: int | str = '2*n_jobs'

model_config class-attribute instance-attribute

model_config = ConfigDict(arbitrary_types_allowed=True, validate_assignment=True)

feature_selector_factory

feature_selector_factory(problem_type: Literal['regression', 'classification'], *, k_features: int | str | tuple[int, ...] = (1, 20), cv: int = 3, forward: bool = True, floating: bool = True, verbose: int = 2, n_jobs: int = 1, pre_dispatch: int | str = '2*n_jobs', config: FeatureSelectorConfig | None = None) -> Pipeline
PARAMETER DESCRIPTION
problem_type

TYPE: Literal['regression', 'classification']

k_features

TYPE: int | str | tuple[int, ...] DEFAULT: (1, 20)

cv

TYPE: int DEFAULT: 3

forward

TYPE: bool DEFAULT: True

floating

TYPE: bool DEFAULT: True

verbose

TYPE: int DEFAULT: 2

n_jobs

TYPE: int DEFAULT: 1

pre_dispatch

TYPE: int | str DEFAULT: '2*n_jobs'

config

TYPE: FeatureSelectorConfig | None DEFAULT: None

Source code in siapy/features/helpers.py
(Corresponds to lines 37–88 of siapy/features/helpers.py.)
def feature_selector_factory(
    problem_type: Literal["regression", "classification"],
    *,
    k_features: Annotated[
        int | str | tuple[int, ...],
        "can be: 'best' - most extensive, (1, n) - check range of features, n - exact number of features",
    ] = (1, 20),
    cv: int = 3,
    forward: Annotated[bool, "selection in forward direction"] = True,
    floating: Annotated[
        bool, "floating algorithm - can go back and remove features once added"
    ] = True,
    verbose: int = 2,
    n_jobs: int = 1,
    pre_dispatch: int | str = "2*n_jobs",
    config: Annotated[
        FeatureSelectorConfig | None,
        "If provided, other arguments are overwritten by config values",
    ] = None,
) -> Pipeline:
    """Create a feature-selection pipeline: robust scaling followed by SFS.

    Builds a two-step scikit-learn ``Pipeline`` consisting of a
    ``RobustScaler`` and an mlxtend ``SequentialFeatureSelector`` wrapped
    around a ridge model appropriate for the problem type.

    Args:
        problem_type: ``'regression'`` (Ridge, scored by negative MSE) or
            ``'classification'`` (RidgeClassifier, scored by weighted F1).
        k_features: ``'best'`` for the most extensive search, ``(min, max)``
            to search a range, or an exact integer count of features.
        cv: Number of cross-validation folds used inside the selector.
        forward: Select features in the forward direction when True.
        floating: Enable the floating variant, which may drop a feature
            again after it has been added.
        verbose: Verbosity level forwarded to the selector.
        n_jobs: Number of parallel jobs for cross-validation.
        pre_dispatch: Parallel pre-dispatch setting forwarded to the selector.
        config: Optional ``FeatureSelectorConfig``; when given, its values
            override every corresponding keyword argument above.

    Returns:
        Pipeline: ``RobustScaler`` -> ``SequentialFeatureSelector``.

    Raises:
        InvalidInputError: If ``problem_type`` is neither ``'regression'``
            nor ``'classification'``.
    """
    # Explicit None check (not truthiness): config is a pydantic model and
    # should override the individual keyword arguments whenever provided.
    if config is not None:
        k_features = config.k_features
        cv = config.cv
        forward = config.forward
        floating = config.floating
        verbose = config.verbose
        n_jobs = config.n_jobs
        pre_dispatch = config.pre_dispatch

    # Pick a cheap linear estimator and a matching scoring metric for the
    # wrapper-based feature search.
    if problem_type == "regression":
        algo = Ridge()
        scoring = "neg_mean_squared_error"
    elif problem_type == "classification":
        algo = RidgeClassifier()
        scoring = "f1_weighted"
    else:
        raise InvalidInputError(
            problem_type,
            "Invalid problem type, possible values are: 'regression' or 'classification'",
        )
    sfs = SequentialFeatureSelector(
        estimator=algo,
        k_features=k_features,  # type: ignore # noqa
        forward=forward,
        floating=floating,
        verbose=verbose,
        scoring=scoring,
        cv=cv,
        n_jobs=n_jobs,
        pre_dispatch=pre_dispatch,  # type: ignore
    )
    # RobustScaler first so the ridge models inside SFS see scaled features;
    # memory=None disables transformer caching.
    return make_pipeline(RobustScaler(), sfs, memory=None)