Variance filter calling stats::var().
Argument na.rm defaults to TRUE here.
References
For a benchmark of filter methods:
Bommert A, Sun X, Bischl B, Rahnenführer J, Lang M (2020). “Benchmark for filter methods for feature selection in high-dimensional classification data.” Computational Statistics & Data Analysis, 143, 106839. doi:10.1016/j.csda.2019.106839.
See also
PipeOpFilter for filter-based feature selection.
Other Filter: Filter, mlr_filters, mlr_filters_anova, mlr_filters_auc, mlr_filters_boruta, mlr_filters_carscore, mlr_filters_carsurvscore, mlr_filters_cmim, mlr_filters_correlation, mlr_filters_disr, mlr_filters_find_correlation, mlr_filters_importance, mlr_filters_information_gain, mlr_filters_jmi, mlr_filters_jmim, mlr_filters_kruskal_test, mlr_filters_mim, mlr_filters_mrmr, mlr_filters_njmim, mlr_filters_performance, mlr_filters_permutation, mlr_filters_relief, mlr_filters_selected_features, mlr_filters_univariate_cox
Super class
mlr3filters::Filter -> FilterVariance
Examples
task = mlr3::tsk("mtcars")
filter = flt("variance")
filter$calculate(task)
head(filter$scores, 3)
#> disp hp qsec
#> 15360.799829 4700.866935 3.193166
as.data.table(filter)
#> feature score
#> <char> <num>
#> 1: disp 1.536080e+04
#> 2: hp 4.700867e+03
#> 3: qsec 3.193166e+00
#> 4: cyl 3.189516e+00
#> 5: carb 2.608871e+00
#> 6: wt 9.573790e-01
#> 7: gear 5.443548e-01
#> 8: drat 2.858814e-01
#> 9: vs 2.540323e-01
#> 10: am 2.489919e-01
if (mlr3misc::require_namespaces(c("mlr3pipelines", "rpart"), quietly = TRUE)) {
library("mlr3pipelines")
task = mlr3::tsk("spam")
# Note: the value of `filter.frac` is chosen arbitrarily here and should be tuned.
graph = po("filter", filter = flt("variance"), filter.frac = 0.5) %>>%
po("learner", mlr3::lrn("classif.rpart"))
graph$train(task)
}
#> $classif.rpart.output
#> NULL
#>