Skip to content

Commit 7b3b12c

Browse files
authored
Merge pull request #1128 from JuliaAI/dev
For a 0.20.6 release
2 parents a0d7a08 + 33d8852 commit 7b3b12c

10 files changed

+163
-74
lines changed

Project.toml

+42-3
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
name = "MLJ"
22
uuid = "add582a8-e3ab-11e8-2d5e-e98b27df1bc7"
33
authors = ["Anthony D. Blaom <[email protected]>"]
4-
version = "0.20.5"
4+
version = "0.20.6"
55

66
[deps]
77
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
88
ComputationalResources = "ed09eef8-17a6-5b46-8889-db040fac31e3"
99
Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
1010
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
11+
FeatureSelection = "33837fe5-dbff-4c9e-8c2f-c5612fe2b8b6"
1112
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
1213
MLJBalancing = "45f359ea-796d-4f51-95a5-deb1a414c586"
1314
MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
@@ -31,12 +32,13 @@ Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
3132
CategoricalArrays = "0.8,0.9, 0.10"
3233
ComputationalResources = "0.3"
3334
Distributions = "0.21,0.22,0.23, 0.24, 0.25"
35+
FeatureSelection = "0.1.1"
3436
MLJBalancing = "0.1"
3537
MLJBase = "1"
3638
MLJEnsembles = "0.4"
3739
MLJFlow = "0.5"
3840
MLJIteration = "0.6"
39-
MLJModels = "0.16"
41+
MLJModels = "0.17"
4042
MLJTestIntegration = "0.5.0"
4143
MLJTuning = "0.8"
4244
OpenML = "0.2,0.3"
@@ -89,4 +91,41 @@ SymbolicRegression = "8254be44-1295-4e6a-a16d-46603ac705cb"
8991
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
9092

9193
[targets]
92-
test = ["BetaML", "CatBoost", "EvoLinear", "EvoTrees", "Imbalance", "InteractiveUtils", "LightGBM", "MLJClusteringInterface", "MLJDecisionTreeInterface", "MLJFlux", "MLJGLMInterface", "MLJLIBSVMInterface", "MLJLinearModels", "MLJMultivariateStatsInterface", "MLJNaiveBayesInterface", "MLJScikitLearnInterface", "MLJTSVDInterface", "MLJTestInterface", "MLJTestIntegration", "MLJText", "MLJXGBoostInterface", "Markdown", "NearestNeighborModels", "OneRule", "OutlierDetectionNeighbors", "OutlierDetectionPython", "ParallelKMeans", "PartialLeastSquaresRegressor", "PartitionedLS", "SelfOrganizingMaps", "SIRUS", "SymbolicRegression", "StableRNGs", "Suppressor","Test"]
94+
test = [
95+
"BetaML",
96+
"CatBoost",
97+
"EvoLinear",
98+
"EvoTrees",
99+
"Imbalance",
100+
"InteractiveUtils",
101+
"LightGBM",
102+
"MLJClusteringInterface",
103+
"MLJDecisionTreeInterface",
104+
"MLJFlux",
105+
"MLJGLMInterface",
106+
"MLJLIBSVMInterface",
107+
"MLJLinearModels",
108+
"MLJMultivariateStatsInterface",
109+
"MLJNaiveBayesInterface",
110+
"MLJScikitLearnInterface",
111+
"MLJTSVDInterface",
112+
"MLJTestInterface",
113+
"MLJTestIntegration",
114+
"MLJText",
115+
"MLJXGBoostInterface",
116+
"Markdown",
117+
"NearestNeighborModels",
118+
"OneRule",
119+
"OutlierDetectionNeighbors",
120+
"OutlierDetectionPython",
121+
"ParallelKMeans",
122+
"PartialLeastSquaresRegressor",
123+
"PartitionedLS",
124+
"SelfOrganizingMaps",
125+
"SIRUS",
126+
"SymbolicRegression",
127+
"StableRNGs",
128+
"Suppressor",
129+
"Test",
130+
]
131+

docs/ModelDescriptors.toml

+23-13
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,9 @@ AutoEncoder_BetaML = ["dimension_reduction"]
1111
BM25Transformer_MLJText = ["encoders", "text_analysis"]
1212
BaggingClassifier_MLJScikitLearnInterface = ["classification", "ensemble_models"]
1313
BaggingRegressor_MLJScikitLearnInterface = ["regression", "ensemble_models"]
14-
BalancedBaggingClassifier_MLJBalancing = ["class_imbalance", "classification"]
14+
BalancedBaggingClassifier_MLJBalancing = ["class_imbalance", "classification", "meta_algorithms"]
15+
BinaryThresholdPredictor_MLJModels = ["meta_algorithms", "classification"]
16+
BalancedModel_MLJBalancing = ["class_imbalance", "meta_algorithms"]
1517
BayesianLDA_MultivariateStats = ["dimension_reduction", "classification", "Bayesian_models"]
1618
BayesianLDA_MLJScikitLearnInterface = ["dimension_reduction", "classification", "Bayesian_models"]
1719
BayesianQDA_MLJScikitLearnInterface = ["dimension_reduction", "classification", "Bayesian_models"]
@@ -52,6 +54,7 @@ ElasticNetCVRegressor_MLJScikitLearnInterface = ["regression"]
5254
ElasticNetRegressor_MLJLinearModels = ["regression"]
5355
ElasticNetRegressor_MLJScikitLearnInterface = ["regression"]
5456
ENNUndersampler_Imbalance = ["class_imbalance"]
57+
EnsembleModel_MLJEnsembles = ["ensemble_models", "meta_algorithms"]
5558
EpsilonSVR_LIBSVM = ["regression"]
5659
EvoLinearRegressor_EvoLinear = ["regression"]
5760
EvoTreeClassifier_EvoTrees = ["classification", "ensemble_models", "iterative_models"]
@@ -63,8 +66,8 @@ EvoSplineRegressor_EvoLinear = ["regression", "ensemble_models", "iterative_mode
6366
ExtraTreesClassifier_MLJScikitLearnInterface = ["classification", "iterative_models"]
6467
ExtraTreesRegressor_MLJScikitLearnInterface = ["regression", "iterative_models"]
6568
FactorAnalysis_MultivariateStats = ["dimension_reduction", ]
66-
FeatureAgglomeration_MLJScikitLearnInterface = ["clustering", "static_models"]
67-
FeatureSelector_MLJModels = ["dimension_reduction", ]
69+
FeatureAgglomeration_MLJScikitLearnInterface = ["clustering", "static_models", "feature_engineering"]
70+
FeatureSelector_FeatureSelection = ["dimension_reduction", "feature_engineering"]
6871
FillImputer_MLJModels = ["missing_value_imputation", ]
6972
GaussianMixtureClusterer_BetaML = ["clustering", "distribution_fitter"]
7073
GaussianMixtureImputer_BetaML = ["missing_value_imputation", "distribution_fitter"]
@@ -88,7 +91,8 @@ ICA_MultivariateStats = ["encoders"]
8891
IForestDetector_OutlierDetectionPython = ["outlier_detection"]
8992
ImageClassifier_MLJFlux = ["classification", "image_processing", "iterative_models"]
9093
INNEDetector_OutlierDetectionPython = ["outlier_detection"]
91-
InteractionTransformer_MLJModels = ["static_models"]
94+
InteractionTransformer_MLJModels = ["static_models", "feature_engineering"]
95+
IteratedModel_MLJIteration = ["iterative_models", "meta_algorithms"]
9296
KDEDetector_OutlierDetectionPython = ["outlier_detection"]
9397
KMeansClusterer_BetaML = ["clustering"]
9498
KMeans_Clustering = ["clustering", "dimension_reduction", ]
@@ -104,7 +108,7 @@ KNeighborsClassifier_MLJScikitLearnInterface = ["classification"]
104108
KNeighborsRegressor_MLJScikitLearnInterface = ["regression"]
105109
KPLSRegressor_PartialLeastSquaresRegressor = ["regression"]
106110
KernelPCA_MultivariateStats = ["dimension_reduction", ]
107-
KernelPerceptronClassifier_BetaML = ["classification"]
111+
KernelPerceptronClassifier_BetaML = ["classification", "neural networks"]
108112
LADRegressor_MLJLinearModels = ["regression"]
109113
LDA_MultivariateStats = ["classification", "dimension_reduction", ]
110114
LGBMClassifier_LightGBM = ["classification", "ensemble_models", "iterative_models"]
@@ -146,14 +150,14 @@ MultitargetGaussianMixtureRegressor_BetaML = ["regression", "distribution_fitter
146150
MultitargetKNNClassifier_NearestNeighborModels = ["classification"]
147151
MultitargetKNNRegressor_NearestNeighborModels = ["regression"]
148152
MultitargetLinearRegressor_MultivariateStats = ["regression"]
149-
MultitargetNeuralNetworkRegressor_BetaML = ["regression"]
150-
MultitargetNeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models"]
153+
MultitargetNeuralNetworkRegressor_BetaML = ["regression", "neural networks"]
154+
MultitargetNeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models", "neural networks"]
151155
MultitargetRidgeRegressor_MultivariateStats = ["regression"]
152156
MultitargetSRRegressor_SymbolicRegression = ["regression"]
153-
NeuralNetworkClassifier_BetaML = ["classification"]
154-
NeuralNetworkClassifier_MLJFlux = ["classification", "iterative_models"]
155-
NeuralNetworkRegressor_BetaML = ["regression"]
156-
NeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models"]
157+
NeuralNetworkClassifier_BetaML = ["classification", "neural networks"]
158+
NeuralNetworkClassifier_MLJFlux = ["classification", "iterative_models", "neural networks"]
159+
NeuralNetworkRegressor_BetaML = ["regression", "neural networks"]
160+
NeuralNetworkRegressor_MLJFlux = ["regression", "iterative_models", "neural networks"]
157161
NuSVC_LIBSVM = ["classification"]
158162
NuSVR_LIBSVM = ["regression"]
159163
OCSVMDetector_OutlierDetectionPython = ["outlier_detection"]
@@ -171,8 +175,9 @@ PartLS_PartitionedLS = ["regression"]
171175
PassiveAggressiveClassifier_MLJScikitLearnInterface = ["classification"]
172176
PassiveAggressiveRegressor_MLJScikitLearnInterface = ["regression"]
173177
PegasosClassifier_BetaML = ["classification"]
174-
PerceptronClassifier_BetaML = ["classification", "iterative_models"]
175-
PerceptronClassifier_MLJScikitLearnInterface = ["classification", "iterative_models"]
178+
PerceptronClassifier_BetaML = ["classification", "iterative_models", "neural networks"]
179+
PerceptronClassifier_MLJScikitLearnInterface = ["classification", "iterative_models", "neural networks"]
180+
Pipeline_MLJBase = ["meta_algorithms"]
176181
ProbabilisticNuSVC_LIBSVM = ["classification"]
177182
ProbabilisticSGDClassifier_MLJScikitLearnInterface = ["classification"]
178183
ProbabilisticSVC_LIBSVM = ["classification"]
@@ -190,6 +195,8 @@ RandomForestImputer_BetaML = ["missing_value_imputation", "ensemble_models", "it
190195
RandomForestRegressor_BetaML = ["regression", "ensemble_models", "iterative_models"]
191196
RandomForestRegressor_DecisionTree = ["regression", "ensemble_models", "iterative_models"]
192197
RandomForestRegressor_MLJScikitLearnInterface = ["regression", "ensemble_models", "iterative_models"]
198+
RecursiveFeatureElimination_FeatureSelection = ["dimension_reduction", "meta_algorithms", "feature_engineering"]
199+
Resampler_MLJBase = ["meta_algorithms"]
193200
RidgeCVClassifier_MLJScikitLearnInterface = ["classification"]
194201
RidgeCVRegressor_MLJScikitLearnInterface = ["regression"]
195202
RidgeClassifier_MLJScikitLearnInterface = ["classification"]
@@ -210,6 +217,7 @@ StableForestClassifier_SIRUS = ["classification"]
210217
StableForestRegressor_SIRUS = ["regression"]
211218
StableRulesClassifier_SIRUS = ["classification"]
212219
StableRulesRegressor_SIRUS = ["regression"]
220+
Stack_MLJBase = ["meta_algorithms", "ensemble_models"]
213221
SVC_LIBSVM = ["classification"]
214222
SVMClassifier_MLJScikitLearnInterface = ["classification"]
215223
SVMLinearClassifier_MLJScikitLearnInterface = ["classification"]
@@ -222,9 +230,11 @@ SpectralClustering_MLJScikitLearnInterface = ["clustering", "static_models"]
222230
Standardizer_MLJModels = ["encoders"]
223231
SubspaceLDA_MultivariateStats = ["classification", "dimension_reduction"]
224232
TomekUndersampler_Imbalance = ["class_imbalance"]
233+
TunedModel_MLJTuning = ["meta_algorithms"]
225234
TSVDTransformer_TSVD = ["dimension_reduction"]
226235
TfidfTransformer_MLJText = ["encoders", "text_analysis"]
227236
TheilSenRegressor_MLJScikitLearnInterface = ["regression"]
237+
TransformedTargetModel_MLJBase = ["meta_algorithms", "outlier_detection"]
228238
UnivariateBoxCoxTransformer_MLJModels = ["encoders"]
229239
UnivariateDiscretizer_MLJModels = ["encoders"]
230240
UnivariateFillImputer_MLJModels = ["missing_value_imputation"]

docs/make.jl

+58-39
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import MLJ.MLJModels
1515
import MLJ.MLJEnsembles
1616
import MLJ.ScientificTypes
1717
import MLJ.MLJBalancing
18+
import MLJ.FeatureSelection
1819
import ScientificTypesBase
1920
import Distributions
2021
using CategoricalArrays
@@ -37,7 +38,7 @@ isempty(problems) || error(
3738
# compose the individual model docstring pages:
3839
@info "Getting individual model docstrings from the registry and generating "*
3940
"pages for them, written at /docs/src/models/ ."
40-
for model in models()
41+
for model in models(wrappers=true)
4142
write_page(model)
4243
end
4344

@@ -54,45 +55,62 @@ pages = [
5455
"Model Browser" => "model_browser.md",
5556
"About MLJ" => "about_mlj.md",
5657
"Learning MLJ" => "learning_mlj.md",
57-
"Getting Started" => "getting_started.md",
58-
"Common MLJ Workflows" => "common_mlj_workflows.md",
59-
"Working with Categorical Data" => "working_with_categorical_data.md",
60-
"Model Search" => "model_search.md",
61-
"Loading Model Code" => "loading_model_code.md",
62-
"Machines" => "machines.md",
63-
"Evaluating Model Performance" => "evaluating_model_performance.md",
64-
"Performance Measures" => "performance_measures.md",
65-
"Weights" => "weights.md",
66-
"Tuning Models" => "tuning_models.md",
67-
"Learning Curves" => "learning_curves.md",
68-
"Preparing Data" => "preparing_data.md",
69-
"Transformers and Other Unsupervised models" => "transformers.md",
70-
"More on Probabilistic Predictors" => "more_on_probabilistic_predictors.md",
71-
"Composing Models" => "composing_models.md",
72-
"Linear Pipelines" => "linear_pipelines.md",
73-
"Target Transformations" => "target_transformations.md",
74-
"Homogeneous Ensembles" => "homogeneous_ensembles.md",
75-
"Correcting Class Imbalance" => "correcting_class_imbalance.md",
76-
"Model Stacking" => "model_stacking.md",
77-
"Learning Networks" => "learning_networks.md",
78-
"Controlling Iterative Models" => "controlling_iterative_models.md",
79-
"Generating Synthetic Data" => "generating_synthetic_data.md",
80-
"Logging Workflows" => "logging_workflows.md",
81-
"OpenML Integration" => "openml_integration.md",
82-
"Acceleration and Parallelism" => "acceleration_and_parallelism.md",
83-
"Simple User Defined Models" => "simple_user_defined_models.md",
84-
"Quick-Start Guide to Adding Models" =>
85-
"quick_start_guide_to_adding_models.md",
86-
"Adding Models for General Use" => "adding_models_for_general_use.md",
87-
"Modifying Behavior" => "modifying_behavior.md",
88-
"Internals" => "internals.md",
89-
"List of Supported Models" => "list_of_supported_models.md",
90-
"Third Party Packages" => "third_party_packages.md",
91-
"Glossary" => "glossary.md",
92-
"MLJ Cheatsheet" => "mlj_cheatsheet.md",
93-
"FAQ" => "frequently_asked_questions.md",
58+
"Basics" => [
59+
"Getting Started" => "getting_started.md",
60+
"Common MLJ Workflows" => "common_mlj_workflows.md",
61+
"Machines" => "machines.md",
62+
"MLJ Cheatsheet" => "mlj_cheatsheet.md",
63+
],
64+
"Data" => [
65+
"Working with Categorical Data" => "working_with_categorical_data.md",
66+
"Preparing Data" => "preparing_data.md",
67+
"Generating Synthetic Data" => "generating_synthetic_data.md",
68+
"OpenML Integration" => "openml_integration.md",
69+
],
70+
"Model Basics" => [
71+
"Model Search" => "model_search.md",
72+
"Loading Model Code" => "loading_model_code.md",
73+
"Transformers and Other Unsupervised models" => "transformers.md",
74+
"List of Supported Models" => "list_of_supported_models.md",
75+
],
76+
"Meta-algorithms" => [
77+
"Evaluating Model Performance" => "evaluating_model_performance.md",
78+
"Tuning Models" => "tuning_models.md",
79+
"Learning Curves" => "learning_curves.md",
80+
"Controlling Iterative Models" => "controlling_iterative_models.md",
81+
"Correcting Class Imbalance" => "correcting_class_imbalance.md",
82+
"Thresholding Probabilistic Predictors" =>
83+
"thresholding_probabilistic_predictors.md",
84+
"Target Transformations" => "target_transformations.md",
85+
"Homogeneous Ensembles" => "homogeneous_ensembles.md",
86+
],
87+
"Model Composition" => [
88+
"Composing Models" => "composing_models.md",
89+
"Linear Pipelines" => "linear_pipelines.md",
90+
"Model Stacking" => "model_stacking.md",
91+
"Learning Networks" => "learning_networks.md",
92+
],
93+
"Third Party Tools" => [
94+
"Logging Workflows using MLflow" => "logging_workflows.md",
95+
"Third Party Packages" => "third_party_packages.md",
96+
],
97+
"Customization and Extension" => [
98+
"Simple User Defined Models" => "simple_user_defined_models.md",
99+
"Quick-Start Guide to Adding Models" =>
100+
"quick_start_guide_to_adding_models.md",
101+
"Adding Models for General Use" => "adding_models_for_general_use.md",
102+
"Modifying Behavior" => "modifying_behavior.md",
103+
"Internals" => "internals.md",
104+
],
105+
"Miscellaneous" => [
106+
"Performance Measures" => "performance_measures.md",
107+
"Weights" => "weights.md",
108+
"Acceleration and Parallelism" => "acceleration_and_parallelism.md",
109+
"Glossary" => "glossary.md",
110+
"FAQ" => "frequently_asked_questions.md",
111+
],
94112
"Index of Methods" => "api.md",
95-
]
113+
]
96114

97115
for (k, v) in pages
98116
println("$k\t=>$v")
@@ -118,6 +136,7 @@ makedocs(
118136
IterationControl,
119137
CategoricalDistributions,
120138
StatisticalMeasures,
139+
FeatureSelection,
121140
],
122141
pages = pages,
123142
warnonly = [:cross_references, :missing_docs],

docs/model_docstring_tools.jl

+6-5
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ const PATH_TO_MODEL_DOCS = joinpath(@__DIR__, "src", "models")
55
"""
66
remove_doc_refs(str::AbstractString)
77
8-
Removes `@ref` references from `str. For example, a substring of the form
8+
Removes `@ref` references from `str`. For example, a substring of the form
99
"[`some.thing_like_this123!`](@ref)" is replaced with "`some.thing_like_this123!`".
1010
1111
"""
@@ -27,8 +27,8 @@ handle(model) = model.name*"_"*model.package_name
2727
**Private method.**
2828
2929
Compose and write to file the documentation page for `model`. Here `model` is an entry in
30-
the MLJ Model Registry, i.e., an element of `MLJModels.models()`. The file name has the
31-
form `"ModelName_PackageName.md"`, for example,
30+
the MLJ Model Registry, i.e., an element of `MLJModels.models(; wrappers=true)`. The file
31+
name has the form `"ModelName_PackageName.md"`, for example,
3232
`"DecisionTreeClassifier_DecisionTree.md"`. Such a page can be referenced from any other
3333
markdown page in /docs/src/ like this: `[DecisionTreeClassifier](@ref
3434
DecisionTreeClassifier_DecisionTree)`.
@@ -56,6 +56,7 @@ const DESCRIPTORS_GIVEN_HANDLE =
5656
# determine the list of all descriptors, ranked by frequency:
5757
const descriptors = vcat(values(DESCRIPTORS_GIVEN_HANDLE)...)
5858
const ranking = MLJBase.countmap(descriptors)
59+
# Force the meta-algorithms descriptor to sort first. The key must use the same spelling
# as in ModelDescriptors.toml ("meta_algorithms"); otherwise the override only adds an
# unused entry and the frequency-based order below is unchanged.
ranking["meta_algorithms"] = 1e10
5960
const DESCRIPTORS = sort(unique(descriptors), by=d -> ranking[d], rev=true)
6061
const HANDLES = keys(DESCRIPTORS_GIVEN_HANDLE)
6162

@@ -67,7 +68,7 @@ handle as key in /docs/src/ModelDescriptors.toml.
6768
6869
"""
6970
function models_missing_descriptors()
70-
handles = handle.(models())
71+
handles = handle.(models(wrappers=true))
7172
filter(handles) do h
7273
!(h in HANDLES)
7374
end
@@ -82,7 +83,7 @@ Return the list of models with a given `descriptor`, such as "regressor", as
8283
these appear in /docs/ModelDescriptors.toml.
8384
8485
"""
85-
modelswith(descriptor) = filter(models()) do model
86+
modelswith(descriptor) = filter(models(wrappers=true)) do model
8687
descriptor in DESCRIPTORS_GIVEN_HANDLE[handle(model)]
8788
end
8889

docs/src/index.md

+4-3
Original file line numberDiff line numberDiff line change
@@ -56,18 +56,19 @@ To support MLJ development, please cite these works or star the repo:
5656
[Model Search](@ref model_search) |
5757
[Loading Model Code](@ref) |
5858
[Transformers and Other Unsupervised Models](@ref) |
59-
[More on Probabilistic Predictors](@ref) |
60-
[Composing Models](@ref) |
6159
[Simple User Defined Models](@ref) |
6260
[List of Supported Models](@ref model_list) |
6361
[Third Party Packages](@ref)
6462

6563
### Meta-algorithms
6664
[Evaluating Model Performance](@ref) |
6765
[Tuning Models](@ref) |
66+
[Composing Models](@ref) |
6867
[Controlling Iterative Models](@ref) |
6968
[Learning Curves](@ref) |
70-
[Correcting Class Imbalance](@ref)
69+
[Correcting Class Imbalance](@ref) |
70+
[Thresholding Probabilistic Predictors](@ref)
71+
7172

7273
### Composition
7374
[Composing Models](@ref) |

0 commit comments

Comments
 (0)