diff --git a/R/commonMachineLearningRegression.R b/R/commonMachineLearningRegression.R
index 39dee9b1..b9278a59 100644
--- a/R/commonMachineLearningRegression.R
+++ b/R/commonMachineLearningRegression.R
@@ -770,9 +770,9 @@
   table <- createJaspTable(title = gettext("Feature Importance Metrics"))
   table$position <- position
   if (purpose == "regression") {
-    table$dependOn(options = c(.mlRegressionDependencies(options), "featureImportanceTable"))
+    table$dependOn(options = c(.mlRegressionDependencies(options), "featureImportanceTable", "featureImportancePermutations"))
   } else {
-    table$dependOn(options = c(.mlClassificationDependencies(options), "featureImportanceTable"))
+    table$dependOn(options = c(.mlClassificationDependencies(options), "featureImportanceTable", "featureImportancePermutations"))
   }
   table$addColumnInfo(name = "predictor", title = "", type = "string")
   table$addColumnInfo(name = "dl", title = gettext("Mean dropout loss"), type = "number")
@@ -786,13 +786,13 @@
   )
   .setSeedJASP(options) # Set the seed to make results reproducible
   if (purpose == "regression") {
-    fi <- DALEX::model_parts(result[["explainer"]], B = 50)
+    fi <- DALEX::model_parts(result[["explainer"]], B = options[["featureImportancePermutations"]])
   } else if (purpose == "classification") {
-    fi <- DALEX::model_parts(result[["explainer_fi"]], B = 50)
+    fi <- DALEX::model_parts(result[["explainer_fi"]], B = options[["featureImportancePermutations"]])
   }
   fi <- aggregate(x = fi[["dropout_loss"]], by = list(y = fi[["variable"]]), FUN = mean)
   df <- data.frame(predictor = options[["predictors"]], dl = fi[match(options[["predictors"]], fi[["y"]]), "x"])
   df <- df[order(-df[["dl"]]), ]
   table$setData(df)
-  table$addFootnote(gettext("Mean dropout loss is based on 50 permutations."))
+  table$addFootnote(gettextf("Mean dropout loss is based on %1$s permutations.", options[["featureImportancePermutations"]]))
 }
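Note on the parametrised call above: `DALEX::model_parts()` estimates importance by permuting each feature `B` times and recording the resulting increase in loss (the "dropout loss"); the diff replaces the hard-coded `B = 50` with the new `featureImportancePermutations` option. A minimal standalone sketch of the same computation, assuming only base R, the DALEX package, and the built-in `mtcars` data (a toy `lm()` model, not JASP code):

```r
library(DALEX)

# Toy model and explainer standing in for result[["explainer"]].
fit <- lm(mpg ~ wt + hp + disp, data = mtcars)
expl <- DALEX::explain(fit,
  data = mtcars[, c("wt", "hp", "disp")],
  y = mtcars$mpg, verbose = FALSE
)

set.seed(1)                           # counterpart of .setSeedJASP(options)
B <- 25                               # in JASP this is now options[["featureImportancePermutations"]]
fi <- DALEX::model_parts(expl, B = B) # B permutations per feature

# Same aggregation as in the diff: mean dropout loss per feature over the B runs.
# model_parts() also returns "_full_model_" and "_baseline_" rows; matching on
# the predictor names afterwards drops them.
fi <- aggregate(x = fi[["dropout_loss"]], by = list(y = fi[["variable"]]), FUN = mean)
fi[match(c("wt", "hp", "disp"), fi[["y"]]), ]
```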
{ table[["relIn"]] <- result[["relInf"]]$rel.inf .setSeedJASP(options) # Set the seed to make results reproducible if (purpose == "regression") { - fi <- DALEX::model_parts(result[["explainer"]], B = 50) + fi <- DALEX::model_parts(result[["explainer"]], B = options[["featureImportancePermutations"]]) } else if (purpose == "classification") { - fi <- DALEX::model_parts(result[["explainer_fi"]], B = 50) + fi <- DALEX::model_parts(result[["explainer_fi"]], B = options[["featureImportancePermutations"]]) } fi <- aggregate(x = fi[["dropout_loss"]], by = list(y = fi[["variable"]]), FUN = mean) table[["dl"]] <- fi[match(vars, fi[["y"]]), "x"] - table$addFootnote(gettext("Mean dropout loss is based on 50 permutations.")) + table$addFootnote(gettextf("Mean dropout loss is based on %1$s permutations.", options[["featureImportancePermutations"]])) } .mlBoostingPlotOobImprovement <- function(options, jaspResults, ready, position, purpose) { diff --git a/R/mlRegressionDecisionTree.R b/R/mlRegressionDecisionTree.R index 04dc5ff1..1c4285f7 100644 --- a/R/mlRegressionDecisionTree.R +++ b/R/mlRegressionDecisionTree.R @@ -145,7 +145,7 @@ mlRegressionDecisionTree <- function(jaspResults, dataset, options, state = NULL table <- createJaspTable(title = gettext("Feature Importance Metrics")) table$position <- position table$dependOn(options = c( - "featureImportanceTable", "trainingDataManual", "scaleVariables", "target", "predictors", "seed", "setSeed", + "featureImportanceTable", "featureImportancePermutations", "trainingDataManual", "scaleVariables", "target", "predictors", "seed", "setSeed", "testSetIndicatorVariable", "testSetIndicator", "holdoutData", "testDataManual", "minObservationsForSplit", "minObservationsInNode", "interactionDepth", "complexityParameter" )) table$addColumnInfo(name = "predictor", title = " ", type = "string") @@ -169,13 +169,13 @@ mlRegressionDecisionTree <- function(jaspResults, dataset, options, state = NULL table[["imp"]] <- as.numeric(varImpOrder) / sum(as.numeric(varImpOrder)) * 100 .setSeedJASP(options) # Set the seed to make results reproducible if (purpose == "regression") { - fi <- DALEX::model_parts(result[["explainer"]], B = 50) + fi <- DALEX::model_parts(result[["explainer"]], B = options[["featureImportancePermutations"]]) } else if (purpose == "classification") { - fi <- DALEX::model_parts(result[["explainer_fi"]], B = 50) + fi <- DALEX::model_parts(result[["explainer_fi"]], B = options[["featureImportancePermutations"]]) } fi <- aggregate(x = fi[["dropout_loss"]], by = list(y = fi[["variable"]]), FUN = mean) table[["dl"]] <- fi[match(vars, fi[["y"]]), "x"] - table$addFootnote(gettext("Mean dropout loss is based on 50 permutations.")) + table$addFootnote(gettextf("Mean dropout loss is based on %1$s permutations.", options[["featureImportancePermutations"]])) } .mlDecisionTreeTableSplits <- function(options, jaspResults, ready, position, purpose) { diff --git a/R/mlRegressionRandomForest.R b/R/mlRegressionRandomForest.R index 0f60483a..80ad00a3 100644 --- a/R/mlRegressionRandomForest.R +++ b/R/mlRegressionRandomForest.R @@ -162,9 +162,9 @@ mlRegressionRandomForest <- function(jaspResults, dataset, options, ...) 
diff --git a/R/mlRegressionRandomForest.R b/R/mlRegressionRandomForest.R
index 0f60483a..80ad00a3 100644
--- a/R/mlRegressionRandomForest.R
+++ b/R/mlRegressionRandomForest.R
@@ -162,9 +162,9 @@ mlRegressionRandomForest <- function(jaspResults, dataset, options, ...) {
   table <- createJaspTable(title = gettext("Feature Importance Metrics"))
   table$position <- position
   if (purpose == "regression") {
-    table$dependOn(options = c("featureImportanceTable", .mlRegressionDependencies()))
+    table$dependOn(options = c("featureImportanceTable", .mlRegressionDependencies(), "featureImportancePermutations"))
   } else {
-    table$dependOn(options = c("featureImportanceTable", .mlClassificationDependencies()))
+    table$dependOn(options = c("featureImportanceTable", .mlClassificationDependencies(), "featureImportancePermutations"))
   }
   table$addColumnInfo(name = "predictor", title = " ", type = "string")
   table$addColumnInfo(name = "MDiA", title = gettext("Mean decrease in accuracy"), type = "number")
@@ -184,13 +184,13 @@ mlRegressionRandomForest <- function(jaspResults, dataset, options, ...) {
   table[["MDiNI"]] <- result[["varImp"]]$TotalDecrNodeImp
   .setSeedJASP(options) # Set the seed to make results reproducible
   if (purpose == "regression") {
-    fi <- DALEX::model_parts(result[["explainer"]], B = 50)
+    fi <- DALEX::model_parts(result[["explainer"]], B = options[["featureImportancePermutations"]])
   } else if (purpose == "classification") {
-    fi <- DALEX::model_parts(result[["explainer_fi"]], B = 50)
+    fi <- DALEX::model_parts(result[["explainer_fi"]], B = options[["featureImportancePermutations"]])
   }
   fi <- aggregate(x = fi[["dropout_loss"]], by = list(y = fi[["variable"]]), FUN = mean)
   table[["dl"]] <- fi[match(vars, fi[["y"]]), "x"]
-  table$addFootnote(gettext("Mean dropout loss is based on 50 permutations."))
+  table$addFootnote(gettextf("Mean dropout loss is based on %1$s permutations.", options[["featureImportancePermutations"]]))
 }
 
 .mlRandomForestPlotError <- function(options, jaspResults, ready, position, purpose) {
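Why the seed dependency and a configurable `B` both matter in all four R files: the permutations are random, so the reported mean dropout loss is a Monte Carlo estimate whose run-to-run spread shrinks as `B` grows. A rough illustration under the same toy-model assumptions as the sketch above (not JASP code):

```r
library(DALEX)

expl <- DALEX::explain(lm(mpg ~ wt + hp, data = mtcars),
  data = mtcars[, c("wt", "hp")], y = mtcars$mpg, verbose = FALSE
)

# Mean dropout loss for "wt" across replicate runs with different seeds.
replicate_dl <- function(B, reps = 5) {
  sapply(seq_len(reps), function(i) {
    set.seed(i)
    fi <- DALEX::model_parts(expl, B = B)
    mean(fi[fi$variable == "wt", "dropout_loss"])
  })
}

sd(replicate_dl(B = 10))   # noticeable Monte Carlo spread
sd(replicate_dl(B = 100))  # typically much smaller: more permutations, stabler estimate
```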
diff --git a/inst/help/mlClassificationBoosting.md b/inst/help/mlClassificationBoosting.md
index 107c0ca5..3661ce2e 100644
--- a/inst/help/mlClassificationBoosting.md
+++ b/inst/help/mlClassificationBoosting.md
@@ -18,6 +18,7 @@ Boosting works by sequentially adding features to an decision tree ensemble, eac
 - Class proportions: Displays a table that shows the proportions of each class in the data set, training (and validaton), and test set.
 - Model performance: Shows commonly used classification evaluation metrics like precision, recall, the F1-score, support and AUC (area under the ROC curve).
 - Feature importance: Shows the available feature importance metrics for the fitted model.
+  - Permutations: Sets the number of permutations on which the mean dropout loss is based.
 - Explain predictions: Shows the decomposition of the model’s prediction into contributions that can be attributed to different explanatory variables.
 
 #### Plots
diff --git a/inst/help/mlClassificationKnn.md b/inst/help/mlClassificationKnn.md
index fa6d2ef1..524843f6 100644
--- a/inst/help/mlClassificationKnn.md
+++ b/inst/help/mlClassificationKnn.md
@@ -18,6 +18,7 @@ K-nearest neighbors is a method of classification that looks at the *k* number o
 - Class proportions: Displays a table that shows the proportions of each class in the data set, training (and validaton), and test set.
 - Model performance: Shows commonly used classification evaluation metrics like precision, recall, the F1-score, support and AUC (area under the ROC curve).
 - Feature importance: Shows the available feature importance metrics for the fitted model.
+  - Permutations: Sets the number of permutations on which the mean dropout loss is based.
 - Explain predictions: Shows the decomposition of the model’s prediction into contributions that can be attributed to different explanatory variables.
 
 #### Plots
diff --git a/inst/help/mlClassificationLda.md b/inst/help/mlClassificationLda.md
index 9572e523..cd643332 100644
--- a/inst/help/mlClassificationLda.md
+++ b/inst/help/mlClassificationLda.md
@@ -21,6 +21,7 @@ Linear Discriminant Analysis (LDA) is a method of classification that aims to fi
 - Class proportions: Displays a table that shows the proportions of each class in the data set, training (and validaton), and test set.
 - Model performance: Shows commonly used classification evaluation metrics like precision, recall, the F1-score, support and AUC (area under the ROC curve).
 - Feature importance: Shows the available feature importance metrics for the fitted model.
+  - Permutations: Sets the number of permutations on which the mean dropout loss is based.
 - Explain predictions: Shows the decomposition of the model’s prediction into contributions that can be attributed to different explanatory variables.
 - Coefficients: Shows the coefficients for the linear discriminants.
 - Prior and posterior probabilities: Shows the prior and posterior group probabilities. Prior probabilities are the proportions in the training set.
diff --git a/inst/help/mlClassificationNeuralNetwork.md b/inst/help/mlClassificationNeuralNetwork.md
index 521c186f..eda92c51 100644
--- a/inst/help/mlClassificationNeuralNetwork.md
+++ b/inst/help/mlClassificationNeuralNetwork.md
@@ -18,6 +18,7 @@ Feedforward neural networks are predictive algorithms inspired by the biological
 - Class proportions: Displays a table that shows the proportions of each class in the data set, training (and validaton), and test set.
 - Model performance: Shows commonly used classification evaluation metrics like precision, recall, the F1-score, support and AUC (area under the ROC curve).
 - Feature importance: Shows the available feature importance metrics for the fitted model.
+  - Permutations: Sets the number of permutations on which the mean dropout loss is based.
 - Explain predictions: Shows the decomposition of the model’s prediction into contributions that can be attributed to different explanatory variables.
 - Network weights: Shows the connections in the neural network together with their weights.
 
diff --git a/inst/help/mlClassificationRandomForest.md b/inst/help/mlClassificationRandomForest.md
index 24cda8cd..40211998 100644
--- a/inst/help/mlClassificationRandomForest.md
+++ b/inst/help/mlClassificationRandomForest.md
@@ -18,6 +18,7 @@ Random Forest is a method of classification that creates a set of decision trees
 - Class proportions: Displays a table that shows the proportions of each class in the data set, training (and validaton), and test set.
 - Model performance: Shows commonly used classification evaluation metrics like precision, recall, the F1-score, support and AUC (area under the ROC curve).
 - Feature importance: Shows the available feature importance metrics for the fitted model.
+  - Permutations: Sets the number of permutations on which the mean dropout loss is based.
 - Explain predictions: Shows the decomposition of the model’s prediction into contributions that can be attributed to different explanatory variables.
 
 #### Plots
diff --git a/inst/help/mlRegressionBoosting.md b/inst/help/mlRegressionBoosting.md
index 4c627da1..f8a81d47 100644
--- a/inst/help/mlRegressionBoosting.md
+++ b/inst/help/mlRegressionBoosting.md
@@ -16,6 +16,7 @@ Boosting works by sequentially adding features to an decision tree ensemble, eac
 #### Tables
 - Model performance: Shows commonly used classification evaluation metrics like mean squared error (MSE), root mean squared error (RMSE) and R2.
 - Feature importance: Shows the available feature importance metrics for the fitted model.
+  - Permutations: Sets the number of permutations on which the mean dropout loss is based.
 - Explain predictions: Shows the decomposition of the model’s prediction into contributions that can be attributed to different explanatory variables.
 
 #### Plots
diff --git a/inst/help/mlRegressionKnn.md b/inst/help/mlRegressionKnn.md
index b15de03e..e8e61b45 100644
--- a/inst/help/mlRegressionKnn.md
+++ b/inst/help/mlRegressionKnn.md
@@ -16,6 +16,7 @@ K-nearest neighbors is a method of regression that looks at the *k* number of fe
 #### Tables
 - Model performance: Shows commonly used classification evaluation metrics like mean squared error (MSE), root mean squared error (RMSE) and R2.
 - Feature importance: Shows the available feature importance metrics for the fitted model.
+  - Permutations: Sets the number of permutations on which the mean dropout loss is based.
 - Explain predictions: Shows the decomposition of the model’s prediction into contributions that can be attributed to different explanatory variables.
 
 #### Plots
diff --git a/inst/help/mlRegressionNeuralNetwork.md b/inst/help/mlRegressionNeuralNetwork.md
index 058d6b55..1a313b41 100644
--- a/inst/help/mlRegressionNeuralNetwork.md
+++ b/inst/help/mlRegressionNeuralNetwork.md
@@ -16,6 +16,7 @@ Feedforward neural networks are predictive algorithms inspired by the biological
 #### Tables
 - Model performance: Shows commonly used classification evaluation metrics like mean squared error (MSE), root mean squared error (RMSE) and R2.
 - Feature importance: Shows the available feature importance metrics for the fitted model.
+  - Permutations: Sets the number of permutations on which the mean dropout loss is based.
 - Explain predictions: Shows the decomposition of the model’s prediction into contributions that can be attributed to different explanatory variables.
 - Network weights: Shows the connections in the neural network together with their weights.
 
diff --git a/inst/help/mlRegressionRandomForest.md b/inst/help/mlRegressionRandomForest.md
index a1aefc39..c50349cc 100644
--- a/inst/help/mlRegressionRandomForest.md
+++ b/inst/help/mlRegressionRandomForest.md
@@ -16,6 +16,7 @@ Random Forest is a method of regression that creates a set of decision trees tha
 #### Tables
 - Model performance: Shows commonly used classification evaluation metrics like mean squared error (MSE), root mean squared error (RMSE) and R2.
 - Feature importance: Shows the available feature importance metrics for the fitted model.
+  - Permutations: Sets the number of permutations on which the mean dropout loss is based.
 - Explain predictions: Shows the decomposition of the model’s prediction into contributions that can be attributed to different explanatory variables.
 
 #### Plots
diff --git a/inst/help/mlRegressionRegularized.md b/inst/help/mlRegressionRegularized.md
index 2bf98f11..d669569d 100644
--- a/inst/help/mlRegressionRegularized.md
+++ b/inst/help/mlRegressionRegularized.md
@@ -18,6 +18,7 @@ Regularized linear regression is an adaptation of linear regression in which the
 #### Tables
 - Model performance: Shows commonly used classification evaluation metrics like mean squared error (MSE), root mean squared error (RMSE) and R2.
 - Feature importance: Shows the available feature importance metrics for the fitted model.
+  - Permutations: Sets the number of permutations on which the mean dropout loss is based.
 - Explain predictions: Shows the decomposition of the model’s prediction into contributions that can be attributed to different explanatory variables.
 - Regression coefficients: Gives the regression coefficient for each feature.
 
diff --git a/inst/help/mlclassificationdecisiontree.md b/inst/help/mlclassificationdecisiontree.md
index d87d6502..df3f764e 100644
--- a/inst/help/mlclassificationdecisiontree.md
+++ b/inst/help/mlclassificationdecisiontree.md
@@ -18,6 +18,7 @@ Decision Trees is a supervised learning algorithm that uses a decision tree as a
 - Class proportions: Displays a table that shows the proportions of each class in the data set, training (and validaton), and test set.
 - Model performance: Shows commonly used classification evaluation metrics like precision, recall, the F1-score, support and AUC (area under the ROC curve).
 - Feature importance: Shows the available feature importance metrics for the fitted model.
+  - Permutations: Sets the number of permutations on which the mean dropout loss is based.
 - Explain predictions: Shows the decomposition of the model’s prediction into contributions that can be attributed to different explanatory variables.
 - Splits in tree: Shows the split variables, their split point, and the number of observations (which are not missing and are of positive weight) sent left or right by the split. It also shows the improvement in deviance given by this split.
 
diff --git a/inst/help/mlclassificationsvm.md b/inst/help/mlclassificationsvm.md
index d716cd27..f86ebcd2 100644
--- a/inst/help/mlclassificationsvm.md
+++ b/inst/help/mlclassificationsvm.md
@@ -18,6 +18,7 @@ Support Vector Machines is a supervised learning algorithm that maps training ex
 - Class proportions: Displays a table that shows the proportions of each class in the data set, training (and validaton), and test set.
 - Model performance: Shows commonly used classification evaluation metrics like precision, recall, the F1-score, support and AUC (area under the ROC curve).
 - Feature importance: Shows the available feature importance metrics for the fitted model.
+  - Permutations: Sets the number of permutations on which the mean dropout loss is based.
 - Explain predictions: Shows the decomposition of the model’s prediction into contributions that can be attributed to different explanatory variables.
 - Support vectors: Shows a table containing the data (points) indicated as support vectors by the algorithm.
 
diff --git a/inst/help/mlregressiondecisiontree.md b/inst/help/mlregressiondecisiontree.md
index 16c778f6..0c2ecf3b 100644
--- a/inst/help/mlregressiondecisiontree.md
+++ b/inst/help/mlregressiondecisiontree.md
@@ -16,6 +16,7 @@ Decision Trees is a supervised learning algorithm that uses a decision tree as a
 #### Tables
 - Model performance: Shows commonly used classification evaluation metrics like mean squared error (MSE), root mean squared error (RMSE) and R2.
 - Feature importance: Shows the available feature importance metrics for the fitted model.
+  - Permutations: Sets the number of permutations on which the mean dropout loss is based.
 - Explain predictions: Shows the decomposition of the model’s prediction into contributions that can be attributed to different explanatory variables.
 - Splits in tree: Shows the split variables, their split point, and the number of observations (which are not missing and are of positive weight) sent left or right by the split. It also shows the improvement in deviance given by this split.
 
diff --git a/inst/help/mlregressionsvm.md b/inst/help/mlregressionsvm.md
index 983e7b3b..0a8d686c 100644
--- a/inst/help/mlregressionsvm.md
+++ b/inst/help/mlregressionsvm.md
@@ -16,6 +16,7 @@ Support Vector Machines is a supervised learning algorithm that maps training ex
 #### Tables
 - Model performance: Shows commonly used classification evaluation metrics like mean squared error (MSE), root mean squared error (RMSE) and R2.
 - Feature importance: Shows the available feature importance metrics for the fitted model.
+  - Permutations: Sets the number of permutations on which the mean dropout loss is based.
 - Explain predictions: Shows the decomposition of the model’s prediction into contributions that can be attributed to different explanatory variables.
 - Support vectors: Shows a table containing the data (points) indicated as support vectors by the algorithm.
 
diff --git a/inst/qml/common/tables/FeatureImportance.qml b/inst/qml/common/tables/FeatureImportance.qml
index edb0da1a..b3d5860d 100644
--- a/inst/qml/common/tables/FeatureImportance.qml
+++ b/inst/qml/common/tables/FeatureImportance.qml
@@ -23,7 +23,16 @@ import JASP.Widgets 1.0
 
 CheckBox
 {
-	name: "featureImportanceTable"
-	text: qsTr("Feature importance")
-	info: qsTr("Shows the available feature importance metrics for the fitted model.")
+	name:         "featureImportanceTable"
+	text:         qsTr("Feature importance")
+	info:         qsTr("Shows the available feature importance metrics for the fitted model.")
+
+	IntegerField
+	{
+		name:         "featureImportancePermutations"
+		text:         qsTr("Permutations")
+		defaultValue: 50
+		min:          10
+		info:         qsTr("Sets the number of permutations on which the mean dropout loss is based.")
+	}
 }
\ No newline at end of file
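The `IntegerField` above enforces `defaultValue: 50` and `min: 10`, so the R code can rely on `options[["featureImportancePermutations"]]` being a sensible integer. If a defensive fallback were ever wanted on the R side, for instance for analysis files saved before the option existed, a hypothetical guard (not part of this diff) might look like:

```r
# Hypothetical helper; the QML field already enforces the default and minimum.
getFeatureImportancePermutations <- function(options, fallback = 50L) {
  b <- options[["featureImportancePermutations"]]
  if (is.null(b) || !is.numeric(b) || !is.finite(b) || b < 10) fallback else as.integer(b)
}

getFeatureImportancePermutations(list(featureImportancePermutations = 25))  # 25
getFeatureImportancePermutations(list())                                    # 50
```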