diff --git a/R/commonMachineLearningClassification.R b/R/commonMachineLearningClassification.R index 47a2d391..bdb12043 100644 --- a/R/commonMachineLearningClassification.R +++ b/R/commonMachineLearningClassification.R @@ -42,12 +42,7 @@ } .mlClassificationReadData <- function(dataset, options) { - if (is.null(dataset)) { - dataset <- .readDataClassificationRegressionAnalyses(dataset, options) - } - if (length(unlist(options[["predictors"]])) > 0 && options[["scaleVariables"]]) { - dataset[, options[["predictors"]]] <- .scaleNumericData(dataset[, options[["predictors"]], drop = FALSE]) - } + dataset <- .readDataClassificationRegressionAnalyses(dataset, options, include_weights = FALSE) if (options[["target"]] != "") { dataset[, options[["target"]]] <- factor(dataset[, options[["target"]]], ordered = FALSE) } diff --git a/R/commonMachineLearningClustering.R b/R/commonMachineLearningClustering.R index 16b6519f..c13c428d 100644 --- a/R/commonMachineLearningClustering.R +++ b/R/commonMachineLearningClustering.R @@ -33,9 +33,7 @@ .mlClusteringReadData <- function(dataset, options) { predictors <- unlist(options[["predictors"]]) predictors <- predictors[predictors != ""] - if (is.null(dataset)) { - dataset <- .readAndAddCompleteRowIndices(options, "predictors") - } + dataset <- jaspBase::excludeNaListwise(dataset, predictors) if (options[["scaleVariables"]] && length(unlist(options[["predictors"]])) > 0) { dataset <- .scaleNumericData(dataset) } diff --git a/R/commonMachineLearningRegression.R b/R/commonMachineLearningRegression.R index 34f9f6dc..ede6fc57 100644 --- a/R/commonMachineLearningRegression.R +++ b/R/commonMachineLearningRegression.R @@ -44,23 +44,39 @@ return(opt) } -.readDataRegressionAnalyses <- function(dataset, options, jaspResults) { - if (is.null(dataset)) { - dataset <- .readDataClassificationRegressionAnalyses(dataset, options) - } - if (length(unlist(options[["predictors"]])) > 0 && options[["scaleVariables"]]) { - dataset[, options[["predictors"]]] <- .scaleNumericData(dataset[, options[["predictors"]], drop = FALSE]) - } +.readDataRegressionAnalyses <- function(dataset, options, jaspResults, include_weights = FALSE) { + dataset <- .readDataClassificationRegressionAnalyses(dataset, options, include_weights) return(dataset) } -.readDataClassificationRegressionAnalyses <- function(dataset, options) { +.readDataClassificationRegressionAnalyses <- function(dataset, options, include_weights) { + target <- NULL + weights <- NULL testSetIndicator <- NULL - if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator") - testSetIndicator <- "testSetIndicatorVariable" + if (options[["target"]] != "") { + target <- options[["target"]] + } + if (include_weights && options[["weights"]] != "") { + weights <- options[["weights"]] + } + if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator") { + testSetIndicator <- options[["testSetIndicatorVariable"]] + } + + predictors <- unlist(options["predictors"]) + predictors <- predictors[predictors != ""] + dataset <- jaspBase::excludeNaListwise(dataset, c(target, predictors, weights, testSetIndicator)) - return(.readAndAddCompleteRowIndices(options, c("target", "predictors"), testSetIndicator)) + # Scale numeric predictors + if (length(unlist(options[["predictors"]])) > 0 && options[["scaleVariables"]]) { + dataset[, options[["predictors"]]] <- .scaleNumericData(dataset[, options[["predictors"]], drop = FALSE]) + } + # Make sure the test set indicator is numeric + if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator") + dataset[[options[["testSetIndicatorVariable"]]]] <- as.numeric(dataset[[options[["testSetIndicatorVariable"]]]]) + + return(dataset) } .readAndAddCompleteRowIndices <- function(options, optionNames = NULL, optionNamesAsNumeric = NULL) { @@ -72,9 +88,7 @@ options[[name2]] <- rep("scale", length(options[[name]])) } dataset <- jaspBase::readDataSetByVariableTypes(options, c(optionNames, optionNamesAsNumeric)) - complete.index <- which(complete.cases(dataset)) - dataset <- na.omit(dataset) - rownames(dataset) <- as.character(complete.index) + dataset <- jaspBase::excludeNaListwise(dataset, c(options[["target"]], options[["predictors"]])) return(dataset) } diff --git a/R/mlPrediction.R b/R/mlPrediction.R index 13dcee07..328e6a42 100644 --- a/R/mlPrediction.R +++ b/R/mlPrediction.R @@ -422,7 +422,7 @@ is.jaspMachineLearning <- function(x) { selection <- predictions[indexes] cols <- list(row = indexes, pred = selection) if (options[["predictionsTableFeatures"]]) { - for (i in model[["jaspVars"]][["encoded"]]$predictors) { + for (i in colnames(dataset)) { if (.columnIsNominal(i)) { table$addColumnInfo(name = i, title = i, type = "string") var <- levels(dataset[[i]])[dataset[[i]]] diff --git a/R/mlRegressionLinear.R b/R/mlRegressionLinear.R index e3b9b54f..71b844c0 100644 --- a/R/mlRegressionLinear.R +++ b/R/mlRegressionLinear.R @@ -18,7 +18,7 @@ mlRegressionLinear <- function(jaspResults, dataset, options, ...) { # Preparatory work - dataset <- .mlRegressionRegularizedReadData(dataset, options) + dataset <- .readDataRegressionAnalyses(dataset, options, include_weights = TRUE) .mlRegressionErrorHandling(dataset, options, type = "lm") # Check if analysis is ready to run diff --git a/R/mlRegressionRegularized.R b/R/mlRegressionRegularized.R index 2e5af926..b02575ec 100644 --- a/R/mlRegressionRegularized.R +++ b/R/mlRegressionRegularized.R @@ -18,7 +18,7 @@ mlRegressionRegularized <- function(jaspResults, dataset, options, ...) { # Preparatory work - dataset <- .mlRegressionRegularizedReadData(dataset, options) + dataset <- .readDataRegressionAnalyses(dataset, options, include_weights = TRUE) .mlRegressionErrorHandling(dataset, options, type = "regularized") # Check if analysis is ready to run @@ -58,31 +58,6 @@ mlRegressionRegularized <- function(jaspResults, dataset, options, ...) { .mlRegressionRegularizedPlotLambda(options, jaspResults, ready, position = 10) } -# Read dataset -.mlRegressionRegularizedReadData <- function(dataset, options) { - target <- NULL - weights <- NULL - testSetIndicator <- NULL - if (options[["target"]] != "") { - target <- options[["target"]] - } - if (options[["weights"]] != "") { - weights <- options[["weights"]] - } - if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator") - testSetIndicator <- "testSetIndicatorVariable" - - predictors <- unlist(options["predictors"]) - predictors <- predictors[predictors != ""] - if (is.null(dataset)) { - dataset <- .readAndAddCompleteRowIndices(options, c("target", "predictors", "weights"), testSetIndicator) - } - if (length(unlist(options[["predictors"]])) > 0 && options[["scaleVariables"]]) { - dataset[, options[["predictors"]]] <- .scaleNumericData(dataset[, options[["predictors"]], drop = FALSE]) - } - return(dataset) -} - .regularizedRegression <- function(dataset, options, jaspResults) { # Set model-specific parameters alpha <- switch(options[["penalty"]], diff --git a/inst/help/mlPrediction.md b/inst/help/mlPrediction.md index cf63d315..30611f6f 100644 --- a/inst/help/mlPrediction.md +++ b/inst/help/mlPrediction.md @@ -1,6 +1,8 @@ Prediction === +The prediction analysis enables you to load a trained machine learning model and apply it to new data. It is important that the features in the new dataset have the same names as in the original dataset used for training. + ### Input #### Trained Model diff --git a/inst/help/mlPrediction_nl.md b/inst/help/mlPrediction_nl.md index f457a963..2cc993de 100644 --- a/inst/help/mlPrediction_nl.md +++ b/inst/help/mlPrediction_nl.md @@ -1,6 +1,8 @@ Voorspellen === +Met de voorspellingsanalyse kun je een getraind machine-learningmodel laden en toepassen op nieuwe gegevens. Het is belangrijk dat de kenmerken in de nieuwe dataset dezelfde namen hebben als in de oorspronkelijke dataset die voor de training is gebruikt. + ### Invoer #### Getraind model diff --git a/inst/qml/mlPrediction.qml b/inst/qml/mlPrediction.qml index 133850b1..5a182646 100644 --- a/inst/qml/mlPrediction.qml +++ b/inst/qml/mlPrediction.qml @@ -26,7 +26,7 @@ import "./common/tables" as TAB Form { - info: qsTr("The prediction analysis enables you to load a trained machine learning model and apply it to new data.") + info: qsTr("The prediction analysis enables you to load a trained machine learning model and apply it to new data. It is important that the features in the new dataset have the same names as in the original dataset used for training.") FileSelector { @@ -53,7 +53,7 @@ Form id: predictors name: "predictors" title: qsTr("Features") - allowedColumns: ["scale", "ordinal", "nominal"] + allowedColumns: ["scale", "nominal"] allowAnalysisOwnComputedColumns: false } }