Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better use of preload data #381

Merged
merged 5 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions R/commonMachineLearningClassification.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,7 @@
}

.mlClassificationReadData <- function(dataset, options) {
if (is.null(dataset)) {
dataset <- .readDataClassificationRegressionAnalyses(dataset, options)
}
if (length(unlist(options[["predictors"]])) > 0 && options[["scaleVariables"]]) {
dataset[, options[["predictors"]]] <- .scaleNumericData(dataset[, options[["predictors"]], drop = FALSE])
}
dataset <- .readDataClassificationRegressionAnalyses(dataset, options, include_weights = FALSE)
if (options[["target"]] != "") {
dataset[, options[["target"]]] <- factor(dataset[, options[["target"]]], ordered = FALSE)
}
Expand Down
4 changes: 1 addition & 3 deletions R/commonMachineLearningClustering.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,7 @@
.mlClusteringReadData <- function(dataset, options) {
predictors <- unlist(options[["predictors"]])
predictors <- predictors[predictors != ""]
if (is.null(dataset)) {
dataset <- .readAndAddCompleteRowIndices(options, "predictors")
}
dataset <- jaspBase::excludeNaListwise(dataset, predictors)
if (options[["scaleVariables"]] && length(unlist(options[["predictors"]])) > 0) {
dataset <- .scaleNumericData(dataset)
}
Expand Down
42 changes: 28 additions & 14 deletions R/commonMachineLearningRegression.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,23 +44,39 @@
return(opt)
}

.readDataRegressionAnalyses <- function(dataset, options, jaspResults) {
if (is.null(dataset)) {
dataset <- .readDataClassificationRegressionAnalyses(dataset, options)
}
if (length(unlist(options[["predictors"]])) > 0 && options[["scaleVariables"]]) {
dataset[, options[["predictors"]]] <- .scaleNumericData(dataset[, options[["predictors"]], drop = FALSE])
}
.readDataRegressionAnalyses <- function(dataset, options, jaspResults, include_weights = FALSE) {
dataset <- .readDataClassificationRegressionAnalyses(dataset, options, include_weights)
return(dataset)
}

.readDataClassificationRegressionAnalyses <- function(dataset, options) {
.readDataClassificationRegressionAnalyses <- function(dataset, options, include_weights) {

target <- NULL
weights <- NULL
testSetIndicator <- NULL
if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator")
testSetIndicator <- "testSetIndicatorVariable"
if (options[["target"]] != "") {
target <- options[["target"]]
}
if (include_weights && options[["weights"]] != "") {
weights <- options[["weights"]]
}
if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator") {
testSetIndicator <- options[["testSetIndicatorVariable"]]
}

predictors <- unlist(options["predictors"])
predictors <- predictors[predictors != ""]
dataset <- jaspBase::excludeNaListwise(dataset, c(target, predictors, weights, testSetIndicator))

return(.readAndAddCompleteRowIndices(options, c("target", "predictors"), testSetIndicator))
# Scale numeric predictors
if (length(unlist(options[["predictors"]])) > 0 && options[["scaleVariables"]]) {
dataset[, options[["predictors"]]] <- .scaleNumericData(dataset[, options[["predictors"]], drop = FALSE])
}
# Make sure the test set indicator is numeric
if (options[["testSetIndicatorVariable"]] != "" && options[["holdoutData"]] == "testSetIndicator")
dataset[[options[["testSetIndicatorVariable"]]]] <- as.numeric(dataset[[options[["testSetIndicatorVariable"]]]])

return(dataset)
}

.readAndAddCompleteRowIndices <- function(options, optionNames = NULL, optionNamesAsNumeric = NULL) {
Expand All @@ -72,9 +88,7 @@
options[[name2]] <- rep("scale", length(options[[name]]))
}
dataset <- jaspBase::readDataSetByVariableTypes(options, c(optionNames, optionNamesAsNumeric))
complete.index <- which(complete.cases(dataset))
dataset <- na.omit(dataset)
rownames(dataset) <- as.character(complete.index)
dataset <- jaspBase::excludeNaListwise(dataset, c(options[["target"]], options[["predictors"]]))
return(dataset)
}

Expand Down
2 changes: 1 addition & 1 deletion R/mlPrediction.R
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ is.jaspMachineLearning <- function(x) {
selection <- predictions[indexes]
cols <- list(row = indexes, pred = selection)
if (options[["predictionsTableFeatures"]]) {
for (i in model[["jaspVars"]][["encoded"]]$predictors) {
for (i in colnames(dataset)) {
if (.columnIsNominal(i)) {
table$addColumnInfo(name = i, title = i, type = "string")
var <- levels(dataset[[i]])[dataset[[i]]]
Expand Down
2 changes: 1 addition & 1 deletion R/mlRegressionLinear.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
mlRegressionLinear <- function(jaspResults, dataset, options, ...) {

# Preparatory work
dataset <- .mlRegressionRegularizedReadData(dataset, options)
dataset <- .readDataRegressionAnalyses(dataset, options, include_weights = TRUE)
.mlRegressionErrorHandling(dataset, options, type = "lm")

# Check if analysis is ready to run
Expand Down
27 changes: 1 addition & 26 deletions R/mlRegressionRegularized.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
mlRegressionRegularized <- function(jaspResults, dataset, options, ...) {

# Preparatory work
dataset <- .mlRegressionRegularizedReadData(dataset, options)
dataset <- .readDataRegressionAnalyses(dataset, options, include_weights = TRUE)
.mlRegressionErrorHandling(dataset, options, type = "regularized")

# Check if analysis is ready to run
Expand Down Expand Up @@ -58,31 +58,6 @@ mlRegressionRegularized <- function(jaspResults, dataset, options, ...) {
.mlRegressionRegularizedPlotLambda(options, jaspResults, ready, position = 10)
}

# Read dataset
#
# Prepares the data for the regularized regression analysis: reads it when no
# dataset was supplied and optionally rescales the predictor columns.
#
# @param dataset A data frame supplied by the caller, or NULL to have the data
#   read via .readAndAddCompleteRowIndices().
# @param options Named list of analysis options. Reads "predictors",
#   "scaleVariables", "testSetIndicatorVariable" and "holdoutData"; the option
#   names "target", "predictors" and "weights" are forwarded to the reader.
# @return The data frame, with the predictor columns replaced by the output of
#   .scaleNumericData() when scaling is requested.
.mlRegressionRegularizedReadData <- function(dataset, options) {
  # The test-set indicator is only requested when the holdout split is driven
  # by that column. Note that the reader expects the *option name*, not the
  # column name stored in the option.
  useIndicator <- options[["testSetIndicatorVariable"]] != "" &&
    options[["holdoutData"]] == "testSetIndicator"
  testSetIndicator <- if (useIndicator) "testSetIndicatorVariable" else NULL

  if (is.null(dataset)) {
    dataset <- .readAndAddCompleteRowIndices(options, c("target", "predictors", "weights"), testSetIndicator)
  }

  # Flatten the predictor option to a plain character vector and drop empty
  # placeholders, so it is always a valid column selector for the data frame.
  predictors <- unname(unlist(options[["predictors"]]))
  predictors <- predictors[predictors != ""]

  if (length(predictors) > 0 && options[["scaleVariables"]]) {
    # NOTE(review): .scaleNumericData() is defined elsewhere; presumably it
    # standardizes the numeric columns and leaves others untouched -- confirm.
    dataset[, predictors] <- .scaleNumericData(dataset[, predictors, drop = FALSE])
  }
  return(dataset)
}

.regularizedRegression <- function(dataset, options, jaspResults) {
# Set model-specific parameters
alpha <- switch(options[["penalty"]],
Expand Down
2 changes: 2 additions & 0 deletions inst/help/mlPrediction.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
Prediction
===

The prediction analysis enables you to load a trained machine learning model and apply it to new data. It is important that the features in the new dataset have the same names as in the original dataset used for training.

### Input

#### Trained Model
Expand Down
2 changes: 2 additions & 0 deletions inst/help/mlPrediction_nl.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
Voorspellen
===

Met de voorspellingsanalyse kun je een getraind machine-learningmodel laden en toepassen op nieuwe gegevens. Het is belangrijk dat de kenmerken in de nieuwe dataset dezelfde namen hebben als in de oorspronkelijke dataset die voor de training is gebruikt.

### Invoer

#### Getraind model
Expand Down
4 changes: 2 additions & 2 deletions inst/qml/mlPrediction.qml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ import "./common/tables" as TAB

Form
{
info: qsTr("The prediction analysis enables you to load a trained machine learning model and apply it to new data.")
info: qsTr("The prediction analysis enables you to load a trained machine learning model and apply it to new data. It is important that the features in the new dataset have the same names as in the original dataset used for training.")

FileSelector
{
Expand All @@ -53,7 +53,7 @@ Form
id: predictors
name: "predictors"
title: qsTr("Features")
allowedColumns: ["scale", "ordinal", "nominal"]
allowedColumns: ["scale", "nominal"]
allowAnalysisOwnComputedColumns: false
}
}
Expand Down
Loading