From a83a2796ede96a72236fb892ccb73efaf2cda690 Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Mon, 9 Sep 2024 22:50:29 +0200 Subject: [PATCH 01/30] First implementation --- DESCRIPTION | 3 +- NAMESPACE | 1 + R/commonMachineLearningClassification.R | 20 ++- R/mlClassificationLogistic.R | 132 ++++++++++++++++++ inst/Description.qml | 6 + inst/qml/mlClassificationLogistic.qml | 82 +++++++++++ .../testthat/test-mlclassificationlogistic.R | 66 +++++++++ 7 files changed, 305 insertions(+), 5 deletions(-) create mode 100644 R/mlClassificationLogistic.R create mode 100644 inst/qml/mlClassificationLogistic.qml create mode 100644 tests/testthat/test-mlclassificationlogistic.R diff --git a/DESCRIPTION b/DESCRIPTION index 0512eca1..b56e2b4b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,7 +41,8 @@ Imports: rpart (>= 4.1.16), ROCR, Rtsne, - signal + signal, + VGAM Suggests: testthat Remotes: diff --git a/NAMESPACE b/NAMESPACE index 20ca9f20..44b04af0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -51,6 +51,7 @@ export(mlClassificationBoosting) export(mlClassificationDecisionTree) export(mlClassificationKnn) export(mlClassificationLda) +export(mlClassificationLogistic) export(mlClassificationNaiveBayes) export(mlClassificationNeuralNetwork) export(mlClassificationRandomForest) diff --git a/R/commonMachineLearningClassification.R b/R/commonMachineLearningClassification.R index 0408e4f9..5364ef05 100644 --- a/R/commonMachineLearningClassification.R +++ b/R/commonMachineLearningClassification.R @@ -62,7 +62,7 @@ if (type == "lda" || type == "randomForest" || type == "boosting") { # Require at least 2 features ready <- length(options[["predictors"]][options[["predictors"]] != ""]) >= 2 && options[["target"]] != "" - } else if (type == "knn" || type == "neuralnet" || type == "rpart" || type == "svm" || type == "naivebayes") { + } else if (type == "knn" || type == "neuralnet" || type == "rpart" || type == "svm" || type == "naivebayes" || type == "logistic") { # Require at least 1 features ready <- length(options[["predictors"]][options[["predictors"]] != ""]) >= 1 && options[["target"]] != "" } @@ -93,7 +93,8 @@ "neuralnet" = .neuralnetClassification(dataset, options, jaspResults), "rpart" = .decisionTreeClassification(dataset, options, jaspResults), "svm" = .svmClassification(dataset, options, jaspResults), - "naivebayes" = .naiveBayesClassification(dataset, options, jaspResults) + "naivebayes" = .naiveBayesClassification(dataset, options, jaspResults), + "logistic" = .logisticRegressionClassification(dataset, options, jaspResults) ) }) if (isTryError(p)) { # Fail gracefully @@ -116,7 +117,8 @@ "neuralnet" = gettext("Neural Network Classification"), "rpart" = gettext("Decision Tree Classification"), "svm" = gettext("Support Vector Machine Classification"), - "naivebayes" = gettext("Naive Bayes Classification") + "naivebayes" = gettext("Naive Bayes Classification"), + "logistic" = gettext("Logistic / Multinomial Regression") ) tableTitle <- gettextf("Model Summary: %1$s", title) table <- createJaspTable(tableTitle) @@ -147,6 +149,8 @@ table$addColumnInfo(name = "vectors", title = gettext("Support Vectors"), type = "integer") } else if (type == "naivebayes") { table$addColumnInfo(name = "smoothing", title = gettext("Smoothing"), type = "number") + } else if (type == "logistic") { + table$addColumnInfo(name = "family", title = gettext("Family"), type = "string") } # Add common columns table$addColumnInfo(name = "nTrain", title = gettext("n(Train)"), type = "integer") @@ -164,7 +168,7 @@ } # If no analysis is run, specify the required variables in a footnote if (!ready) { - table$addFootnote(gettextf("Please provide a target variable and at least %i feature variable(s).", if (type == "knn" || type == "neuralnet" || type == "rpart" || type == "svm") 1L else 2L)) + table$addFootnote(gettextf("Please provide a target variable and at least %i feature variable(s).", if (type == "knn" || type == "neuralnet" || type == "rpart" || type == "svm" || type == "logistic") 1L else 2L)) } if (options[["savePath"]] != "") { validNames <- (length(grep(" ", decodeColNames(colnames(dataset)))) == 0) && (length(grep("_", decodeColNames(colnames(dataset)))) == 0) @@ -312,6 +316,14 @@ testAcc = classificationResult[["testAcc"]] ) table$addRows(row) + } else if (type == "logistic") { + row <- data.frame( + family = classificationResult[["family"]], + nTrain = nTrain, + nTest = classificationResult[["ntest"]], + testAcc = classificationResult[["testAcc"]] + ) + table$addRows(row) } # Save the applied model if requested if (options[["saveModel"]] && options[["savePath"]] != "") { diff --git a/R/mlClassificationLogistic.R b/R/mlClassificationLogistic.R new file mode 100644 index 00000000..d4827a34 --- /dev/null +++ b/R/mlClassificationLogistic.R @@ -0,0 +1,132 @@ +# +# Copyright (C) 2013-2021 University of Amsterdam +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +mlClassificationLogistic <- function(jaspResults, dataset, options, ...) { + + # Preparatory work + dataset <- .mlClassificationReadData(dataset, options) + .mlClassificationErrorHandling(dataset, options, type = "logistic") + + # Check if analysis is ready to run + ready <- .mlClassificationReady(options, type = "logistic") + + # Compute results and create the model summary table + .mlClassificationTableSummary(dataset, options, jaspResults, ready, position = 1, type = "logistic") + + # If the user wants to add the classes to the data set + .mlClassificationAddPredictionsToData(dataset, options, jaspResults, ready) + + # Add test set indicator to data + .mlAddTestIndicatorToData(options, jaspResults, ready, purpose = "classification") + + # Create the data split plot + .mlPlotDataSplit(dataset, options, jaspResults, ready, position = 2, purpose = "classification", type = "logistic") + + # Create the confusion table + .mlClassificationTableConfusion(dataset, options, jaspResults, ready, position = 3) + + # Create the class proportions table + .mlClassificationTableProportions(dataset, options, jaspResults, ready, position = 4) + + # Create the validation measures table + .mlClassificationTableMetrics(dataset, options, jaspResults, ready, position = 5) + +# # Create the variable importance table +# .mlTableFeatureImportance(options, jaspResults, ready, position = 6, purpose = "classification") + +# # Create the shap table +# .mlTableShap(dataset, options, jaspResults, ready, position = 7, purpose = "classification") + +# # Create the ROC curve +# .mlClassificationPlotRoc(dataset, options, jaspResults, ready, position = 8, type = "logistic") + + # Create the Andrews curves + .mlClassificationPlotAndrews(dataset, options, jaspResults, ready, position = 9) + +# # Decision boundaries +# .mlClassificationPlotBoundaries(dataset, options, jaspResults, ready, position = 10, type = "logistic") +} + +.logisticRegressionClassification <- function(dataset, options, jaspResults, ready) { + # Import model formula from jaspResults + formula <- jaspResults[["formula"]]$object + # Split the data into training and test sets + if (options[["holdoutData"]] == "testSetIndicator" && options[["testSetIndicatorVariable"]] != "") { + # Select observations according to a user-specified indicator (included when indicator = 1) + trainingIndex <- which(dataset[, options[["testSetIndicatorVariable"]]] == 0) + } else { + # Sample a percentage of the total data set + trainingIndex <- sample.int(nrow(dataset), size = ceiling((1 - options[["testDataManual"]]) * nrow(dataset))) + } + trainingSet <- dataset[trainingIndex, ] + # Create the generated test set indicator + testIndicatorColumn <- rep(1, nrow(dataset)) + testIndicatorColumn[trainingIndex] <- 0 + # Just create a train and a test set (no optimization) + testSet <- dataset[-trainingIndex, ] + if (nlevels(trainingSet[[options[["target"]]]]) == 2) { + family = "binomial" + trainingFit <- stats::glm(formula, data = trainingSet, family = family) + # Use the specified model to make predictions for dataset + testPredictions <- levels(trainingSet[[options[["target"]]]])[round(predict(trainingFit, newdata = testSet, type = "response"), 0) + 1] + dataPredictions <- levels(trainingSet[[options[["target"]]]])[round(predict(trainingFit, newdata = dataset, type = "response"), 0) + 1] + } else { + family <- "multinomial" + trainingFit <- VGAM::vglm(formula, data = trainingSet, family = family) + # Use the specified model to make predictions for dataset + testPredictions <- .mlClassificationMultinomialPredictions(trainingSet, options, predict(trainingFit, newdata = testSet)) + dataPredictions <- .mlClassificationMultinomialPredictions(trainingSet, options, predict(trainingFit, newdata = dataset)) + } + # Create results object + result <- list() + result[["formula"]] <- formula + result[["family"]] <- family + result[["model"]] <- trainingFit + result[["confTable"]] <- table("Pred" = testPredictions, "Real" = testSet[, options[["target"]]]) + result[["testAcc"]] <- sum(diag(prop.table(result[["confTable"]]))) +# result[["auc"]] <- .classificationCalcAUC(testSet, trainingSet, options, "logisticClassification") + result[["ntrain"]] <- nrow(trainingSet) + result[["ntest"]] <- nrow(testSet) + result[["testReal"]] <- testSet[, options[["target"]]] + result[["testPred"]] <- testPredictions + result[["train"]] <- trainingSet + result[["test"]] <- testSet + result[["testIndicatorColumn"]] <- testIndicatorColumn + result[["classes"]] <- dataPredictions +# result[["explainer"]] <- DALEX::explain(result[["model"]], type = "classification", data = result[["train"]], y = result[["train"]][, options[["target"]]], predict_function = function(model, data) predict(model, newdata = data, type = "raw")) +# if (nlevels(result[["testReal"]]) == 2) { +# result[["explainer_fi"]] <- DALEX::explain(result[["model"]], type = "classification", data = result[["train"]], y = as.numeric(result[["train"]][, options[["target"]]]) - 1, predict_function = function(model, data) predict(model, newdata = data, type = "class")) +# } else { +# result[["explainer_fi"]] <- DALEX::explain(result[["model"]], type = "multiclass", data = result[["train"]], y = result[["train"]][, options[["target"]]] , predict_function = function(model, data) predict(model, newdata = data, type = "raw")) +# } + return(result) +} + +.mlClassificationMultinomialPredictions <- function(trainingSet, options, predictions) { + num_categories <- ncol(predictions) + 1 + probs <- matrix(0, nrow = nrow(predictions), ncol = num_categories) + for (i in 1:(num_categories - 1)) { + probs[, i] <- exp(predictions[, i]) + } + probs[, num_categories] <- 1 + row_sums <- rowSums(probs) + probs <- probs / row_sums + predicted_category <- apply(probs, 1, which.max) + categories <- levels(trainingSet[[options[["target"]]]]) + predicted_categories <- categories[predicted_category] + return(predicted_categories) +} diff --git a/inst/Description.qml b/inst/Description.qml index 52cea7c1..bb65c4aa 100644 --- a/inst/Description.qml +++ b/inst/Description.qml @@ -98,6 +98,12 @@ Description func: "mlClassificationLda" } Analysis + { + menu: qsTr("Logistic / Multinomial") + title: qsTr("Logistic / Multinomial Classification") + func: "mlClassificationLogistic" + } + Analysis { menu: qsTr("Naive Bayes") title: qsTr("Naive Bayes Classification") diff --git a/inst/qml/mlClassificationLogistic.qml b/inst/qml/mlClassificationLogistic.qml new file mode 100644 index 00000000..97d01d76 --- /dev/null +++ b/inst/qml/mlClassificationLogistic.qml @@ -0,0 +1,82 @@ +// +// Copyright (C) 2013-2021 University of Amsterdam +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public +// License along with this program. If not, see +// . +// + +import QtQuick 2.8 +import QtQuick.Layouts 1.3 +import JASP.Controls 1.0 +import JASP.Widgets 1.0 + +import "./common/ui" as UI +import "./common/tables" as TAB +import "./common/figures" as FIG + +Form +{ + info: qsTr("Logistic regression.") + + UI.VariablesFormClassification { id: vars } + + Group + { + title: qsTr("Tables") + + TAB.ConfusionMatrix { } + TAB.ClassProportions { } + TAB.ModelPerformance { } + TAB.FeatureImportance { } + TAB.ExplainPredictions { } + } + + Group + { + title: qsTr("Plots") + + FIG.DataSplit { } + FIG.RocCurve { } + FIG.AndrewsCurve { } + FIG.DecisionBoundary { } + } + + UI.ExportResults { enabled: vars.predictorCount > 0 && vars.targetCount > 0 } + UI.DataSplit { trainingValidationSplit: false } + + Section + { + title: qsTr("Training Parameters") + + Group + { + title: qsTr("Algorithmic Settings") + + UI.ScaleVariables { } + UI.SetSeed { } + } + + RadioButtonGroup + { + name: "modelOptimization" + visible: false + + RadioButton + { + name: "manual" + checked: true + } + } + } +} diff --git a/tests/testthat/test-mlclassificationlogistic.R b/tests/testthat/test-mlclassificationlogistic.R new file mode 100644 index 00000000..72440090 --- /dev/null +++ b/tests/testthat/test-mlclassificationlogistic.R @@ -0,0 +1,66 @@ +context("Machine Learning Logistic Regression Classification") + +# Test fixed model ############################################################# +options <- initMlOptions("mlClassificationLogistic") +options$addIndicator <- FALSE +options$addPredictions <- FALSE +options$classProportionsTable <- TRUE +options$holdoutData <- "holdoutManual" +options$modelOptimization <- "manual" +options$modelValid <- "validationManual" +options$predictionsColumn <- "" +options$predictors <- c("Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width") +options$predictors.types <- rep("scale", 4) +options$saveModel <- FALSE +options$savePath <- "" +options$setSeed <- TRUE +options$target <- "Species" +options$target.types <- "nominal" +options$testDataManual <- 0.2 +options$testIndicatorColumn <- "" +options$testSetIndicatorVariable <- "" +options$validationDataManual <- 0.2 +options$validationMeasures <- TRUE +options$tableShap <- TRUE +options$fromIndex <- 1 +options$toIndex <- 5 +options$featureImportanceTable <- TRUE +set.seed(1) +results <- jaspTools::runAnalysis("mlClassificationLogistic", "iris.csv", options) + +test_that("Class Proportions table results match", { + table <- results[["results"]][["classProportionsTable"]][["data"]] + jaspTools::expect_equal_tables(table, + list(0.333333333333333, "setosa", 0.333333333333333, 0.333333333333333, + 0.333333333333333, "versicolor", 0.266666666666667, 0.35, 0.333333333333333, + "virginica", 0.4, 0.316666666666667)) +}) + +test_that("Model Summary: Logistic / Multinomial Regression table results match", { + table <- results[["results"]][["classificationTable"]][["data"]] + jaspTools::expect_equal_tables(table, + list("multinomial", 30, 120, 1)) +}) + +test_that("Confusion Matrix table results match", { + table <- results[["results"]][["confusionTable"]][["data"]] + jaspTools::expect_equal_tables(table, + list("Observed", "setosa", 10, 0, 0, "", "versicolor", 0, 8, 0, "", + "virginica", 0, 0, 12)) +}) + +test_that("Data Split plot matches", { + plotName <- results[["results"]][["plotDataSplit"]][["data"]] + testPlot <- results[["state"]][["figures"]][[plotName]][["obj"]] + jaspTools::expect_equal_plots(testPlot, "data-split") +}) + +test_that("Model Performance Metrics table results match", { + table <- results[["results"]][["validationMeasures"]][["data"]] + jaspTools::expect_equal_tables(table, + list(1, "", 1, 0, 0, 0, 0, "setosa", 1, 1, 1, 1, 0.333333333333333, + 10, 1, "", 1, 1, 0, 0, 0, 0, "versicolor", 1, 1, 1, + 1, 0.266666666666667, 8, 1, "", 1, 1, 0, 0, 0, 0, "virginica", + 1, 1, 1, 1, 0.4, 12, 1, "", 1, 1, 0, 0, 0, 0, "Average / Total", + 1, 1, 1, 1, 1, 30, 1, "")) +}) \ No newline at end of file From ac4d14ec328e4a4b79b5160411de4f01c40eb65b Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Mon, 9 Sep 2024 23:10:44 +0200 Subject: [PATCH 02/30] Update mlClassificationLogistic.R --- R/mlClassificationLogistic.R | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/R/mlClassificationLogistic.R b/R/mlClassificationLogistic.R index d4827a34..528eaa11 100644 --- a/R/mlClassificationLogistic.R +++ b/R/mlClassificationLogistic.R @@ -82,8 +82,8 @@ mlClassificationLogistic <- function(jaspResults, dataset, options, ...) { family = "binomial" trainingFit <- stats::glm(formula, data = trainingSet, family = family) # Use the specified model to make predictions for dataset - testPredictions <- levels(trainingSet[[options[["target"]]]])[round(predict(trainingFit, newdata = testSet, type = "response"), 0) + 1] - dataPredictions <- levels(trainingSet[[options[["target"]]]])[round(predict(trainingFit, newdata = dataset, type = "response"), 0) + 1] + testPredictions <- .mlClassificationLogisticPredictions(trainingSet, options, predict(trainingFit, newdata = testSet, type = "response")) + dataPredictions <- .mlClassificationLogisticPredictions(trainingSet, options, predict(trainingFit, newdata = dataset, type = "response")) } else { family <- "multinomial" trainingFit <- VGAM::vglm(formula, data = trainingSet, family = family) @@ -116,17 +116,23 @@ mlClassificationLogistic <- function(jaspResults, dataset, options, ...) { return(result) } -.mlClassificationMultinomialPredictions <- function(trainingSet, options, predictions) { - num_categories <- ncol(predictions) + 1 - probs <- matrix(0, nrow = nrow(predictions), ncol = num_categories) - for (i in 1:(num_categories - 1)) { - probs[, i] <- exp(predictions[, i]) +.mlClassificationLogisticPredictions <- function(trainingSet, options, probabilities) { + categories <- levels(trainingSet[[options[["target"]]]]) + predicted_categories <- categories[round(probabilities, 0) + 1] + return(predicted_categories) +} + +.mlClassificationMultinomialPredictions <- function(trainingSet, options, logodds) { + ncategories <- ncol(logodds) + 1 + probabilities <- matrix(0, nrow = nrow(logodds), ncol = ncategories) + for (i in seq_len(ncategories - 1)) { + probabilities[, i] <- exp(logodds[, i]) } - probs[, num_categories] <- 1 - row_sums <- rowSums(probs) - probs <- probs / row_sums - predicted_category <- apply(probs, 1, which.max) + probabilities[, ncategories] <- 1 + row_sums <- rowSums(probabilities) + probabilities <- probabilities / row_sums + predicted_columns <- apply(probabilities, 1, which.max) categories <- levels(trainingSet[[options[["target"]]]]) - predicted_categories <- categories[predicted_category] + predicted_categories <- categories[predicted_columns] return(predicted_categories) } From 7c86ce269e52c5abf73ee8f6ac6773b36bb52173 Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Tue, 10 Sep 2024 16:57:02 +0200 Subject: [PATCH 03/30] Add intercept option --- R/commonMachineLearningClassification.R | 3 ++- R/mlClassificationLogistic.R | 6 ++++++ inst/qml/mlClassificationLogistic.qml | 2 ++ tests/testthat/helper-ml.R | 2 +- 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/R/commonMachineLearningClassification.R b/R/commonMachineLearningClassification.R index 5364ef05..c8eaff71 100644 --- a/R/commonMachineLearningClassification.R +++ b/R/commonMachineLearningClassification.R @@ -32,7 +32,8 @@ "mutationMethod", "survivalMethod", "elitismProportion", "candidates", # Neural network "noOfTrees", "maxTrees", "baggingFraction", "noOfPredictors", "numberOfPredictors", # Random forest "complexityParameter", "degree", "gamma", "cost", "tolerance", "epsilon", "maxCost", # Support vector machine - "smoothingParameter" # Naive Bayes + "smoothingParameter", # Naive Bayes + "intercept" # Logistic ) if (includeSaveOptions) { opt <- c(opt, "saveModel", "savePath") diff --git a/R/mlClassificationLogistic.R b/R/mlClassificationLogistic.R index 528eaa11..e05e7092 100644 --- a/R/mlClassificationLogistic.R +++ b/R/mlClassificationLogistic.R @@ -78,6 +78,12 @@ mlClassificationLogistic <- function(jaspResults, dataset, options, ...) { testIndicatorColumn[trainingIndex] <- 0 # Just create a train and a test set (no optimization) testSet <- dataset[-trainingIndex, ] + # Create the formula + if (options[["intercept"]]) { + formula <- formula(paste(options[["target"]], "~ 1 + ", paste(options[["predictors"]], collapse = " + "))) + } else { + formula <- formula(paste(options[["target"]], "~ 0 + ", paste(options[["predictors"]], collapse = " + "))) + } if (nlevels(trainingSet[[options[["target"]]]]) == 2) { family = "binomial" trainingFit <- stats::glm(formula, data = trainingSet, family = family) diff --git a/inst/qml/mlClassificationLogistic.qml b/inst/qml/mlClassificationLogistic.qml index 97d01d76..ee11b775 100644 --- a/inst/qml/mlClassificationLogistic.qml +++ b/inst/qml/mlClassificationLogistic.qml @@ -24,6 +24,7 @@ import JASP.Widgets 1.0 import "./common/ui" as UI import "./common/tables" as TAB import "./common/figures" as FIG +import "./common/analyses/regularized" as REGU Form { @@ -63,6 +64,7 @@ Form { title: qsTr("Algorithmic Settings") + REGU.Intercept { } UI.ScaleVariables { } UI.SetSeed { } } diff --git a/tests/testthat/helper-ml.R b/tests/testthat/helper-ml.R index 04b8528f..4a21f66b 100644 --- a/tests/testthat/helper-ml.R +++ b/tests/testthat/helper-ml.R @@ -27,7 +27,7 @@ mlOptions <- function(analysis) { files <- c(files, list.files(testthat::test_path("..", "..", "inst", "qml", "common", "analyses", "randomforest"), full.names = TRUE)) } else if (analysis %in% c("mlClassificationSvm", "mlRegressionSvm")) { files <- c(files, list.files(testthat::test_path("..", "..", "inst", "qml", "common", "analyses", "svm"), full.names = TRUE)) - } else if (analysis %in% c("mlRegressionLinear", "mlRegressionRegularized")) { + } else if (analysis %in% c("mlClassificationLogistic", "mlRegressionLinear", "mlRegressionRegularized")) { files <- c(files, list.files(testthat::test_path("..", "..", "inst", "qml", "common", "analyses", "regularized"), full.names = TRUE)) } options <- lapply(files, jaspTools:::readQML) |> From 1818d85371dd78de48b7c1e8b6e64217428d6bab Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Tue, 10 Sep 2024 17:50:56 +0200 Subject: [PATCH 04/30] Add coefficients table for logistic --- R/mlClassificationLogistic.R | 71 +++++++++++++++++++++++++-- inst/qml/mlClassificationLogistic.qml | 1 + 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/R/mlClassificationLogistic.R b/R/mlClassificationLogistic.R index e05e7092..00b6e505 100644 --- a/R/mlClassificationLogistic.R +++ b/R/mlClassificationLogistic.R @@ -51,14 +51,16 @@ mlClassificationLogistic <- function(jaspResults, dataset, options, ...) { # # Create the shap table # .mlTableShap(dataset, options, jaspResults, ready, position = 7, purpose = "classification") + .mlClassificationLogisticTableCoef(options, jaspResults, ready, position = 8) + # # Create the ROC curve -# .mlClassificationPlotRoc(dataset, options, jaspResults, ready, position = 8, type = "logistic") +# .mlClassificationPlotRoc(dataset, options, jaspResults, ready, position = 10, type = "logistic") # position + 1 for regression equation # Create the Andrews curves - .mlClassificationPlotAndrews(dataset, options, jaspResults, ready, position = 9) + .mlClassificationPlotAndrews(dataset, options, jaspResults, ready, position = 11) # # Decision boundaries -# .mlClassificationPlotBoundaries(dataset, options, jaspResults, ready, position = 10, type = "logistic") +# .mlClassificationPlotBoundaries(dataset, options, jaspResults, ready, position = 12, type = "logistic") } .logisticRegressionClassification <- function(dataset, options, jaspResults, ready) { @@ -86,7 +88,7 @@ mlClassificationLogistic <- function(jaspResults, dataset, options, ...) { } if (nlevels(trainingSet[[options[["target"]]]]) == 2) { family = "binomial" - trainingFit <- stats::glm(formula, data = trainingSet, family = family) + trainingFit <- glm(formula, data = trainingSet, family = family) # Use the specified model to make predictions for dataset testPredictions <- .mlClassificationLogisticPredictions(trainingSet, options, predict(trainingFit, newdata = testSet, type = "response")) dataPredictions <- .mlClassificationLogisticPredictions(trainingSet, options, predict(trainingFit, newdata = dataset, type = "response")) @@ -122,6 +124,67 @@ mlClassificationLogistic <- function(jaspResults, dataset, options, ...) { return(result) } +.mlClassificationLogisticTableCoef <- function(options, jaspResults, ready, position) { + if (!is.null(jaspResults[["coefTable"]]) || !options[["coefTable"]]) { + return() + } + table <- createJaspTable(gettext("Regression Coefficients")) + table$position <- position + table$dependOn(options = c("coefTable", "coefTableConfInt", "coefTableConfIntLevel", "formula", .mlClassificationDependencies())) + table$addColumnInfo(name = "var", title = "", type = "string") + table$addColumnInfo(name = "coefs", title = gettextf("Coefficient (%s)", "\u03B2"), type = "number") + table$addColumnInfo(name = "se", title = gettext("Standard Error"), type = "number") + table$addColumnInfo(name = "t", title = gettext("t"), type = "number") + table$addColumnInfo(name = "p", title = gettext("p"), type = "pvalue") + if (options[["coefTableConfInt"]]) { + overtitle <- gettextf("%1$s%% Confidence interval", round(options[["coefTableConfIntLevel"]] * 100, 3)) + table$addColumnInfo(name = "lower", title = gettext("Lower"), type = "number", overtitle = overtitle) + table$addColumnInfo(name = "upper", title = gettext("Upper"), type = "number", overtitle = overtitle) + } + if (options[["scaleVariables"]]) { + table$addFootnote(gettext("The regression coefficients for numeric features are standardized.")) + } else { + table$addFootnote(gettext("The regression coefficients are unstandardized.")) + } + jaspResults[["coefTable"]] <- table + if (!ready) { + if (options[["target"]] == "" && length(unlist(options[["predictors"]])) > 0) { + table[["var"]] <- c(if (options[["intercept"]]) "(Intercept)" else NULL, options[["predictors"]]) + } + return() + } + classificationResult <- jaspResults[["classificationResult"]]$object + model <- classificationResult[["model"]] + coefs <- summary(model)$coefficients + conf_int <- confint(model, level = options[["coefTableConfIntLevel"]]) + coefs <- cbind(coefs, lower = conf_int[, 1], upper = conf_int[, 2]) + table[["var"]] <- rownames(coefs) + table[["coefs"]] <- as.numeric(coefs[, 1]) + table[["se"]] <- as.numeric(coefs[, 2]) + table[["t"]] <- as.numeric(coefs[, 3]) + table[["p"]] <- as.numeric(coefs[, 4]) + if (options[["coefTableConfInt"]]) { + table[["lower"]] <- coefs[, "lower"] + table[["upper"]] <- coefs[, "upper"] + } + if (options[["formula"]]) { + if (options[["intercept"]]) { + regform <- paste0("logit(", options[["target"]], ") = ", round(as.numeric(coefs[, 1])[1], 3)) + start <- 2 + } else { + regform <- paste0("logit(", options[["target"]], ") = ") + start <- 1 + } + for (i in start:nrow(coefs)) { + regform <- paste0(regform, if (round(as.numeric(coefs[, 1])[i], 3) < 0) " - " else " + ", abs(round(as.numeric(coefs[, 1])[i], 3)), " x ", rownames(coefs)[i]) + } + formula <- createJaspHtml(gettextf("Regression equation:\n%1$s", regform), "p") + formula$position <- position + 1 + formula$dependOn(options = c("coefTable", "formula"), optionsFromObject = jaspResults[["classificationResult"]]) + jaspResults[["regressionFormula"]] <- formula + } +} + .mlClassificationLogisticPredictions <- function(trainingSet, options, probabilities) { categories <- levels(trainingSet[[options[["target"]]]]) predicted_categories <- categories[round(probabilities, 0) + 1] diff --git a/inst/qml/mlClassificationLogistic.qml b/inst/qml/mlClassificationLogistic.qml index ee11b775..8ad33a4a 100644 --- a/inst/qml/mlClassificationLogistic.qml +++ b/inst/qml/mlClassificationLogistic.qml @@ -41,6 +41,7 @@ Form TAB.ModelPerformance { } TAB.FeatureImportance { } TAB.ExplainPredictions { } + REGU.CoefficientTable { confint: true } } Group From 140e5db32d72923b72c4c2acefcd9557ae0c395b Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Tue, 10 Sep 2024 18:34:45 +0200 Subject: [PATCH 05/30] Update --- R/commonMachineLearningClassification.R | 7 ++++++- R/mlClassificationLogistic.R | 5 +++-- inst/Description.qml | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/R/commonMachineLearningClassification.R b/R/commonMachineLearningClassification.R index c8eaff71..48b24c41 100644 --- a/R/commonMachineLearningClassification.R +++ b/R/commonMachineLearningClassification.R @@ -119,7 +119,7 @@ "rpart" = gettext("Decision Tree Classification"), "svm" = gettext("Support Vector Machine Classification"), "naivebayes" = gettext("Naive Bayes Classification"), - "logistic" = gettext("Logistic / Multinomial Regression") + "logistic" = gettext("Logistic / Multinomial Regression Classification") ) tableTitle <- gettextf("Model Summary: %1$s", title) table <- createJaspTable(tableTitle) @@ -318,6 +318,11 @@ ) table$addRows(row) } else if (type == "logistic") { + if (classificationResult[["family"]] == "binomial") { + table$title <- gettext("Model Summary: Logistic Regression Classification") + } else { + table$title <- gettext("Model Summary: Multinomial Regression Classification") + } row <- data.frame( family = classificationResult[["family"]], nTrain = nTrain, diff --git a/R/mlClassificationLogistic.R b/R/mlClassificationLogistic.R index 00b6e505..9b073f19 100644 --- a/R/mlClassificationLogistic.R +++ b/R/mlClassificationLogistic.R @@ -168,11 +168,12 @@ mlClassificationLogistic <- function(jaspResults, dataset, options, ...) { table[["upper"]] <- coefs[, "upper"] } if (options[["formula"]]) { + one_cat <- levels(factor(classificationResult[["train"]][[options[["target"]]]]))[2] if (options[["intercept"]]) { - regform <- paste0("logit(", options[["target"]], ") = ", round(as.numeric(coefs[, 1])[1], 3)) + regform <- paste0("logit(p", options[["target"]], " = ", one_cat, ") = ", round(as.numeric(coefs[, 1])[1], 3)) start <- 2 } else { - regform <- paste0("logit(", options[["target"]], ") = ") + regform <- paste0("logit(p", options[["target"]], " = ", one_cat, ") = ") start <- 1 } for (i in start:nrow(coefs)) { diff --git a/inst/Description.qml b/inst/Description.qml index bb65c4aa..aed78219 100644 --- a/inst/Description.qml +++ b/inst/Description.qml @@ -100,7 +100,7 @@ Description Analysis { menu: qsTr("Logistic / Multinomial") - title: qsTr("Logistic / Multinomial Classification") + title: qsTr("Logistic / Multinomial Regression Classification") func: "mlClassificationLogistic" } Analysis From 79e2fe59df935fb1f614691d35497841695c6815 Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Tue, 10 Sep 2024 20:19:08 +0200 Subject: [PATCH 06/30] Change file names --- NAMESPACE | 2 +- R/commonMachineLearningClassification.R | 2 +- ...cationLogistic.R => mlClassificationLogisticMultinomial.R} | 0 inst/Description.qml | 2 +- ...onLogistic.qml => mlClassificationLogisticMultinomial.qml} | 0 ...nlogistic.R => test-mlclassificationlogisticmultinomial.R} | 4 ++-- 6 files changed, 5 insertions(+), 5 deletions(-) rename R/{mlClassificationLogistic.R => mlClassificationLogisticMultinomial.R} (100%) rename inst/qml/{mlClassificationLogistic.qml => mlClassificationLogisticMultinomial.qml} (100%) rename tests/testthat/{test-mlclassificationlogistic.R => test-mlclassificationlogisticmultinomial.R} (94%) diff --git a/NAMESPACE b/NAMESPACE index 44b04af0..7efc6821 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -51,7 +51,7 @@ export(mlClassificationBoosting) export(mlClassificationDecisionTree) export(mlClassificationKnn) export(mlClassificationLda) -export(mlClassificationLogistic) +export(mlClassificationLogisticMultinomial) export(mlClassificationNaiveBayes) export(mlClassificationNeuralNetwork) export(mlClassificationRandomForest) diff --git a/R/commonMachineLearningClassification.R b/R/commonMachineLearningClassification.R index 48b24c41..29799a32 100644 --- a/R/commonMachineLearningClassification.R +++ b/R/commonMachineLearningClassification.R @@ -95,7 +95,7 @@ "rpart" = .decisionTreeClassification(dataset, options, jaspResults), "svm" = .svmClassification(dataset, options, jaspResults), "naivebayes" = .naiveBayesClassification(dataset, options, jaspResults), - "logistic" = .logisticRegressionClassification(dataset, options, jaspResults) + "logistic" = .logisticMultinomialClassification(dataset, options, jaspResults) ) }) if (isTryError(p)) { # Fail gracefully diff --git a/R/mlClassificationLogistic.R b/R/mlClassificationLogisticMultinomial.R similarity index 100% rename from R/mlClassificationLogistic.R rename to R/mlClassificationLogisticMultinomial.R diff --git a/inst/Description.qml b/inst/Description.qml index aed78219..72422c39 100644 --- a/inst/Description.qml +++ b/inst/Description.qml @@ -101,7 +101,7 @@ Description { menu: qsTr("Logistic / Multinomial") title: qsTr("Logistic / Multinomial Regression Classification") - func: "mlClassificationLogistic" + func: "mlClassificationLogisticMultinomial" } Analysis { diff --git a/inst/qml/mlClassificationLogistic.qml b/inst/qml/mlClassificationLogisticMultinomial.qml similarity index 100% rename from inst/qml/mlClassificationLogistic.qml rename to inst/qml/mlClassificationLogisticMultinomial.qml diff --git a/tests/testthat/test-mlclassificationlogistic.R b/tests/testthat/test-mlclassificationlogisticmultinomial.R similarity index 94% rename from tests/testthat/test-mlclassificationlogistic.R rename to tests/testthat/test-mlclassificationlogisticmultinomial.R index 72440090..5193e819 100644 --- a/tests/testthat/test-mlclassificationlogistic.R +++ b/tests/testthat/test-mlclassificationlogisticmultinomial.R @@ -1,7 +1,7 @@ context("Machine Learning Logistic Regression Classification") # Test fixed model ############################################################# -options <- initMlOptions("mlClassificationLogistic") +options <- initMlOptions("mlClassificationLogisticMultinomial") options$addIndicator <- FALSE options$addPredictions <- FALSE options$classProportionsTable <- TRUE @@ -26,7 +26,7 @@ options$fromIndex <- 1 options$toIndex <- 5 options$featureImportanceTable <- TRUE set.seed(1) -results <- jaspTools::runAnalysis("mlClassificationLogistic", "iris.csv", options) +results <- jaspTools::runAnalysis("mlClassificationLogisticMultinomial", "iris.csv", options) test_that("Class Proportions table results match", { table <- results[["results"]][["classProportionsTable"]][["data"]] From 775b9485ad58240a9d3e6b431477917da987034a Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Tue, 10 Sep 2024 20:19:19 +0200 Subject: [PATCH 07/30] Update mlClassificationLogisticMultinomial.R --- R/mlClassificationLogisticMultinomial.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index 9b073f19..73f21fb9 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -15,7 +15,7 @@ # along with this program. If not, see . # -mlClassificationLogistic <- function(jaspResults, dataset, options, ...) { +mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, ...) { # Preparatory work dataset <- .mlClassificationReadData(dataset, options) @@ -63,7 +63,7 @@ mlClassificationLogistic <- function(jaspResults, dataset, options, ...) { # .mlClassificationPlotBoundaries(dataset, options, jaspResults, ready, position = 12, type = "logistic") } -.logisticRegressionClassification <- function(dataset, options, jaspResults, ready) { +.logisticMultinomialClassification <- function(dataset, options, jaspResults, ready) { # Import model formula from jaspResults formula <- jaspResults[["formula"]]$object # Split the data into training and test sets From 4dc4a3851428b59f992d426e9fb112cecb4651ed Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Tue, 10 Sep 2024 21:12:28 +0200 Subject: [PATCH 08/30] Update coef table --- R/mlClassificationLogisticMultinomial.R | 56 ++++++++++++++++--- tests/testthat/helper-ml.R | 2 +- ...test-mlclassificationlogisticmultinomial.R | 2 +- 3 files changed, 50 insertions(+), 10 deletions(-) diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index 73f21fb9..b1f39784 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -139,7 +139,7 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . if (options[["coefTableConfInt"]]) { overtitle <- gettextf("%1$s%% Confidence interval", round(options[["coefTableConfIntLevel"]] * 100, 3)) table$addColumnInfo(name = "lower", title = gettext("Lower"), type = "number", overtitle = overtitle) - table$addColumnInfo(name = "upper", title = gettext("Upper"), type = "number", overtitle = overtitle) + table$addColumnInfo(name = "upper", title = gettext("Upper"), type = "number", overtitle = overtitle) } if (options[["scaleVariables"]]) { table$addFootnote(gettext("The regression coefficients for numeric features are standardized.")) @@ -155,14 +155,54 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . } classificationResult <- jaspResults[["classificationResult"]]$object model <- classificationResult[["model"]] - coefs <- summary(model)$coefficients - conf_int <- confint(model, level = options[["coefTableConfIntLevel"]]) - coefs <- cbind(coefs, lower = conf_int[, 1], upper = conf_int[, 2]) + if (classificationResult[["family"]] == "binomial") { + coefs <- summary(model)$coefficients + conf_int <- confint(model, level = options[["coefTableConfIntLevel"]]) + coefs <- cbind(coefs, lower = conf_int[, 1], upper = conf_int[, 2]) + colnames(coefs) <- c("est", "se", "t", "p", "lower", "upper") + vars <- rownames(coefs) + for (i in seq_along(vars)) { + if (!(vars[i] %in% options[["predictors"]]) && vars[i] != "(Intercept)") { + for (j in options[["predictors"]]) { + vars[i] <- gsub(pattern = j, replacement = paste0(j, " ("), x = vars[i]) + } + vars[i] <- paste0(vars[i], ")") + } + } + rownames(coefs) <- vars + } else { + coefs <- cbind(model@coefficients, confint(model, level = options[["coefTableConfIntLevel"]])) + colnames(coefs) <- c("est", "lower", "upper") + vars <- rownames(coefs) + for (i in seq_along(vars)) { + for (j in c("(Intercept)", options[["predictors"]])) { + if (!grepl(j, vars[i])) { + next + } + splitvar <- strsplit(vars[i], split = ":")[[1]] + if (grepl(paste0(j, "[A-Za-z]+:"), vars[i])) { + repl_part1 <- paste0(gsub(pattern = j, replacement = paste0(j, " ("), x = splitvar[1]), ")") + } else { + repl_part1 <- j + } + repl_part2 <- levels(factor(classificationResult[["train"]][[options[["target"]]]]))[as.numeric(splitvar[2])] + vars[i] <- paste0(repl_part1, " : ", repl_part2) + } + } + rownames(coefs) <- vars + } table[["var"]] <- rownames(coefs) - table[["coefs"]] <- as.numeric(coefs[, 1]) - table[["se"]] <- as.numeric(coefs[, 2]) - table[["t"]] <- as.numeric(coefs[, 3]) - table[["p"]] <- as.numeric(coefs[, 4]) + table[["coefs"]] <- as.numeric(coefs[, "est"]) + if (classificationResult[["family"]] == "binomial") { + table[["se"]] <- as.numeric(coefs[, "se"]) + table[["t"]] <- as.numeric(coefs[, "t"]) + table[["p"]] <- as.numeric(coefs[, "p"]) + } else { + table[["se"]] <- rep(".", nrow(coefs)) + table[["t"]] <- rep(".", nrow(coefs)) + table[["p"]] <- rep(".", nrow(coefs)) + table$addFootnote(gettext("Standard errors, t-values and p-values are not available for multinomial regression coefficients.")) + } if (options[["coefTableConfInt"]]) { table[["lower"]] <- coefs[, "lower"] table[["upper"]] <- coefs[, "upper"] diff --git a/tests/testthat/helper-ml.R b/tests/testthat/helper-ml.R index 4a21f66b..0c82a34a 100644 --- a/tests/testthat/helper-ml.R +++ b/tests/testthat/helper-ml.R @@ -27,7 +27,7 @@ mlOptions <- function(analysis) { files <- c(files, list.files(testthat::test_path("..", "..", "inst", "qml", "common", "analyses", "randomforest"), full.names = TRUE)) } else if (analysis %in% c("mlClassificationSvm", "mlRegressionSvm")) { files <- c(files, list.files(testthat::test_path("..", "..", "inst", "qml", "common", "analyses", "svm"), full.names = TRUE)) - } else if (analysis %in% c("mlClassificationLogistic", "mlRegressionLinear", "mlRegressionRegularized")) { + } else if (analysis %in% c("mlClassificationLogisticMultinomial", "mlRegressionLinear", "mlRegressionRegularized")) { files <- c(files, list.files(testthat::test_path("..", "..", "inst", "qml", "common", "analyses", "regularized"), full.names = TRUE)) } options <- lapply(files, jaspTools:::readQML) |> diff --git a/tests/testthat/test-mlclassificationlogisticmultinomial.R b/tests/testthat/test-mlclassificationlogisticmultinomial.R index 5193e819..c41948c8 100644 --- a/tests/testthat/test-mlclassificationlogisticmultinomial.R +++ b/tests/testthat/test-mlclassificationlogisticmultinomial.R @@ -1,4 +1,4 @@ -context("Machine Learning Logistic Regression Classification") +context("Machine Learning Logistic / Multinomial Regression Classification") # Test fixed model ############################################################# options <- initMlOptions("mlClassificationLogisticMultinomial") From 50a04d341f1c86894d57d14e714a3a407a0f9dbc Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Tue, 10 Sep 2024 21:24:29 +0200 Subject: [PATCH 09/30] Make AUC work --- R/commonMachineLearningClassification.R | 21 +++++++++++++++++++++ R/mlClassificationLogisticMultinomial.R | 4 ++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/R/commonMachineLearningClassification.R b/R/commonMachineLearningClassification.R index 29799a32..0500f642 100644 --- a/R/commonMachineLearningClassification.R +++ b/R/commonMachineLearningClassification.R @@ -1138,3 +1138,24 @@ score <- max.col(predict(fit, test, type = "raw")) return(score) } + +.calcAUCScore.logisticClassification <- function(AUCformula, test, typeData, options, jaspResults, ...) { + fit <- glm(AUCformula, data = typeData, family = "binomial") + score <- round(predict(fit, test, type = "response"), 0) + return(score) +} + +.calcAUCScore.multinomialClassification <- function(AUCformula, test, typeData, options, jaspResults, ...) { + fit <- VGAM::vglm(AUCformula, data = typeData, family = "multinomial") + logodds <- as.data.frame(predict(fit, test)) + ncategories <- ncol(logodds) + 1 + probabilities <- matrix(0, nrow = nrow(logodds), ncol = ncategories) + for (i in seq_len(ncategories - 1)) { + probabilities[, i] <- exp(logodds[, i]) + } + probabilities[, ncategories] <- 1 + row_sums <- rowSums(probabilities) + probabilities <- probabilities / row_sums + score <- apply(probabilities, 1, which.max) + return(score) +} diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index b1f39784..ce3bad7f 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -106,7 +106,7 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . result[["model"]] <- trainingFit result[["confTable"]] <- table("Pred" = testPredictions, "Real" = testSet[, options[["target"]]]) result[["testAcc"]] <- sum(diag(prop.table(result[["confTable"]]))) -# result[["auc"]] <- .classificationCalcAUC(testSet, trainingSet, options, "logisticClassification") + result[["auc"]] <- .classificationCalcAUC(testSet, trainingSet, options, if (family == "binomial") "logisticClassification" else "multinomialClassification") result[["ntrain"]] <- nrow(trainingSet) result[["ntest"]] <- nrow(testSet) result[["testReal"]] <- testSet[, options[["target"]]] @@ -207,7 +207,7 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . table[["lower"]] <- coefs[, "lower"] table[["upper"]] <- coefs[, "upper"] } - if (options[["formula"]]) { + if (options[["formula"]]) { # TODO FOR MULTINOMIAL one_cat <- levels(factor(classificationResult[["train"]][[options[["target"]]]]))[2] if (options[["intercept"]]) { regform <- paste0("logit(p", options[["target"]], " = ", one_cat, ") = ", round(as.numeric(coefs[, 1])[1], 3)) From 5c3599ecf25207bab60b5079a5fea813bea11564 Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Tue, 10 Sep 2024 21:38:05 +0200 Subject: [PATCH 10/30] Make roc plot work --- R/commonMachineLearningClassification.R | 18 ++------ R/mlClassificationLogisticMultinomial.R | 6 +-- .../data-split.svg | 42 +++++++++++++++++++ ...test-mlclassificationlogisticmultinomial.R | 13 +++--- 4 files changed, 55 insertions(+), 24 deletions(-) create mode 100644 tests/testthat/_snaps/mlclassificationlogisticmultinomial/data-split.svg diff --git a/R/commonMachineLearningClassification.R b/R/commonMachineLearningClassification.R index 0500f642..1812235e 100644 --- a/R/commonMachineLearningClassification.R +++ b/R/commonMachineLearningClassification.R @@ -721,6 +721,9 @@ } else if (type == "naivebayes") { fit <- e1071::naiveBayes(formula = formula, data = typeData, laplace = options[["smoothingParameter"]]) score <- max.col(predict(fit, test, type = "raw")) + } else if (type == "logistic") { + fit <- glm(formula, data = typeData, family = "binomial") + score <- round(predict(fit, test, type = "response"), 0) } pred <- ROCR::prediction(score, actual.class) nbperf <- ROCR::performance(pred, "tpr", "fpr") @@ -1144,18 +1147,3 @@ score <- round(predict(fit, test, type = "response"), 0) return(score) } - -.calcAUCScore.multinomialClassification <- function(AUCformula, test, typeData, options, jaspResults, ...) { - fit <- VGAM::vglm(AUCformula, data = typeData, family = "multinomial") - logodds <- as.data.frame(predict(fit, test)) - ncategories <- ncol(logodds) + 1 - probabilities <- matrix(0, nrow = nrow(logodds), ncol = ncategories) - for (i in seq_len(ncategories - 1)) { - probabilities[, i] <- exp(logodds[, i]) - } - probabilities[, ncategories] <- 1 - row_sums <- rowSums(probabilities) - probabilities <- probabilities / row_sums - score <- apply(probabilities, 1, which.max) - return(score) -} diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index ce3bad7f..7d905f8e 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -53,8 +53,8 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . .mlClassificationLogisticTableCoef(options, jaspResults, ready, position = 8) -# # Create the ROC curve -# .mlClassificationPlotRoc(dataset, options, jaspResults, ready, position = 10, type = "logistic") # position + 1 for regression equation + # Create the ROC curve + .mlClassificationPlotRoc(dataset, options, jaspResults, ready, position = 10, type = "logistic") # position + 1 for regression equation # Create the Andrews curves .mlClassificationPlotAndrews(dataset, options, jaspResults, ready, position = 11) @@ -106,7 +106,7 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . result[["model"]] <- trainingFit result[["confTable"]] <- table("Pred" = testPredictions, "Real" = testSet[, options[["target"]]]) result[["testAcc"]] <- sum(diag(prop.table(result[["confTable"]]))) - result[["auc"]] <- .classificationCalcAUC(testSet, trainingSet, options, if (family == "binomial") "logisticClassification" else "multinomialClassification") + result[["auc"]] <- .classificationCalcAUC(testSet, trainingSet, options, "logisticClassification") result[["ntrain"]] <- nrow(trainingSet) result[["ntest"]] <- nrow(testSet) result[["testReal"]] <- testSet[, options[["target"]]] diff --git a/tests/testthat/_snaps/mlclassificationlogisticmultinomial/data-split.svg b/tests/testthat/_snaps/mlclassificationlogisticmultinomial/data-split.svg new file mode 100644 index 00000000..25ca32f6 --- /dev/null +++ b/tests/testthat/_snaps/mlclassificationlogisticmultinomial/data-split.svg @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + +Train: 120 +Test: 30 +Total: 150 + + + + + +data-split + + diff --git a/tests/testthat/test-mlclassificationlogisticmultinomial.R b/tests/testthat/test-mlclassificationlogisticmultinomial.R index c41948c8..397a8c2d 100644 --- a/tests/testthat/test-mlclassificationlogisticmultinomial.R +++ b/tests/testthat/test-mlclassificationlogisticmultinomial.R @@ -36,7 +36,7 @@ test_that("Class Proportions table results match", { "virginica", 0.4, 0.316666666666667)) }) -test_that("Model Summary: Logistic / Multinomial Regression table results match", { +test_that("Model Summary: Multinomial Regression Classification table results match", { table <- results[["results"]][["classificationTable"]][["data"]] jaspTools::expect_equal_tables(table, list("multinomial", 30, 120, 1)) @@ -58,9 +58,10 @@ test_that("Data Split plot matches", { test_that("Model Performance Metrics table results match", { table <- results[["results"]][["validationMeasures"]][["data"]] jaspTools::expect_equal_tables(table, - list(1, "", 1, 0, 0, 0, 0, "setosa", 1, 1, 1, 1, 0.333333333333333, - 10, 1, "", 1, 1, 0, 0, 0, 0, "versicolor", 1, 1, 1, - 1, 0.266666666666667, 8, 1, "", 1, 1, 0, 0, 0, 0, "virginica", - 1, 1, 1, 1, 0.4, 12, 1, "", 1, 1, 0, 0, 0, 0, "Average / Total", - 1, 1, 1, 1, 1, 30, 1, "")) + list(1, 1, 1, 0, 0, 0, 0, "setosa", 1, 1, 1, 1, 0.333333333333333, + 10, 1, "", 1, 0.613636363636364, 1, 0, 0, 0, 0, "versicolor", + 1, 1, 1, 1, 0.266666666666667, 8, 1, "", 1, 1, 1, 0, + 0, 0, 0, "virginica", 1, 1, 1, 1, 0.4, 12, 1, "", 1, + 0.871212121212121, 1, 0, 0, 0, 0, "Average / Total", 1, 1, 1, + 1, 1, 30, 1, "")) }) \ No newline at end of file From ef552aef8ce27f06d905b365260a18bd9ca187e7 Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Tue, 10 Sep 2024 21:46:52 +0200 Subject: [PATCH 11/30] Make decision boundary work --- R/commonMachineLearningClassification.R | 20 ++++++++++++++++++++ R/mlClassificationLogisticMultinomial.R | 4 ++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/R/commonMachineLearningClassification.R b/R/commonMachineLearningClassification.R index 1812235e..b0277e50 100644 --- a/R/commonMachineLearningClassification.R +++ b/R/commonMachineLearningClassification.R @@ -582,6 +582,26 @@ fit <- e1071::naiveBayes(formula, data = dataset, laplace = options[["smoothingParameter"]]) predictions <- as.factor(max.col(predict(fit, newdata = grid, type = "raw"))) levels(predictions) <- unique(dataset[, options[["target"]]]) + } else if (type == "logistic") { + if (classificationResult[["family"]] == "binomial") { + fit <- glm(formula, data = dataset, family = "binomial") + predictions <- as.factor(round(predict(fit, grid, type = "response"), 0)) + levels(predictions) <- unique(dataset[, options[["target"]]]) + } else { + fit <- VGAM::vglm(formula, data = dataset, family = "multinomial") + logodds <- predict(fit, newdata = grid) + ncategories <- ncol(logodds) + 1 + probabilities <- matrix(0, nrow = nrow(logodds), ncol = ncategories) + for (i in seq_len(ncategories - 1)) { + probabilities[, i] <- exp(logodds[, i]) + } + probabilities[, ncategories] <- 1 + row_sums <- rowSums(probabilities) + probabilities <- probabilities / row_sums + predicted_columns <- apply(probabilities, 1, which.max) + categories <- levels(dataset[[options[["target"]]]]) + predictions <- as.factor(categories[predicted_columns]) + } } shapes <- rep(21, nrow(dataset)) if (type == "svm") { diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index 7d905f8e..ecd0e7f9 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -59,8 +59,8 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . # Create the Andrews curves .mlClassificationPlotAndrews(dataset, options, jaspResults, ready, position = 11) -# # Decision boundaries -# .mlClassificationPlotBoundaries(dataset, options, jaspResults, ready, position = 12, type = "logistic") + # Decision boundaries + .mlClassificationPlotBoundaries(dataset, options, jaspResults, ready, position = 12, type = "logistic") } .logisticMultinomialClassification <- function(dataset, options, jaspResults, ready) { From 09b2b4554f26b2229e7c826d2601a53923e61a3f Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Tue, 10 Sep 2024 22:08:49 +0200 Subject: [PATCH 12/30] Start with prediction --- R/mlClassificationLogisticMultinomial.R | 12 +++++++-- R/mlPrediction.R | 34 ++++++++++++++++++++++++- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index ecd0e7f9..9adc9e05 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -103,7 +103,15 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . result <- list() result[["formula"]] <- formula result[["family"]] <- family - result[["model"]] <- trainingFit + if (family == "binomial") { + result[["model"]] <- trainingFit + } else { + model <- lapply(slotNames(trainingFit), function(x) slot(trainingFit, x)) + names(model) <- slotNames(trainingFit) + model[["original"]] <- trainingFit + class(model) <- "vglm" + result[["model"]] <- model + } result[["confTable"]] <- table("Pred" = testPredictions, "Real" = testSet[, options[["target"]]]) result[["testAcc"]] <- sum(diag(prop.table(result[["confTable"]]))) result[["auc"]] <- .classificationCalcAUC(testSet, trainingSet, options, "logisticClassification") @@ -171,7 +179,7 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . } rownames(coefs) <- vars } else { - coefs <- cbind(model@coefficients, confint(model, level = options[["coefTableConfIntLevel"]])) + coefs <- cbind(model$coefficients, confint(model[["original"]], level = options[["coefTableConfIntLevel"]])) colnames(coefs) <- c("est", "lower", "upper") vars <- rownames(coefs) for (i in seq_along(vars)) { diff --git a/R/mlPrediction.R b/R/mlPrediction.R index ce7f45f1..fb18261a 100644 --- a/R/mlPrediction.R +++ b/R/mlPrediction.R @@ -75,6 +75,12 @@ is.jaspMachineLearning <- function(x) { .mlPredictionGetModelType.naiveBayes <- function(model) { gettext("Naive Bayes") } +.mlPredictionGetModelType.glm <- function(model) { + gettext("Logistic regression") +} +.mlPredictionGetModelType.vglm <- function(model) { + gettext("Multinomial regression") +} # S3 method to make predictions using the model .mlPredictionGetPredictions <- function(model, dataset) { @@ -135,6 +141,12 @@ is.jaspMachineLearning <- function(x) { .mlPredictionGetPredictions.naiveBayes <- function(model, dataset) { as.character(e1071:::predict.naiveBayes(model, newdata = dataset, type = "class")) } +.mlPredictionGetPredictions.glm <- function(model, dataset) { + # TODO +} +.mlPredictionGetPredictions.vglm <- function(model, dataset) { + # TODO +} # S3 method to make find out number of observations in training data .mlPredictionGetTrainingN <- function(model) { @@ -170,6 +182,12 @@ is.jaspMachineLearning <- function(x) { .mlPredictionGetTrainingN.naiveBayes <- function(model) { nrow(model[["data"]]) } +.mlPredictionGetTrainingN.glm <- function(model) { + nrow(model[["data"]]) +} +.mlPredictionGetTrainingN.vglm <- function(model) { + nrow(model$x) +} # S3 method to decode the model variables in the result object # so that they can be matched to variables in the prediction analysis @@ -229,6 +247,14 @@ is.jaspMachineLearning <- function(x) { names(model[["tables"]]) <- decodeColNames(names(model[["tables"]])) return(model) } +.decodeJaspMLobject.glm <- function(model) { + # TODO + return(model) +} +.decodeJaspMLobject.vglm <- function(model) { + # TODO + return(model) +} .mlPredictionReadModel <- function(options) { if (options[["trainedModelFilePath"]] != "") { @@ -238,7 +264,7 @@ is.jaspMachineLearning <- function(x) { if (!is.jaspMachineLearning(model)) { jaspBase:::.quitAnalysis(gettext("Error: The trained model is not created in JASP.")) } - if (!(any(c("kknn", "lda", "gbm", "randomForest", "cv.glmnet", "nn", "rpart", "svm", "lm", "naiveBayes") %in% class(model)))) { + if (!(any(c("kknn", "lda", "gbm", "randomForest", "cv.glmnet", "nn", "rpart", "svm", "lm", "naiveBayes", "glm", "vglm") %in% class(model)))) { jaspBase:::.quitAnalysis(gettextf("The trained model (type: %1$s) is currently not supported in JASP.", paste(class(model), collapse = ", "))) } if (model[["jaspVersion"]] != .baseCitation) { @@ -326,6 +352,8 @@ is.jaspMachineLearning <- function(x) { table$addColumnInfo(name = "mtry", title = gettext("Features per split"), type = "integer") } else if (inherits(model, "cv.glmnet")) { table$addColumnInfo(name = "lambda", title = "\u03BB", type = "number") + } else if (inherits(model, "glm") || inherits(model, "vglm")) { + table$addColumnInfo(name = "family", title = gettext("Family"), type = "string") } table$addColumnInfo(name = "ntrain", title = gettext("n(Train)"), type = "integer") table$addColumnInfo(name = "nnew", title = gettext("n(New)"), type = "integer") @@ -344,6 +372,10 @@ is.jaspMachineLearning <- function(x) { row[["mtry"]] <- model[["mtry"]] } else if (inherits(model, "cv.glmnet")) { row[["lambda"]] <- model[["lambda.min"]] + } else if (inherits(model, "glm")) { + row[["family"]] <- gettext("binomial") + } else if (inherits(model, "vglm")) { + row[["family"]] <- gettext("multinomial") } if (length(presentVars) > 0) { row[["nnew"]] <- nrow(dataset) From 82a9b8e8e5a46a22f9e1f3152cf35b033a9f3c0a Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Tue, 10 Sep 2024 22:48:46 +0200 Subject: [PATCH 13/30] Prediction for logit regression --- R/mlClassificationLogisticMultinomial.R | 4 ++-- R/mlPrediction.R | 11 +++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index 9adc9e05..f9418332 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -236,7 +236,7 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . .mlClassificationLogisticPredictions <- function(trainingSet, options, probabilities) { categories <- levels(trainingSet[[options[["target"]]]]) - predicted_categories <- categories[round(probabilities, 0) + 1] + predicted_categories <- factor(categories[round(probabilities, 0) + 1], levels = levels(trainingSet[[options[["target"]]]])) return(predicted_categories) } @@ -251,6 +251,6 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . probabilities <- probabilities / row_sums predicted_columns <- apply(probabilities, 1, which.max) categories <- levels(trainingSet[[options[["target"]]]]) - predicted_categories <- categories[predicted_columns] + predicted_categories <- factor(categories[predicted_columns], levels = levels(trainingSet[[options[["target"]]]])) return(predicted_categories) } diff --git a/R/mlPrediction.R b/R/mlPrediction.R index fb18261a..1e41ef81 100644 --- a/R/mlPrediction.R +++ b/R/mlPrediction.R @@ -142,7 +142,7 @@ is.jaspMachineLearning <- function(x) { as.character(e1071:::predict.naiveBayes(model, newdata = dataset, type = "class")) } .mlPredictionGetPredictions.glm <- function(model, dataset) { - # TODO + as.character(levels(as.factor(model$model[, 1]))[round(predict(model, newdata = dataset, type = "response"), 0) + 1]) } .mlPredictionGetPredictions.vglm <- function(model, dataset) { # TODO @@ -248,7 +248,8 @@ is.jaspMachineLearning <- function(x) { return(model) } .decodeJaspMLobject.glm <- function(model) { - # TODO + formula <- formula(paste(decodeColNames(as.character(model$terms)[2]), "~", paste0(decodeColNames(strsplit(as.character(model$terms)[3], split = " + ", fixed = TRUE)[[1]]), collapse = " + "))) + model$terms <- stats::terms(formula) return(model) } .decodeJaspMLobject.vglm <- function(model) { @@ -352,8 +353,6 @@ is.jaspMachineLearning <- function(x) { table$addColumnInfo(name = "mtry", title = gettext("Features per split"), type = "integer") } else if (inherits(model, "cv.glmnet")) { table$addColumnInfo(name = "lambda", title = "\u03BB", type = "number") - } else if (inherits(model, "glm") || inherits(model, "vglm")) { - table$addColumnInfo(name = "family", title = gettext("Family"), type = "string") } table$addColumnInfo(name = "ntrain", title = gettext("n(Train)"), type = "integer") table$addColumnInfo(name = "nnew", title = gettext("n(New)"), type = "integer") @@ -372,10 +371,6 @@ is.jaspMachineLearning <- function(x) { row[["mtry"]] <- model[["mtry"]] } else if (inherits(model, "cv.glmnet")) { row[["lambda"]] <- model[["lambda.min"]] - } else if (inherits(model, "glm")) { - row[["family"]] <- gettext("binomial") - } else if (inherits(model, "vglm")) { - row[["family"]] <- gettext("multinomial") } if (length(presentVars) > 0) { row[["nnew"]] <- nrow(dataset) From d9a3a59b96266adc228f68406c3280542e039025 Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Tue, 10 Sep 2024 23:09:30 +0200 Subject: [PATCH 14/30] Predictions for multinomial --- R/mlClassificationLogisticMultinomial.R | 1 + R/mlPrediction.R | 18 +++++++++++++++--- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index f9418332..0552de78 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -109,6 +109,7 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . model <- lapply(slotNames(trainingFit), function(x) slot(trainingFit, x)) names(model) <- slotNames(trainingFit) model[["original"]] <- trainingFit + model[["target"]] <- trainingSet[[options[["target"]]]] class(model) <- "vglm" result[["model"]] <- model } diff --git a/R/mlPrediction.R b/R/mlPrediction.R index 1e41ef81..db564ee9 100644 --- a/R/mlPrediction.R +++ b/R/mlPrediction.R @@ -145,7 +145,18 @@ is.jaspMachineLearning <- function(x) { as.character(levels(as.factor(model$model[, 1]))[round(predict(model, newdata = dataset, type = "response"), 0) + 1]) } .mlPredictionGetPredictions.vglm <- function(model, dataset) { - # TODO + model[["original"]]@terms$terms <- model[["terms"]] + logodds <- predict(model[["original"]], newdata = dataset) + ncategories <- ncol(logodds) + 1 + probabilities <- matrix(0, nrow = nrow(logodds), ncol = ncategories) + for (i in seq_len(ncategories - 1)) { + probabilities[, i] <- exp(logodds[, i]) + } + probabilities[, ncategories] <- 1 + row_sums <- rowSums(probabilities) + probabilities <- probabilities / row_sums + predicted_columns <- apply(probabilities, 1, which.max) + as.character(levels(as.factor(model$target))[predicted_columns]) } # S3 method to make find out number of observations in training data @@ -186,7 +197,7 @@ is.jaspMachineLearning <- function(x) { nrow(model[["data"]]) } .mlPredictionGetTrainingN.vglm <- function(model) { - nrow(model$x) + nrow(model[["x"]]) } # S3 method to decode the model variables in the result object @@ -253,7 +264,8 @@ is.jaspMachineLearning <- function(x) { return(model) } .decodeJaspMLobject.vglm <- function(model) { - # TODO + formula <- formula(paste(decodeColNames(strsplit(as.character(model$terms), " ")[[1]][1]), "~", paste0(decodeColNames(strsplit(strsplit(as.character(model$terms), split = " ~ ")[[1]][2], split = " + ", fixed = TRUE)[[1]]), collapse = " + "))) + model$terms <- stats::terms(formula) return(model) } From 712bb5da68cee919f181e308fac44584ec83efa3 Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Tue, 10 Sep 2024 23:39:27 +0200 Subject: [PATCH 15/30] Formula for multinomial --- R/mlClassificationLogisticMultinomial.R | 42 +++++++++++++++++++------ 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index 0552de78..cf677f70 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -217,16 +217,40 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . table[["upper"]] <- coefs[, "upper"] } if (options[["formula"]]) { # TODO FOR MULTINOMIAL - one_cat <- levels(factor(classificationResult[["train"]][[options[["target"]]]]))[2] - if (options[["intercept"]]) { - regform <- paste0("logit(p", options[["target"]], " = ", one_cat, ") = ", round(as.numeric(coefs[, 1])[1], 3)) - start <- 2 + if (classificationResult[["family"]] == "binomial") { + one_cat <- levels(factor(classificationResult[["train"]][[options[["target"]]]]))[2] + if (options[["intercept"]]) { + regform <- paste0("logit(p", options[["target"]], " = ", one_cat, ") = ", round(as.numeric(coefs[, 1])[1], 3)) + start <- 2 + } else { + regform <- paste0("logit(p", options[["target"]], " = ", one_cat, ") = ") + start <- 1 + } + for (i in start:nrow(coefs)) { + regform <- paste0(regform, if (round(as.numeric(coefs[, 1])[i], 3) < 0) " - " else " + ", abs(round(as.numeric(coefs[, 1])[i], 3)), " x ", rownames(coefs)[i]) + } } else { - regform <- paste0("logit(p", options[["target"]], " = ", one_cat, ") = ") - start <- 1 - } - for (i in start:nrow(coefs)) { - regform <- paste0(regform, if (round(as.numeric(coefs[, 1])[i], 3) < 0) " - " else " + ", abs(round(as.numeric(coefs[, 1])[i], 3)), " x ", rownames(coefs)[i]) + regform <- NULL + nlevs <- nlevels(classificationResult[["train"]][[options[["target"]]]]) + baseline_cat <- levels(classificationResult[["train"]][[options[["target"]]]])[nlevs] + for (i in seq_len(nlevs - 1)) { + current_cat <- levels(classificationResult[["train"]][[options[["target"]]]])[i] + if (options[["intercept"]]) { + part <- paste0("log(p", options[["target"]], " = ", current_cat, " / p", options[["target"]], " = ", baseline_cat, ") = ", round(as.numeric(coefs[, 1])[i], 3)) + start <- nlevs - 1 + i + } else { + part <- paste0("log(p", options[["target"]], " = ", current_cat, " / p", options[["target"]], " = ", baseline_cat, ") = ") + start <- i + } + for (j in seq(start, nrow(coefs), by = nlevs - 1)) { + part <- paste0(part, if (round(as.numeric(coefs[, 1])[j], 3) < 0) " - " else " + ", abs(round(as.numeric(coefs[, 1])[j], 3)), " x ", strsplit(rownames(coefs)[j], " : ")[[1]][1]) + } + if (i == 1) { + regform <- paste0(regform, part, "\n\n") + } else { + regform <- paste0(regform, part) + } + } } formula <- createJaspHtml(gettextf("Regression equation:\n%1$s", regform), "p") formula$position <- position + 1 From 7fbe88c02eed931b65881e913232fa0df21e920d Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Tue, 10 Sep 2024 23:45:06 +0200 Subject: [PATCH 16/30] Update table content --- R/commonMachineLearningClassification.R | 4 +++- tests/testthat/test-mlclassificationlogisticmultinomial.R | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/R/commonMachineLearningClassification.R b/R/commonMachineLearningClassification.R index b0277e50..2153779e 100644 --- a/R/commonMachineLearningClassification.R +++ b/R/commonMachineLearningClassification.R @@ -323,8 +323,10 @@ } else { table$title <- gettext("Model Summary: Multinomial Regression Classification") } + family <- classificationResult[["family"]] + family <- paste0(toupper(substr(family, 1, 1)), substr(family, 2, nchar(family))) row <- data.frame( - family = classificationResult[["family"]], + family = family, nTrain = nTrain, nTest = classificationResult[["ntest"]], testAcc = classificationResult[["testAcc"]] diff --git a/tests/testthat/test-mlclassificationlogisticmultinomial.R b/tests/testthat/test-mlclassificationlogisticmultinomial.R index 397a8c2d..5828869a 100644 --- a/tests/testthat/test-mlclassificationlogisticmultinomial.R +++ b/tests/testthat/test-mlclassificationlogisticmultinomial.R @@ -39,7 +39,7 @@ test_that("Class Proportions table results match", { test_that("Model Summary: Multinomial Regression Classification table results match", { table <- results[["results"]][["classificationTable"]][["data"]] jaspTools::expect_equal_tables(table, - list("multinomial", 30, 120, 1)) + list("Multinomial", 30, 120, 1)) }) test_that("Confusion Matrix table results match", { From 404bc871fb8f090e74e5a176450865f64b24c588 Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Wed, 11 Sep 2024 00:23:20 +0200 Subject: [PATCH 17/30] explainers --- R/commonMachineLearningClassification.R | 3 +-- R/mlClassificationLogisticMultinomial.R | 22 ++++++++++++---------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/R/commonMachineLearningClassification.R b/R/commonMachineLearningClassification.R index 2153779e..7b60432e 100644 --- a/R/commonMachineLearningClassification.R +++ b/R/commonMachineLearningClassification.R @@ -324,9 +324,8 @@ table$title <- gettext("Model Summary: Multinomial Regression Classification") } family <- classificationResult[["family"]] - family <- paste0(toupper(substr(family, 1, 1)), substr(family, 2, nchar(family))) row <- data.frame( - family = family, + family = paste0(toupper(substr(family, 1, 1)), substr(family, 2, nchar(family))), nTrain = nTrain, nTest = classificationResult[["ntest"]], testAcc = classificationResult[["testAcc"]] diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index cf677f70..d2e29f55 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -45,11 +45,11 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . # Create the validation measures table .mlClassificationTableMetrics(dataset, options, jaspResults, ready, position = 5) -# # Create the variable importance table -# .mlTableFeatureImportance(options, jaspResults, ready, position = 6, purpose = "classification") + # Create the variable importance table + .mlTableFeatureImportance(options, jaspResults, ready, position = 6, purpose = "classification") -# # Create the shap table -# .mlTableShap(dataset, options, jaspResults, ready, position = 7, purpose = "classification") + # Create the shap table + .mlTableShap(dataset, options, jaspResults, ready, position = 7, purpose = "classification") .mlClassificationLogisticTableCoef(options, jaspResults, ready, position = 8) @@ -124,12 +124,14 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . result[["test"]] <- testSet result[["testIndicatorColumn"]] <- testIndicatorColumn result[["classes"]] <- dataPredictions -# result[["explainer"]] <- DALEX::explain(result[["model"]], type = "classification", data = result[["train"]], y = result[["train"]][, options[["target"]]], predict_function = function(model, data) predict(model, newdata = data, type = "raw")) -# if (nlevels(result[["testReal"]]) == 2) { -# result[["explainer_fi"]] <- DALEX::explain(result[["model"]], type = "classification", data = result[["train"]], y = as.numeric(result[["train"]][, options[["target"]]]) - 1, predict_function = function(model, data) predict(model, newdata = data, type = "class")) -# } else { -# result[["explainer_fi"]] <- DALEX::explain(result[["model"]], type = "multiclass", data = result[["train"]], y = result[["train"]][, options[["target"]]] , predict_function = function(model, data) predict(model, newdata = data, type = "raw")) -# } + if (family == "binomial") { + result[["explainer"]] <- DALEX::explain(result[["model"]], type = "classification", data = result[["train"]], y = result[["train"]][, options[["target"]]], predict_function = function(model, data) data.frame(1 - predict(model, newdata = data, type = "response"), predict(model, newdata = data, type = "response"))) + result[["explainer_fi"]] <- DALEX::explain(result[["model"]], type = "classification", data = result[["train"]], y = as.numeric(result[["train"]][, options[["target"]]]) - 1, predict_function = function(model, data) round(predict(model, newdata = data, type = "response"), 0) + 1) + } else { + # TODO + result[["explainer"]] <- DALEX::explain(result[["model"]][["original"]], type = "multiclass", data = result[["train"]], y = result[["train"]][, options[["target"]]], predict_function = function(model, data) VGAM::predict(model, data, type = "response")) + result[["explainer_fi"]] <- result[["explainer"]] + } return(result) } From 2986b2ca5b25b45d4bf8a488e94cfb420a3b86b3 Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Wed, 11 Sep 2024 00:25:11 +0200 Subject: [PATCH 18/30] Update mlClassificationLogisticMultinomial.R --- R/mlClassificationLogisticMultinomial.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index d2e29f55..5dc9df70 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -128,7 +128,6 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . result[["explainer"]] <- DALEX::explain(result[["model"]], type = "classification", data = result[["train"]], y = result[["train"]][, options[["target"]]], predict_function = function(model, data) data.frame(1 - predict(model, newdata = data, type = "response"), predict(model, newdata = data, type = "response"))) result[["explainer_fi"]] <- DALEX::explain(result[["model"]], type = "classification", data = result[["train"]], y = as.numeric(result[["train"]][, options[["target"]]]) - 1, predict_function = function(model, data) round(predict(model, newdata = data, type = "response"), 0) + 1) } else { - # TODO result[["explainer"]] <- DALEX::explain(result[["model"]][["original"]], type = "multiclass", data = result[["train"]], y = result[["train"]][, options[["target"]]], predict_function = function(model, data) VGAM::predict(model, data, type = "response")) result[["explainer_fi"]] <- result[["explainer"]] } From edcff6b6ddfdde678024df3ab73087ed2cba9b3e Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Wed, 11 Sep 2024 00:26:13 +0200 Subject: [PATCH 19/30] Update test-mlclassificationlogisticmultinomial.R --- ...test-mlclassificationlogisticmultinomial.R | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/testthat/test-mlclassificationlogisticmultinomial.R b/tests/testthat/test-mlclassificationlogisticmultinomial.R index 5828869a..fc2f8182 100644 --- a/tests/testthat/test-mlclassificationlogisticmultinomial.R +++ b/tests/testthat/test-mlclassificationlogisticmultinomial.R @@ -49,12 +49,34 @@ test_that("Confusion Matrix table results match", { "virginica", 0, 0, 12)) }) +test_that("Feature Importance Metrics table results match", { + table <- results[["results"]][["featureImportanceTable"]][["data"]] + jaspTools::expect_equal_tables(table, + list(530.419652531233, "Petal.Length", 258.101247632355, "Petal.Width", + 11.6632506085855, "Sepal.Width", 10.7849556008181, "Sepal.Length" + )) +}) + test_that("Data Split plot matches", { plotName <- results[["results"]][["plotDataSplit"]][["data"]] testPlot <- results[["state"]][["figures"]][[plotName]][["obj"]] jaspTools::expect_equal_plots(testPlot, "data-split") }) +test_that("Additive Explanations for Predictions of Test Set Cases table results match", { + table <- results[["results"]][["tableShap"]][["data"]] + jaspTools::expect_equal_tables(table, + list(0.629688901507271, 0.0369777616411127, 2.01684224876431e-11, 3.29787464004028e-09, + 0.333333333528117, 1, "setosa (1)", 0.608365336494419, 0.0583013168725895, + 5.75901548671709e-11, 1.30466177861166e-08, 0.333333333528117, + 2, "setosa (1)", 0.578930644244146, 0.0877359705540646, -1.91210501876427e-08, + 6.96919006948349e-08, 0.333333333528117, 3, "setosa (1)", 0.644504774615733, + 0.0221618748599225, 2.48312481687663e-11, 1.69711121822402e-08, + 0.333333333528117, 4, "setosa (1)", 0.544084722419656, 0.122581029133867, + 2.34213792804638e-09, 9.1257324152938e-07, 0.333333333528117, + 5, "setosa (1)")) +}) + test_that("Model Performance Metrics table results match", { table <- results[["results"]][["validationMeasures"]][["data"]] jaspTools::expect_equal_tables(table, From c298a2d8c7f0340415e51febbb36eaeb6752d753 Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Wed, 11 Sep 2024 08:43:12 +0200 Subject: [PATCH 20/30] Add info text --- R/mlClassificationLogisticMultinomial.R | 4 ++-- inst/qml/mlClassificationLogisticMultinomial.qml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index 5dc9df70..721121b5 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -88,13 +88,13 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . } if (nlevels(trainingSet[[options[["target"]]]]) == 2) { family = "binomial" - trainingFit <- glm(formula, data = trainingSet, family = family) + trainingFit <- glm(formula, data = trainingSet, family = stats::binomial(link = "logit")) # Use the specified model to make predictions for dataset testPredictions <- .mlClassificationLogisticPredictions(trainingSet, options, predict(trainingFit, newdata = testSet, type = "response")) dataPredictions <- .mlClassificationLogisticPredictions(trainingSet, options, predict(trainingFit, newdata = dataset, type = "response")) } else { family <- "multinomial" - trainingFit <- VGAM::vglm(formula, data = trainingSet, family = family) + trainingFit <- VGAM::vglm(formula, data = trainingSet, family = VGAM::multinomial()) # Use the specified model to make predictions for dataset testPredictions <- .mlClassificationMultinomialPredictions(trainingSet, options, predict(trainingFit, newdata = testSet)) dataPredictions <- .mlClassificationMultinomialPredictions(trainingSet, options, predict(trainingFit, newdata = dataset)) diff --git a/inst/qml/mlClassificationLogisticMultinomial.qml b/inst/qml/mlClassificationLogisticMultinomial.qml index 8ad33a4a..2df65239 100644 --- a/inst/qml/mlClassificationLogisticMultinomial.qml +++ b/inst/qml/mlClassificationLogisticMultinomial.qml @@ -28,7 +28,7 @@ import "./common/analyses/regularized" as REGU Form { - info: qsTr("Logistic regression.") + info: qsTr("Logistic regression is a statistical method used to model the relationship between a binary target variable (with two possible outcomes) and one or more feature variables. It predicts the probability of a specific outcome by using a logistic function, which ensures that the predicted probabilities are between 0 and 1. Multinomial regression extends logistic regression to handle target variables with more than two categories. Instead of predicting binary outcomes, multinomial regression is used for scenarios where the target variable has three or more unordered categories.") UI.VariablesFormClassification { id: vars } From 587ecd4e5d58738a465aff4f8de3a9c534bdd6ff Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Wed, 11 Sep 2024 09:03:02 +0200 Subject: [PATCH 21/30] Update --- R/commonMachineLearningClassification.R | 11 +++++++---- R/mlClassificationLogisticMultinomial.R | 5 ++++- R/mlPrediction.R | 9 +++++++++ .../test-mlclassificationlogisticmultinomial.R | 2 +- 4 files changed, 21 insertions(+), 6 deletions(-) diff --git a/R/commonMachineLearningClassification.R b/R/commonMachineLearningClassification.R index 7b60432e..a9ec7b0e 100644 --- a/R/commonMachineLearningClassification.R +++ b/R/commonMachineLearningClassification.R @@ -152,6 +152,7 @@ table$addColumnInfo(name = "smoothing", title = gettext("Smoothing"), type = "number") } else if (type == "logistic") { table$addColumnInfo(name = "family", title = gettext("Family"), type = "string") + table$addColumnInfo(name = "link", title = gettext("Link"), type = "string") } # Add common columns table$addColumnInfo(name = "nTrain", title = gettext("n(Train)"), type = "integer") @@ -324,8 +325,10 @@ table$title <- gettext("Model Summary: Multinomial Regression Classification") } family <- classificationResult[["family"]] + link <- classificationResult[["link"]] row <- data.frame( family = paste0(toupper(substr(family, 1, 1)), substr(family, 2, nchar(family))), + link = paste0(toupper(substr(link, 1, 1)), substr(link, 2, nchar(link))), nTrain = nTrain, nTest = classificationResult[["ntest"]], testAcc = classificationResult[["testAcc"]] @@ -585,11 +588,11 @@ levels(predictions) <- unique(dataset[, options[["target"]]]) } else if (type == "logistic") { if (classificationResult[["family"]] == "binomial") { - fit <- glm(formula, data = dataset, family = "binomial") + fit <- glm(formula, data = dataset, family = stats::binomial(link = "logit")) predictions <- as.factor(round(predict(fit, grid, type = "response"), 0)) levels(predictions) <- unique(dataset[, options[["target"]]]) } else { - fit <- VGAM::vglm(formula, data = dataset, family = "multinomial") + fit <- VGAM::vglm(formula, data = dataset, family = VGAM::multinomial()) logodds <- predict(fit, newdata = grid) ncategories <- ncol(logodds) + 1 probabilities <- matrix(0, nrow = nrow(logodds), ncol = ncategories) @@ -743,7 +746,7 @@ fit <- e1071::naiveBayes(formula = formula, data = typeData, laplace = options[["smoothingParameter"]]) score <- max.col(predict(fit, test, type = "raw")) } else if (type == "logistic") { - fit <- glm(formula, data = typeData, family = "binomial") + fit <- glm(formula, data = typeData, family = stats::binomial(link = "logit")) score <- round(predict(fit, test, type = "response"), 0) } pred <- ROCR::prediction(score, actual.class) @@ -1164,7 +1167,7 @@ } .calcAUCScore.logisticClassification <- function(AUCformula, test, typeData, options, jaspResults, ...) { - fit <- glm(AUCformula, data = typeData, family = "binomial") + fit <- glm(AUCformula, data = typeData, family = stats::binomial(link = "logit")) score <- round(predict(fit, test, type = "response"), 0) return(score) } diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index 721121b5..93e83b67 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -88,12 +88,14 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . } if (nlevels(trainingSet[[options[["target"]]]]) == 2) { family = "binomial" - trainingFit <- glm(formula, data = trainingSet, family = stats::binomial(link = "logit")) + linkFunction <- "logit" + trainingFit <- glm(formula, data = trainingSet, family = stats::binomial(link = linkFunction)) # Use the specified model to make predictions for dataset testPredictions <- .mlClassificationLogisticPredictions(trainingSet, options, predict(trainingFit, newdata = testSet, type = "response")) dataPredictions <- .mlClassificationLogisticPredictions(trainingSet, options, predict(trainingFit, newdata = dataset, type = "response")) } else { family <- "multinomial" + linkFunction <- "logit" trainingFit <- VGAM::vglm(formula, data = trainingSet, family = VGAM::multinomial()) # Use the specified model to make predictions for dataset testPredictions <- .mlClassificationMultinomialPredictions(trainingSet, options, predict(trainingFit, newdata = testSet)) @@ -103,6 +105,7 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . result <- list() result[["formula"]] <- formula result[["family"]] <- family + result[["link"]] <- linkFunction if (family == "binomial") { result[["model"]] <- trainingFit } else { diff --git a/R/mlPrediction.R b/R/mlPrediction.R index db564ee9..cad9e30c 100644 --- a/R/mlPrediction.R +++ b/R/mlPrediction.R @@ -365,6 +365,9 @@ is.jaspMachineLearning <- function(x) { table$addColumnInfo(name = "mtry", title = gettext("Features per split"), type = "integer") } else if (inherits(model, "cv.glmnet")) { table$addColumnInfo(name = "lambda", title = "\u03BB", type = "number") + } else if (inherits(model, "glm") || inherits(model, "vglm")) { + table$addColumnInfo(name = "family", title = gettext("Family"), type = "string") + table$addColumnInfo(name = "link", title = gettext("Link"), type = "string") } table$addColumnInfo(name = "ntrain", title = gettext("n(Train)"), type = "integer") table$addColumnInfo(name = "nnew", title = gettext("n(New)"), type = "integer") @@ -383,6 +386,12 @@ is.jaspMachineLearning <- function(x) { row[["mtry"]] <- model[["mtry"]] } else if (inherits(model, "cv.glmnet")) { row[["lambda"]] <- model[["lambda.min"]] + } else if (inherits(model, "glm")) { + row[["family"]] <- gettext("Binomial") + row[["link"]] <- gettext("Logit") + } else if (inherits(model, "vglm")) { + row[["family"]] <- gettext("Multinomial") + row[["link"]] <- gettext("Logit") } if (length(presentVars) > 0) { row[["nnew"]] <- nrow(dataset) diff --git a/tests/testthat/test-mlclassificationlogisticmultinomial.R b/tests/testthat/test-mlclassificationlogisticmultinomial.R index fc2f8182..94e4ec19 100644 --- a/tests/testthat/test-mlclassificationlogisticmultinomial.R +++ b/tests/testthat/test-mlclassificationlogisticmultinomial.R @@ -39,7 +39,7 @@ test_that("Class Proportions table results match", { test_that("Model Summary: Multinomial Regression Classification table results match", { table <- results[["results"]][["classificationTable"]][["data"]] jaspTools::expect_equal_tables(table, - list("Multinomial", 30, 120, 1)) + list("Multinomial", "Logit", 30, 120, 1)) }) test_that("Confusion Matrix table results match", { From 5326f958815eff37e28338492f79b6c9e25bc087 Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Wed, 11 Sep 2024 09:19:03 +0200 Subject: [PATCH 22/30] Update mlClassificationLogisticMultinomial.R --- R/mlClassificationLogisticMultinomial.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index 93e83b67..7ad19947 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -214,7 +214,7 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . table[["se"]] <- rep(".", nrow(coefs)) table[["t"]] <- rep(".", nrow(coefs)) table[["p"]] <- rep(".", nrow(coefs)) - table$addFootnote(gettext("Standard errors, t-values and p-values are not available for multinomial regression coefficients.")) + table$addFootnote(gettext("Standard errors, t-values and p-values are not available in multinomial regression.")) } if (options[["coefTableConfInt"]]) { table[["lower"]] <- coefs[, "lower"] From ba3ad28801f476357d84eddc03b674622f43e5f6 Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Wed, 11 Sep 2024 09:45:48 +0200 Subject: [PATCH 23/30] coef table for multinomial --- R/mlClassificationLogisticMultinomial.R | 25 ++++++----------- ...test-mlclassificationlogisticmultinomial.R | 28 +++++++++++++++++++ 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index 7ad19947..3caf82b5 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -147,7 +147,7 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . table$addColumnInfo(name = "var", title = "", type = "string") table$addColumnInfo(name = "coefs", title = gettextf("Coefficient (%s)", "\u03B2"), type = "number") table$addColumnInfo(name = "se", title = gettext("Standard Error"), type = "number") - table$addColumnInfo(name = "t", title = gettext("t"), type = "number") + table$addColumnInfo(name = "z", title = gettext("z"), type = "number") table$addColumnInfo(name = "p", title = gettext("p"), type = "pvalue") if (options[["coefTableConfInt"]]) { overtitle <- gettextf("%1$s%% Confidence interval", round(options[["coefTableConfIntLevel"]] * 100, 3)) @@ -169,10 +169,8 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . classificationResult <- jaspResults[["classificationResult"]]$object model <- classificationResult[["model"]] if (classificationResult[["family"]] == "binomial") { - coefs <- summary(model)$coefficients - conf_int <- confint(model, level = options[["coefTableConfIntLevel"]]) - coefs <- cbind(coefs, lower = conf_int[, 1], upper = conf_int[, 2]) - colnames(coefs) <- c("est", "se", "t", "p", "lower", "upper") + coefs <- cbind(coef(summary(model)), confint(model, level = options[["coefTableConfIntLevel"]])) + colnames(coefs) <- c("est", "se", "z", "p", "lower", "upper") vars <- rownames(coefs) for (i in seq_along(vars)) { if (!(vars[i] %in% options[["predictors"]]) && vars[i] != "(Intercept)") { @@ -184,8 +182,8 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . } rownames(coefs) <- vars } else { - coefs <- cbind(model$coefficients, confint(model[["original"]], level = options[["coefTableConfIntLevel"]])) - colnames(coefs) <- c("est", "lower", "upper") + coefs <- cbind(VGAM::coef(VGAM::summaryvglm(model[["original"]])), confint(model[["original"]], level = options[["coefTableConfIntLevel"]])) + colnames(coefs) <- c("est", "se", "z", "p", "lower", "upper") vars <- rownames(coefs) for (i in seq_along(vars)) { for (j in c("(Intercept)", options[["predictors"]])) { @@ -206,16 +204,9 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . } table[["var"]] <- rownames(coefs) table[["coefs"]] <- as.numeric(coefs[, "est"]) - if (classificationResult[["family"]] == "binomial") { - table[["se"]] <- as.numeric(coefs[, "se"]) - table[["t"]] <- as.numeric(coefs[, "t"]) - table[["p"]] <- as.numeric(coefs[, "p"]) - } else { - table[["se"]] <- rep(".", nrow(coefs)) - table[["t"]] <- rep(".", nrow(coefs)) - table[["p"]] <- rep(".", nrow(coefs)) - table$addFootnote(gettext("Standard errors, t-values and p-values are not available in multinomial regression.")) - } + table[["se"]] <- as.numeric(coefs[, "se"]) + table[["z"]] <- as.numeric(coefs[, "z"]) + table[["p"]] <- as.numeric(coefs[, "p"]) if (options[["coefTableConfInt"]]) { table[["lower"]] <- coefs[, "lower"] table[["upper"]] <- coefs[, "upper"] diff --git a/tests/testthat/test-mlclassificationlogisticmultinomial.R b/tests/testthat/test-mlclassificationlogisticmultinomial.R index 94e4ec19..43947711 100644 --- a/tests/testthat/test-mlclassificationlogisticmultinomial.R +++ b/tests/testthat/test-mlclassificationlogisticmultinomial.R @@ -4,6 +4,8 @@ context("Machine Learning Logistic / Multinomial Regression Classification") options <- initMlOptions("mlClassificationLogisticMultinomial") options$addIndicator <- FALSE options$addPredictions <- FALSE +options$coefTable <- TRUE +options$coefTableConfInt <- TRUE options$classProportionsTable <- TRUE options$holdoutData <- "holdoutManual" options$modelOptimization <- "manual" @@ -42,6 +44,32 @@ test_that("Model Summary: Multinomial Regression Classification table results ma list("Multinomial", "Logit", 30, 120, 1)) }) +test_that("Regression Coefficients table results match", { + table <- results[["results"]][["coefTable"]][["data"]] + jaspTools::expect_equal_tables(table, + list(-0.42267961226173, -4222.39134720741, 0.999843438690878, 2154.10522892129, + 4221.54598798288, "(Intercept) : setosa", -0.000196220503337896, + 17.8212683047785, 0.307905475852188, 0.0461059069011903, 8.93555339132221, + 35.3346311337047, "(Intercept) : versicolor", 1.99442245200903, + 7.95554619079237, -9967.35637439074, 0.998752813237481, 5089.53837890162, + 9983.26746677233, "Sepal.Length : setosa", 0.00156311743787445, + 1.89635371704686, -2.10497557567811, 0.352947362657274, 2.04153205073509, + 5.89768300977184, "Sepal.Length : versicolor", 0.928887555972511, + 4.70317443290632, -2533.61853897752, 0.997102445459767, 1295.08589618604, + 2543.02488784333, "Sepal.Width : setosa", 0.00363155405116905, + 2.30296713825466, -1.92502652612214, 0.285708816748704, 2.15717926335722, + 6.53096080263146, "Sepal.Width : versicolor", 1.0675826424692, + -38.7099474348317, -15129.1258351686, 0.995988491696961, 7699.33325651139, + 15051.705940299, "Petal.Length : setosa", -0.00502770124959773, + -14.4906493502573, -31.1295653321292, 0.087838572245474, 8.48939884258976, + 2.14826663161466, "Petal.Length : versicolor", -1.7069111275065, + -24.205993975534, -10348.1025945441, 0.996333380037363, 5267.39097350877, + 10299.690606593, "Petal.Width : setosa", -0.00459544280978437, + -12.7574125117412, -27.3435062790441, 0.0864846111196134, 7.44202132404277, + 1.82868125556169, "Petal.Width : versicolor", -1.7142402522452 + )) +}) + test_that("Confusion Matrix table results match", { table <- results[["results"]][["confusionTable"]][["data"]] jaspTools::expect_equal_tables(table, From ddd77838a418fbdf886ca7acb0c8e5879f17168c Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Wed, 11 Sep 2024 09:51:20 +0200 Subject: [PATCH 24/30] Add test for logistic --- .../data-split-1.svg | 42 +++++++ .../{data-split.svg => data-split-2.svg} | 2 +- ...test-mlclassificationlogisticmultinomial.R | 103 +++++++++++++++++- 3 files changed, 144 insertions(+), 3 deletions(-) create mode 100644 tests/testthat/_snaps/mlclassificationlogisticmultinomial/data-split-1.svg rename tests/testthat/_snaps/mlclassificationlogisticmultinomial/{data-split.svg => data-split-2.svg} (95%) diff --git a/tests/testthat/_snaps/mlclassificationlogisticmultinomial/data-split-1.svg b/tests/testthat/_snaps/mlclassificationlogisticmultinomial/data-split-1.svg new file mode 100644 index 00000000..2a5fca82 --- /dev/null +++ b/tests/testthat/_snaps/mlclassificationlogisticmultinomial/data-split-1.svg @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + +Train: 160 +Test: 40 +Total: 200 + + + + + +data-split-1 + + diff --git a/tests/testthat/_snaps/mlclassificationlogisticmultinomial/data-split.svg b/tests/testthat/_snaps/mlclassificationlogisticmultinomial/data-split-2.svg similarity index 95% rename from tests/testthat/_snaps/mlclassificationlogisticmultinomial/data-split.svg rename to tests/testthat/_snaps/mlclassificationlogisticmultinomial/data-split-2.svg index 25ca32f6..35b13f56 100644 --- a/tests/testthat/_snaps/mlclassificationlogisticmultinomial/data-split.svg +++ b/tests/testthat/_snaps/mlclassificationlogisticmultinomial/data-split-2.svg @@ -37,6 +37,6 @@ -data-split +data-split-2 diff --git a/tests/testthat/test-mlclassificationlogisticmultinomial.R b/tests/testthat/test-mlclassificationlogisticmultinomial.R index 43947711..4b2189b4 100644 --- a/tests/testthat/test-mlclassificationlogisticmultinomial.R +++ b/tests/testthat/test-mlclassificationlogisticmultinomial.R @@ -1,6 +1,105 @@ context("Machine Learning Logistic / Multinomial Regression Classification") -# Test fixed model ############################################################# +# Test logistic regression model ############################################ +options <- initMlOptions("mlClassificationLogisticMultinomial") +options$addIndicator <- FALSE +options$addPredictions <- FALSE +options$coefTable <- TRUE +options$coefTableConfInt <- TRUE +options$classProportionsTable <- TRUE +options$holdoutData <- "holdoutManual" +options$modelOptimization <- "manual" +options$modelValid <- "validationManual" +options$predictionsColumn <- "" +options$predictors <- c("x", "y") +options$predictors.types <- rep("scale", 2) +options$saveModel <- FALSE +options$savePath <- "" +options$setSeed <- TRUE +options$target <- "color" +options$target.types <- "nominal" +options$testDataManual <- 0.2 +options$testIndicatorColumn <- "" +options$testSetIndicatorVariable <- "" +options$validationDataManual <- 0.2 +options$validationMeasures <- TRUE +options$tableShap <- TRUE +options$fromIndex <- 1 +options$toIndex <- 5 +options$featureImportanceTable <- TRUE +options$seed <- 2 +set.seed(1) +results <- jaspTools::runAnalysis("mlClassificationLogisticMultinomial", "spiral.csv", options) + +test_that("Class Proportions table results match", { + table <- results[["results"]][["classProportionsTable"]][["data"]] + jaspTools::expect_equal_tables(table, + list(0.5, "Black", 0.575, 0.48125, 0.5, "Red", 0.425, 0.51875)) +}) + +test_that("Model Summary: Logistic Regression Classification table results match", { + table <- results[["results"]][["classificationTable"]][["data"]] + jaspTools::expect_equal_tables(table, + list("Binomial", "Logit", 40, 160, 0.675)) +}) + +test_that("Regression Coefficients table results match", { + table <- results[["results"]][["coefTable"]][["data"]] + jaspTools::expect_equal_tables(table, + list(0.0784909051640228, -0.241691087574101, 0.630847847956443, 0.163341057208965, + 0.400211265504598, "(Intercept)", 0.480533838247466, -0.0733280693358763, + -0.389515022216725, 0.647229030486731, 0.160239637348256, 0.241355688318239, + "x", -0.457615047995329, -0.520574613112014, -0.864252780481388, + 0.00221841580161717, 0.170160350681228, -0.194437228429846, + "y", -3.0593179376272)) +}) + +test_that("Confusion Matrix table results match", { + table <- results[["results"]][["confusionTable"]][["data"]] + jaspTools::expect_equal_tables(table, + list("Observed", "Black", 14, 9, "", "Red", 4, 13)) +}) + +test_that("Feature Importance Metrics table results match", { + table <- results[["results"]][["featureImportanceTable"]][["data"]] + jaspTools::expect_equal_tables(table, + list(0.49553121577218, "y", 0.367228915662651, "x")) +}) + +test_that("Data Split plot matches", { + plotName <- results[["results"]][["plotDataSplit"]][["data"]] + testPlot <- results[["state"]][["figures"]][[plotName]][["obj"]] + jaspTools::expect_equal_plots(testPlot, "data-split-1") +}) + +test_that("Additive Explanations for Predictions of Test Set Cases table results match", { + table <- results[["results"]][["tableShap"]][["data"]] + jaspTools::expect_equal_tables(table, + list(0.518749999999986, 1, "Red (0.511)", -0.0104280206091216, 0.00223442804446772, + 0.481250000000014, 2, "Black (0.505)", 0.0113934073181472, 0.0126826432048778, + 0.518749999999986, 3, "Red (0.694)", -0.0215827660760237, 0.196894941069013, + 0.518749999999986, 4, "Red (0.707)", -0.0148699072618508, 0.203086871619157, + 0.481250000000014, 5, "Black (0.584)", 0.00703631455756359, + 0.0957186107999987)) +}) + +test_that("Model Performance Metrics table results match", { + table <- results[["results"]][["validationMeasures"]][["data"]] + jaspTools::expect_equal_tables(table, + list(0.675, 0.686700767263427, 0.682926829268293, 0.222222222222222, + 0.391304347826087, 0.409090909090909, 0.235294117647059, "Black", + 0.371036713180216, 0.590909090909091, 0.777777777777778, 0.608695652173913, + 0.45, 23, 0.764705882352941, 0.823529411764706, 0.675, 0.686700767263427, + 0.666666666666667, 0.409090909090909, 0.235294117647059, 0.222222222222222, + 0.391304347826087, "Red", 0.371036713180216, 0.777777777777778, + 0.590909090909091, 0.764705882352941, 0.55, 17, 0.608695652173913, + 0.590909090909091, 0.675, 0.686700767263427, 0.676016260162602, + 0.315656565656566, 0.313299232736573, 0.315656565656566, 0.313299232736573, + "Average / Total", 0.371036713180216, 0.684343434343434, 0.698358585858586, + 0.675, 1, 40, 0.686700767263427, 0.707219251336898)) +}) + +# Test multinomial regression model ############################################ options <- initMlOptions("mlClassificationLogisticMultinomial") options$addIndicator <- FALSE options$addPredictions <- FALSE @@ -88,7 +187,7 @@ test_that("Feature Importance Metrics table results match", { test_that("Data Split plot matches", { plotName <- results[["results"]][["plotDataSplit"]][["data"]] testPlot <- results[["state"]][["figures"]][[plotName]][["obj"]] - jaspTools::expect_equal_plots(testPlot, "data-split") + jaspTools::expect_equal_plots(testPlot, "data-split-2") }) test_that("Additive Explanations for Predictions of Test Set Cases table results match", { From 2928da052b3ddbcbebbd8407592abb600179cf7f Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Wed, 11 Sep 2024 10:38:13 +0200 Subject: [PATCH 25/30] Support different link functions --- R/commonMachineLearningClassification.R | 8 ++++---- R/mlClassificationLogisticMultinomial.R | 19 ++++++++++--------- R/mlPrediction.R | 2 +- .../mlClassificationLogisticMultinomial.qml | 14 ++++++++++++++ 4 files changed, 29 insertions(+), 14 deletions(-) diff --git a/R/commonMachineLearningClassification.R b/R/commonMachineLearningClassification.R index a9ec7b0e..6345d19d 100644 --- a/R/commonMachineLearningClassification.R +++ b/R/commonMachineLearningClassification.R @@ -33,7 +33,7 @@ "noOfTrees", "maxTrees", "baggingFraction", "noOfPredictors", "numberOfPredictors", # Random forest "complexityParameter", "degree", "gamma", "cost", "tolerance", "epsilon", "maxCost", # Support vector machine "smoothingParameter", # Naive Bayes - "intercept" # Logistic + "intercept", "link" # Logistic ) if (includeSaveOptions) { opt <- c(opt, "saveModel", "savePath") @@ -588,7 +588,7 @@ levels(predictions) <- unique(dataset[, options[["target"]]]) } else if (type == "logistic") { if (classificationResult[["family"]] == "binomial") { - fit <- glm(formula, data = dataset, family = stats::binomial(link = "logit")) + fit <- stats::glm(formula, data = dataset, family = stats::binomial(link = options[["link"]])) predictions <- as.factor(round(predict(fit, grid, type = "response"), 0)) levels(predictions) <- unique(dataset[, options[["target"]]]) } else { @@ -746,7 +746,7 @@ fit <- e1071::naiveBayes(formula = formula, data = typeData, laplace = options[["smoothingParameter"]]) score <- max.col(predict(fit, test, type = "raw")) } else if (type == "logistic") { - fit <- glm(formula, data = typeData, family = stats::binomial(link = "logit")) + fit <- stats::glm(formula, data = typeData, family = stats::binomial(link = options[["link"]])) score <- round(predict(fit, test, type = "response"), 0) } pred <- ROCR::prediction(score, actual.class) @@ -1167,7 +1167,7 @@ } .calcAUCScore.logisticClassification <- function(AUCformula, test, typeData, options, jaspResults, ...) { - fit <- glm(AUCformula, data = typeData, family = stats::binomial(link = "logit")) + fit <- stats::glm(AUCformula, data = typeData, family = stats::binomial(link = options[["link"]])) score <- round(predict(fit, test, type = "response"), 0) return(score) } diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index 3caf82b5..1c570970 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -88,18 +88,18 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . } if (nlevels(trainingSet[[options[["target"]]]]) == 2) { family = "binomial" - linkFunction <- "logit" - trainingFit <- glm(formula, data = trainingSet, family = stats::binomial(link = linkFunction)) + linkFunction <- options[["link"]] + trainingFit <- stats::glm(formula, data = trainingSet, family = stats::binomial(link = linkFunction)) # Use the specified model to make predictions for dataset - testPredictions <- .mlClassificationLogisticPredictions(trainingSet, options, predict(trainingFit, newdata = testSet, type = "response")) - dataPredictions <- .mlClassificationLogisticPredictions(trainingSet, options, predict(trainingFit, newdata = dataset, type = "response")) + testPredictions <- .mlClassificationLogisticPredictions(trainingSet, options, stats::predict(trainingFit, newdata = testSet, type = "response")) + dataPredictions <- .mlClassificationLogisticPredictions(trainingSet, options, stats::predict(trainingFit, newdata = dataset, type = "response")) } else { family <- "multinomial" linkFunction <- "logit" trainingFit <- VGAM::vglm(formula, data = trainingSet, family = VGAM::multinomial()) # Use the specified model to make predictions for dataset - testPredictions <- .mlClassificationMultinomialPredictions(trainingSet, options, predict(trainingFit, newdata = testSet)) - dataPredictions <- .mlClassificationMultinomialPredictions(trainingSet, options, predict(trainingFit, newdata = dataset)) + testPredictions <- .mlClassificationMultinomialPredictions(trainingSet, options, VGAM::predict(trainingFit, newdata = testSet)) + dataPredictions <- .mlClassificationMultinomialPredictions(trainingSet, options, VGAM::predict(trainingFit, newdata = dataset)) } # Create results object result <- list() @@ -108,6 +108,7 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . result[["link"]] <- linkFunction if (family == "binomial") { result[["model"]] <- trainingFit + result[["model"]]$link <- result[["link"]] } else { model <- lapply(slotNames(trainingFit), function(x) slot(trainingFit, x)) names(model) <- slotNames(trainingFit) @@ -211,14 +212,14 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . table[["lower"]] <- coefs[, "lower"] table[["upper"]] <- coefs[, "upper"] } - if (options[["formula"]]) { # TODO FOR MULTINOMIAL + if (options[["formula"]]) { if (classificationResult[["family"]] == "binomial") { one_cat <- levels(factor(classificationResult[["train"]][[options[["target"]]]]))[2] if (options[["intercept"]]) { - regform <- paste0("logit(p", options[["target"]], " = ", one_cat, ") = ", round(as.numeric(coefs[, 1])[1], 3)) + regform <- paste0(options[["link"]], "(p", options[["target"]], " = ", one_cat, ") = ", round(as.numeric(coefs[, 1])[1], 3)) start <- 2 } else { - regform <- paste0("logit(p", options[["target"]], " = ", one_cat, ") = ") + regform <- paste0(options[["link"]], "(p", options[["target"]], " = ", one_cat, ") = ") start <- 1 } for (i in start:nrow(coefs)) { diff --git a/R/mlPrediction.R b/R/mlPrediction.R index cad9e30c..68b864c0 100644 --- a/R/mlPrediction.R +++ b/R/mlPrediction.R @@ -388,7 +388,7 @@ is.jaspMachineLearning <- function(x) { row[["lambda"]] <- model[["lambda.min"]] } else if (inherits(model, "glm")) { row[["family"]] <- gettext("Binomial") - row[["link"]] <- gettext("Logit") + row[["link"]] <- paste0(toupper(substr(model[["link"]], 1, 1)), substr(model[["link"]], 2, nchar(model[["link"]]))) } else if (inherits(model, "vglm")) { row[["family"]] <- gettext("Multinomial") row[["link"]] <- gettext("Logit") diff --git a/inst/qml/mlClassificationLogisticMultinomial.qml b/inst/qml/mlClassificationLogisticMultinomial.qml index 2df65239..9efa3329 100644 --- a/inst/qml/mlClassificationLogisticMultinomial.qml +++ b/inst/qml/mlClassificationLogisticMultinomial.qml @@ -65,6 +65,20 @@ Form { title: qsTr("Algorithmic Settings") + DropDown + { + name: "link" + indexDefaultValue: 0 + label: qsTr("Link function (for binary classification)") + values: + [ + { label: qsTr("Logit"), value: "logit"}, + { label: qsTr("Probit"), value: "probit"}, + { label: qsTr("Cauchit"), value: "cauchit"}, + { label: qsTr("C log-log"), value: "cloglog"}, + { label: qsTr("Log"), value: "log"} + ] + } REGU.Intercept { } UI.ScaleVariables { } UI.SetSeed { } From e15d7eaac2625052af503f66294b47e009046291 Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Wed, 11 Sep 2024 10:43:12 +0200 Subject: [PATCH 26/30] Change decode method --- R/mlPrediction.R | 3 ++- tests/testthat/test-mlclassificationlogisticmultinomial.R | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/R/mlPrediction.R b/R/mlPrediction.R index 68b864c0..00043aba 100644 --- a/R/mlPrediction.R +++ b/R/mlPrediction.R @@ -259,7 +259,8 @@ is.jaspMachineLearning <- function(x) { return(model) } .decodeJaspMLobject.glm <- function(model) { - formula <- formula(paste(decodeColNames(as.character(model$terms)[2]), "~", paste0(decodeColNames(strsplit(as.character(model$terms)[3], split = " + ", fixed = TRUE)[[1]]), collapse = " + "))) + vars <- all.vars(stats::terms(model)) + formula <- formula(paste(decodeColNames(vars[1]), "~", paste0(decodeColNames(vars[-1]), collapse = " + "))) model$terms <- stats::terms(formula) return(model) } diff --git a/tests/testthat/test-mlclassificationlogisticmultinomial.R b/tests/testthat/test-mlclassificationlogisticmultinomial.R index 4b2189b4..38434d4f 100644 --- a/tests/testthat/test-mlclassificationlogisticmultinomial.R +++ b/tests/testthat/test-mlclassificationlogisticmultinomial.R @@ -8,6 +8,7 @@ options$coefTable <- TRUE options$coefTableConfInt <- TRUE options$classProportionsTable <- TRUE options$holdoutData <- "holdoutManual" +options$link <- "logit" options$modelOptimization <- "manual" options$modelValid <- "validationManual" options$predictionsColumn <- "" @@ -107,6 +108,7 @@ options$coefTable <- TRUE options$coefTableConfInt <- TRUE options$classProportionsTable <- TRUE options$holdoutData <- "holdoutManual" +options$link <- "logit" options$modelOptimization <- "manual" options$modelValid <- "validationManual" options$predictionsColumn <- "" From b3f438326c48177f65a45f0c2e4a0ba7943cd7ec Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Wed, 11 Sep 2024 13:02:45 +0200 Subject: [PATCH 27/30] Complementary log log --- inst/qml/mlClassificationLogisticMultinomial.qml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/inst/qml/mlClassificationLogisticMultinomial.qml b/inst/qml/mlClassificationLogisticMultinomial.qml index 9efa3329..f63502c4 100644 --- a/inst/qml/mlClassificationLogisticMultinomial.qml +++ b/inst/qml/mlClassificationLogisticMultinomial.qml @@ -67,16 +67,16 @@ Form DropDown { - name: "link" - indexDefaultValue: 0 - label: qsTr("Link function (for binary classification)") + name: "link" + indexDefaultValue: 0 + label: qsTr("Link function (for binary classification)") values: [ - { label: qsTr("Logit"), value: "logit"}, - { label: qsTr("Probit"), value: "probit"}, - { label: qsTr("Cauchit"), value: "cauchit"}, - { label: qsTr("C log-log"), value: "cloglog"}, - { label: qsTr("Log"), value: "log"} + { label: qsTr("Logit"), value: "logit"}, + { label: qsTr("Probit"), value: "probit"}, + { label: qsTr("Cauchit"), value: "cauchit"}, + { label: qsTr("Complimentary log-log"), value: "cloglog"}, + { label: qsTr("Log"), value: "log"} ] } REGU.Intercept { } From 6d3dc36a553e5bf3fa43fe66cf65ca33b8e6a0d4 Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Wed, 11 Sep 2024 13:31:39 +0200 Subject: [PATCH 28/30] Fix little bug in regularized linear regression where intercept was incorrectly shown in equation --- R/mlRegressionRegularized.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/mlRegressionRegularized.R b/R/mlRegressionRegularized.R index b2a11b6d..2f02fb52 100644 --- a/R/mlRegressionRegularized.R +++ b/R/mlRegressionRegularized.R @@ -166,12 +166,14 @@ mlRegressionRegularized <- function(jaspResults, dataset, options, ...) { if (options[["intercept"]]) { regform <- paste0(options[["target"]], " = ", round(as.numeric(coefs[, 1])[1], 3)) start <- 2 + form_coefs <- coefs } else { regform <- paste0(options[["target"]], " = ") start <- 1 + form_coefs <- coefs[-1, , drop = FALSE] # There is still a row with (Intercept) but its value is 0 } - for (i in start:nrow(coefs)) { - regform <- paste0(regform, if (round(as.numeric(coefs[, 1])[i], 3) < 0) " - " else " + ", abs(round(as.numeric(coefs[, 1])[i], 3)), " x ", rownames(coefs)[i]) + for (i in start:nrow(form_coefs)) { + regform <- paste0(regform, if (round(as.numeric(form_coefs[, 1])[i], 3) < 0) " - " else " + ", abs(round(as.numeric(form_coefs[, 1])[i], 3)), " x ", rownames(form_coefs)[i]) } result <- list() result[["model"]] <- trainingFit From f438dc89494030cc9af07839f21d98aa4ec82dc0 Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Wed, 11 Sep 2024 13:45:07 +0200 Subject: [PATCH 29/30] Ensure first "+" is not shown if there is no intercept --- R/mlClassificationLogisticMultinomial.R | 4 ++-- R/mlRegressionLinear.R | 2 +- R/mlRegressionRegularized.R | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index 1c570970..fc4f18ee 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -223,7 +223,7 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . start <- 1 } for (i in start:nrow(coefs)) { - regform <- paste0(regform, if (round(as.numeric(coefs[, 1])[i], 3) < 0) " - " else " + ", abs(round(as.numeric(coefs[, 1])[i], 3)), " x ", rownames(coefs)[i]) + regform <- paste0(regform, if (round(as.numeric(coefs[, 1])[i], 3) < 0) " - " else (if (!options[["intercept"]] && i == 1) "" else " + "), abs(round(as.numeric(coefs[, 1])[i], 3)), " x ", rownames(coefs)[i]) } } else { regform <- NULL @@ -239,7 +239,7 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . start <- i } for (j in seq(start, nrow(coefs), by = nlevs - 1)) { - part <- paste0(part, if (round(as.numeric(coefs[, 1])[j], 3) < 0) " - " else " + ", abs(round(as.numeric(coefs[, 1])[j], 3)), " x ", strsplit(rownames(coefs)[j], " : ")[[1]][1]) + part <- paste0(part, if (round(as.numeric(coefs[, 1])[j], 3) < 0) " - " else (if (!options[["intercept"]] && j == i) "" else " + "), abs(round(as.numeric(coefs[, 1])[j], 3)), " x ", strsplit(rownames(coefs)[j], " : ")[[1]][1]) } if (i == 1) { regform <- paste0(regform, part, "\n\n") diff --git a/R/mlRegressionLinear.R b/R/mlRegressionLinear.R index 2026cc77..e3b9b54f 100644 --- a/R/mlRegressionLinear.R +++ b/R/mlRegressionLinear.R @@ -101,7 +101,7 @@ mlRegressionLinear <- function(jaspResults, dataset, options, ...) { start <- 1 } for (i in start:nrow(coefs)) { - regform <- paste0(regform, if (round(as.numeric(coefs[, 1])[i], 3) < 0) " - " else " + ", abs(round(as.numeric(coefs[, 1])[i], 3)), " x ", vars[i]) + regform <- paste0(regform, if (round(as.numeric(coefs[, 1])[i], 3) < 0) " - " else (if (!options[["intercept"]] && i == 1) "" else " + "), abs(round(as.numeric(coefs[, 1])[i], 3)), " x ", vars[i]) } # Create results object result <- list() diff --git a/R/mlRegressionRegularized.R b/R/mlRegressionRegularized.R index 2f02fb52..2e5af926 100644 --- a/R/mlRegressionRegularized.R +++ b/R/mlRegressionRegularized.R @@ -173,7 +173,7 @@ mlRegressionRegularized <- function(jaspResults, dataset, options, ...) { form_coefs <- coefs[-1, , drop = FALSE] # There is still a row with (Intercept) but its value is 0 } for (i in start:nrow(form_coefs)) { - regform <- paste0(regform, if (round(as.numeric(form_coefs[, 1])[i], 3) < 0) " - " else " + ", abs(round(as.numeric(form_coefs[, 1])[i], 3)), " x ", rownames(form_coefs)[i]) + regform <- paste0(regform, if (round(as.numeric(form_coefs[, 1])[i], 3) < 0) " - " else (if (!options[["intercept"]] && i == 1) "" else " + "), abs(round(as.numeric(form_coefs[, 1])[i], 3)), " x ", rownames(form_coefs)[i]) } result <- list() result[["model"]] <- trainingFit From e9f8b74eea9d2e9917fe50aa3c42c79c06bf45c1 Mon Sep 17 00:00:00 2001 From: Koen Derks Date: Wed, 11 Sep 2024 13:54:57 +0200 Subject: [PATCH 30/30] Fix bug for single variable logistic without intercept --- R/mlClassificationLogisticMultinomial.R | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/R/mlClassificationLogisticMultinomial.R b/R/mlClassificationLogisticMultinomial.R index fc4f18ee..465e7ed5 100644 --- a/R/mlClassificationLogisticMultinomial.R +++ b/R/mlClassificationLogisticMultinomial.R @@ -170,7 +170,13 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . classificationResult <- jaspResults[["classificationResult"]]$object model <- classificationResult[["model"]] if (classificationResult[["family"]] == "binomial") { - coefs <- cbind(coef(summary(model)), confint(model, level = options[["coefTableConfIntLevel"]])) + estimates <- coef(summary(model)) + conf_int <- confint(model, level = options[["coefTableConfIntLevel"]]) + if (!options[["intercept"]] && length(options[["predictors"]] == 1)) { + coefs <- cbind(estimates, conf_int[1], conf_int[2]) + } else { + coefs <- cbind(estimates, conf_int) + } colnames(coefs) <- c("est", "se", "z", "p", "lower", "upper") vars <- rownames(coefs) for (i in seq_along(vars)) { @@ -183,7 +189,9 @@ mlClassificationLogisticMultinomial <- function(jaspResults, dataset, options, . } rownames(coefs) <- vars } else { - coefs <- cbind(VGAM::coef(VGAM::summaryvglm(model[["original"]])), confint(model[["original"]], level = options[["coefTableConfIntLevel"]])) + estimates <- VGAM::coef(VGAM::summaryvglm(model[["original"]])) + conf_int <- confint(model[["original"]], level = options[["coefTableConfIntLevel"]]) + coefs <- cbind(estimates, conf_int) colnames(coefs) <- c("est", "se", "z", "p", "lower", "upper") vars <- rownames(coefs) for (i in seq_along(vars)) {