Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Logistic / Multinomial regression #369

Merged
merged 30 commits into from
Sep 13, 2024
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
a83a279
First implementation
koenderks Sep 9, 2024
ac4d14e
Update mlClassificationLogistic.R
koenderks Sep 9, 2024
7c86ce2
Add intercept option
koenderks Sep 10, 2024
1818d85
Add coefficients table for logistic
koenderks Sep 10, 2024
140e5db
Update
koenderks Sep 10, 2024
79e2fe5
Change file names
koenderks Sep 10, 2024
775b948
Update mlClassificationLogisticMultinomial.R
koenderks Sep 10, 2024
4dc4a38
Update coef table
koenderks Sep 10, 2024
50a04d3
Make AUC work
koenderks Sep 10, 2024
5c3599e
Make roc plot work
koenderks Sep 10, 2024
ef552ae
Make decision boundary work
koenderks Sep 10, 2024
09b2b45
Start with prediction
koenderks Sep 10, 2024
82a9b8e
Prediction for logit regression
koenderks Sep 10, 2024
d9a3a59
Predictions for multinomial
koenderks Sep 10, 2024
712bb5d
Formula for multinomial
koenderks Sep 10, 2024
7fbe88c
Update table content
koenderks Sep 10, 2024
404bc87
explainers
koenderks Sep 10, 2024
2986b2c
Update mlClassificationLogisticMultinomial.R
koenderks Sep 10, 2024
edcff6b
Update test-mlclassificationlogisticmultinomial.R
koenderks Sep 10, 2024
c298a2d
Add info text
koenderks Sep 11, 2024
587ecd4
Update
koenderks Sep 11, 2024
5326f95
Update mlClassificationLogisticMultinomial.R
koenderks Sep 11, 2024
ba3ad28
coef table for multinomial
koenderks Sep 11, 2024
ddd7783
Add test for logistic
koenderks Sep 11, 2024
2928da0
Support different link functions
koenderks Sep 11, 2024
e15d7ea
Change decode method
koenderks Sep 11, 2024
b3f4383
Complementary log log
koenderks Sep 11, 2024
6d3dc36
Fix little bug in regularized linear regression where intercept was i…
koenderks Sep 11, 2024
f438dc8
Ensure first "+" is not shown if there is no intercept
koenderks Sep 11, 2024
e9f8b74
Fix bug for single variable logistic without intercept
koenderks Sep 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ Imports:
rpart (>= 4.1.16),
ROCR,
Rtsne,
signal
signal,
VGAM
Suggests:
testthat
Remotes:
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ export(mlClassificationBoosting)
export(mlClassificationDecisionTree)
export(mlClassificationKnn)
export(mlClassificationLda)
export(mlClassificationLogisticMultinomial)
export(mlClassificationNaiveBayes)
export(mlClassificationNeuralNetwork)
export(mlClassificationRandomForest)
Expand Down
61 changes: 56 additions & 5 deletions R/commonMachineLearningClassification.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@
"mutationMethod", "survivalMethod", "elitismProportion", "candidates", # Neural network
"noOfTrees", "maxTrees", "baggingFraction", "noOfPredictors", "numberOfPredictors", # Random forest
"complexityParameter", "degree", "gamma", "cost", "tolerance", "epsilon", "maxCost", # Support vector machine
"smoothingParameter" # Naive Bayes
"smoothingParameter", # Naive Bayes
"intercept" # Logistic
)
if (includeSaveOptions) {
opt <- c(opt, "saveModel", "savePath")
Expand Down Expand Up @@ -62,7 +63,7 @@
if (type == "lda" || type == "randomForest" || type == "boosting") {
# Require at least 2 features
ready <- length(options[["predictors"]][options[["predictors"]] != ""]) >= 2 && options[["target"]] != ""
} else if (type == "knn" || type == "neuralnet" || type == "rpart" || type == "svm" || type == "naivebayes") {
} else if (type == "knn" || type == "neuralnet" || type == "rpart" || type == "svm" || type == "naivebayes" || type == "logistic") {
# Require at least 1 features
ready <- length(options[["predictors"]][options[["predictors"]] != ""]) >= 1 && options[["target"]] != ""
}
Expand Down Expand Up @@ -93,7 +94,8 @@
"neuralnet" = .neuralnetClassification(dataset, options, jaspResults),
"rpart" = .decisionTreeClassification(dataset, options, jaspResults),
"svm" = .svmClassification(dataset, options, jaspResults),
"naivebayes" = .naiveBayesClassification(dataset, options, jaspResults)
"naivebayes" = .naiveBayesClassification(dataset, options, jaspResults),
"logistic" = .logisticMultinomialClassification(dataset, options, jaspResults)
)
})
if (isTryError(p)) { # Fail gracefully
Expand All @@ -116,7 +118,8 @@
"neuralnet" = gettext("Neural Network Classification"),
"rpart" = gettext("Decision Tree Classification"),
"svm" = gettext("Support Vector Machine Classification"),
"naivebayes" = gettext("Naive Bayes Classification")
"naivebayes" = gettext("Naive Bayes Classification"),
"logistic" = gettext("Logistic / Multinomial Regression Classification")
)
tableTitle <- gettextf("Model Summary: %1$s", title)
table <- createJaspTable(tableTitle)
Expand Down Expand Up @@ -147,6 +150,9 @@
table$addColumnInfo(name = "vectors", title = gettext("Support Vectors"), type = "integer")
} else if (type == "naivebayes") {
table$addColumnInfo(name = "smoothing", title = gettext("Smoothing"), type = "number")
} else if (type == "logistic") {
table$addColumnInfo(name = "family", title = gettext("Family"), type = "string")
table$addColumnInfo(name = "link", title = gettext("Link"), type = "string")
}
# Add common columns
table$addColumnInfo(name = "nTrain", title = gettext("n(Train)"), type = "integer")
Expand All @@ -164,7 +170,7 @@
}
# If no analysis is run, specify the required variables in a footnote
if (!ready) {
table$addFootnote(gettextf("Please provide a target variable and at least %i feature variable(s).", if (type == "knn" || type == "neuralnet" || type == "rpart" || type == "svm") 1L else 2L))
table$addFootnote(gettextf("Please provide a target variable and at least %i feature variable(s).", if (type == "knn" || type == "neuralnet" || type == "rpart" || type == "svm" || type == "logistic") 1L else 2L))
}
if (options[["savePath"]] != "") {
validNames <- (length(grep(" ", decodeColNames(colnames(dataset)))) == 0) && (length(grep("_", decodeColNames(colnames(dataset)))) == 0)
Expand Down Expand Up @@ -312,6 +318,22 @@
testAcc = classificationResult[["testAcc"]]
)
table$addRows(row)
} else if (type == "logistic") {
if (classificationResult[["family"]] == "binomial") {
table$title <- gettext("Model Summary: Logistic Regression Classification")
} else {
table$title <- gettext("Model Summary: Multinomial Regression Classification")
}
family <- classificationResult[["family"]]
link <- classificationResult[["link"]]
row <- data.frame(
family = paste0(toupper(substr(family, 1, 1)), substr(family, 2, nchar(family))),
link = paste0(toupper(substr(link, 1, 1)), substr(link, 2, nchar(link))),
nTrain = nTrain,
nTest = classificationResult[["ntest"]],
testAcc = classificationResult[["testAcc"]]
)
table$addRows(row)
}
# Save the applied model if requested
if (options[["saveModel"]] && options[["savePath"]] != "") {
Expand Down Expand Up @@ -564,6 +586,26 @@
fit <- e1071::naiveBayes(formula, data = dataset, laplace = options[["smoothingParameter"]])
predictions <- as.factor(max.col(predict(fit, newdata = grid, type = "raw")))
levels(predictions) <- unique(dataset[, options[["target"]]])
} else if (type == "logistic") {
if (classificationResult[["family"]] == "binomial") {
fit <- glm(formula, data = dataset, family = stats::binomial(link = "logit"))
predictions <- as.factor(round(predict(fit, grid, type = "response"), 0))
levels(predictions) <- unique(dataset[, options[["target"]]])
} else {
fit <- VGAM::vglm(formula, data = dataset, family = VGAM::multinomial())
logodds <- predict(fit, newdata = grid)
ncategories <- ncol(logodds) + 1
probabilities <- matrix(0, nrow = nrow(logodds), ncol = ncategories)
for (i in seq_len(ncategories - 1)) {
probabilities[, i] <- exp(logodds[, i])
}
probabilities[, ncategories] <- 1
row_sums <- rowSums(probabilities)
probabilities <- probabilities / row_sums
predicted_columns <- apply(probabilities, 1, which.max)
categories <- levels(dataset[[options[["target"]]]])
predictions <- as.factor(categories[predicted_columns])
}
}
shapes <- rep(21, nrow(dataset))
if (type == "svm") {
Expand Down Expand Up @@ -703,6 +745,9 @@
} else if (type == "naivebayes") {
fit <- e1071::naiveBayes(formula = formula, data = typeData, laplace = options[["smoothingParameter"]])
score <- max.col(predict(fit, test, type = "raw"))
} else if (type == "logistic") {
fit <- glm(formula, data = typeData, family = stats::binomial(link = "logit"))
score <- round(predict(fit, test, type = "response"), 0)
}
pred <- ROCR::prediction(score, actual.class)
nbperf <- ROCR::performance(pred, "tpr", "fpr")
Expand Down Expand Up @@ -1120,3 +1165,9 @@
score <- max.col(predict(fit, test, type = "raw"))
return(score)
}

.calcAUCScore.logisticClassification <- function(AUCformula, test, typeData, options, jaspResults, ...) {
  # Compute per-observation scores for the one-vs-rest AUC of the logistic
  # (binomial, logit-link) classifier.
  #
  # AUCformula:  one-vs-rest model formula (binary target ~ features).
  # test:        data.frame of held-out observations to score.
  # typeData:    data.frame on which the model is fitted.
  # options, jaspResults, ...: accepted for signature compatibility with the
  #   other .calcAUCScore.* methods; not used here.
  #
  # Returns a numeric vector of predicted class-1 probabilities, one per row
  # of `test`, suitable as a continuous score for ROCR::prediction().
  fit <- glm(AUCformula, data = typeData, family = stats::binomial(link = "logit"))
  # Return the raw predicted probabilities: rounding them to hard 0/1 labels
  # before ROCR::prediction() collapses the ROC curve to a single cutoff and
  # distorts the resulting AUC estimate.
  score <- predict(fit, test, type = "response")
  return(score)
}
Loading
Loading