summary refs log tree commit diff
path: root/scripts/modelling.R
diff options
context:
space:
mode:
author Mike Vink <mike1994vink@gmail.com> 2021-05-02 17:33:26 +0200
committer Mike Vink <mike1994vink@gmail.com> 2021-05-02 17:33:26 +0200
commit bf1adece8aeb48e136085233d2f5ff2f9600eaf5 (patch)
tree 6a46b0c7e7fbea6a85c0e44714e0076251e82cac /scripts/modelling.R
parent de4565fe9290ec1f1031eed6f7d067794df53166 (diff)
update
Diffstat (limited to 'scripts/modelling.R')
-rw-r--r-- scripts/modelling.R 39
1 files changed, 39 insertions, 0 deletions
diff --git a/scripts/modelling.R b/scripts/modelling.R
new file mode 100644
index 0000000..b1fc746
--- /dev/null
+++ b/scripts/modelling.R
@@ -0,0 +1,39 @@
+library(caret)
+library(tidyverse)
+library(MLeval)
+
+source("./data_prep.R")
+
+# Fit every caret method in `models` on selected partitions; save all fits.
+data_list <- sets_partitions # created by sourcing data_prep.R
+results <- list() # filled as results[[model]][[position in `selected`]]
+models <- c("rrlda", "naive_bayes", "rf", "regLogistic")
+# 10-fold cross-validation repeated twice, keeping class probabilities and
+# the hold-out predictions of each resample.
+fitControl <- trainControl(
+  method = "repeatedcv", number = 10, repeats = 2,
+  classProbs = TRUE, savePredictions = TRUE
+)
+# selected <- data_list
+selected <- data_list[c(14, 16, 19)]
+for (model in models) {
+  # `dataset` is the position within `selected` (1, 2, ...), not the index
+  # into data_list; it is used both in the log line and as the results key.
+  for (dataset in seq_along(selected)) {
+    print(paste("Training", model, "on dataset", dataset))
+    # Named train_set rather than `train` so caret::train() is not shadowed.
+    train_set <- selected[[dataset]][["train"]]
+    X_train <- as.data.frame(train_set[-c(1, 2)]) # all but columns 1-2
+    Y_train <- train_set[c(2)][[1]] # column 2, passed to train() as `y`
+    # Assumes Y_train is a two-level factor -- TODO confirm in data_prep.R.
+    levels(Y_train) <- c("Low", "High")
+    set.seed(13121994) # reseed so every fit starts from the same RNG state
+    model_trained <- train(
+      X_train, y = Y_train, method = model, trControl = fitControl
+    )
+    results[[model]][[dataset]] <- model_trained
+  }
+}
+save(results, file = "./modelling_results_withrrlda.RData")
+# save(results, file="./modelling_results.RData")
+