-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmain.R
74 lines (57 loc) · 1.92 KB
/
main.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Header ----
# library(ggplot2)
library(data.table)
library(parallel)
library(cluster)
library(clusterCrit)
library(TSrepr)
library(kSamples)
library(rpart)
library(party)
library(smooth)
library(forecast)
library(doParallel)
library(randomForest)
library(dtwclust)
library(foreign)
# Data reading ----
# The data must be laid out so that each time series stream is one row of a
# matrix or a data.frame.
#
# The London data set is OpenML data set 41060. Fetching it from OpenML needs
# two extra packages:
#   install.packages("OpenML")
#   install.packages("farff")
#
# Exactly one source is read: a previously downloaded ARFF copy in the working
# directory is used when it exists, and OpenML is contacted only when it does
# not. Reading both would download the data set just to overwrite it.
local_arff <- "London_5months_5066ID.ARFF"
if (file.exists(local_arff)) {
  data <- as.data.table(read.arff(local_arff))
} else {
  data <- OpenML::getOMLDataSet(data.id = 41060)
  data <- as.data.table(data$data)
}
# choose daily season
# NOTE(review): `seas` is not referenced again in the visible part of this
# script; presumably the sourced scripts read it as a global - confirm.
seas <- 48
# Offline batch clustering - benchmarks ----
# batchClust() is presumably defined by the sourced script - confirm.
source("batchClust.R")

# Three benchmark runs over the same data, label and k.min/k.max bounds;
# only the `method` argument differs between them.
batchClust(
  data,
  string = "London",
  k.min = 20,
  k.max = 30,
  method = "lm"
)
batchClust(
  data,
  string = "London",
  k.min = 20,
  k.max = 30,
  method = "dft"
)
batchClust(
  data,
  string = "London",
  k.min = 20,
  k.max = 30,
  method = "kshape"
)
# ClipStream ----
# testClipStream() is presumably defined by the sourced script - confirm.
source("ClipStream.R")

# Single ClipStream run on the same data, label and k.min/k.max bounds as the
# batch benchmarks, with the additional `tresh` and `alpha` settings.
testClipStream(
  data,
  string = "London",
  k.min = 20,
  k.max = 30,
  tresh = 1.5,
  alpha = 0.05
)
# Simple aggregate forecasting ----
# ForecastAggregatedSimple() and computeMape() are presumably defined by the
# sourced script - confirm.
source("TestingForecasting.R")

# Column sums of `data` give the aggregated series that is forecast.
data_sum <- colSums(data)
res_sim <- ForecastAggregatedSimple(data_sum)
err_sim <- computeMape(data_sum, res_sim)
gc()

# Save the forecasts computed above. The object written must be `res_sim`;
# no `res_sim_london` is ever created in this script.
write.table(res_sim, "res_sim.csv",
            row.names = FALSE, col.names = FALSE, quote = FALSE)
# Evaluation of results ----
# Errors of the simple aggregate forecast: the `ByDay` element of the
# computeMape() result. It is extracted exactly once; applying `$ByDay` to the
# already extracted object would fail (or yield NULL) instead of returning the
# error table.
err_agg <- err_sim$ByDay

# Errors of the ClipStream forecasts, read back from the CSV result file and
# scored against the same aggregated series.
res_clipstream <- fread("result_of_clipstream.csv")
err_clip <- computeMape(data_sum, res_clipstream)
err_clip <- err_clip$ByDay

# Column means of both error tables, printed for comparison.
colMeans(err_agg)
colMeans(err_clip)
# P-value of a one-sided, paired Wilcoxon signed-rank test.
#
# Arguments:
#   x, y  Numeric vectors of equal length holding paired observations.
#
# Returns:
#   A single numeric p-value for the alternative "less", i.e. that the
#   paired differences x - y are shifted below zero (x tends to be smaller
#   than y).
wilcErr <- function(x, y) {
  # TRUE is spelled out because `T` is an ordinary variable that can be
  # reassigned elsewhere in the session.
  wilcox.test(x, y, paired = TRUE, alternative = "less")$p.value
}
# For each column index of the error tables, run wilcErr() with the ClipStream
# errors as `x` and the simple-aggregate errors as `y`, then print the p-values
# as a one-column matrix rounded to 6 decimals.
# vapply() pins the result to one numeric per column, so the type stays a
# numeric vector even when there are no columns (sapply() would return a list).
round(
  as.matrix(
    vapply(
      seq_len(ncol(err_agg)),
      function(i) wilcErr(err_clip[, i], err_agg[, i]),
      numeric(1)
    )
  ),
  digits = 6
)