-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdawum_package.R
125 lines (100 loc) · 5.25 KB
/
dawum_package.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
call_dawum_api <- function(cache_data = T, use_cached = F) {
pacman::p_load("httr", "xml2", "readr", "here", "dplyr", "tidyr", "purrr")
if (file.exists(here("dawum_api", "cache.txt"))){
cache <- readr::read_file(here("dawum_api", "cache.txt"))
} else {
cache <- "Cache Datei fehlt"
}
message(paste0("Date Cache: \t", cache, "\n Date API: \t", readr::read_file("https://api.dawum.de/last_update.txt")))
if (use_cached) {
if (file.exists(here("dawum_api", "dawum.rds"))){
df_long <- read_rds(here("dawum_api", "dawum.rds"))
} else {
message("No cached data.")
break
}
} else {
dawum_api <- GET("https://api.dawum.de")
dawum_json <- content(dawum_api)
as_df_tibble <- function(x) tibble(as.data.frame(x))
df <- map_dfr(dawum_json$Surveys, as_df_tibble)
df_parties <- map_dfr(dawum_json$Parties, as_df_tibble)
df_parties$id <- names(dawum_json$Parties)
df_parliaments <- map_dfr(dawum_json$Parliaments, as_df_tibble)
df_parliaments$id <- names(dawum_json$Parliaments)
df_taskers <- map_dfr(dawum_json$Taskers, as_df_tibble) %>%
rename("tasker" = "Name")
df_taskers$id <- names(dawum_json$Taskers)
df_institutes <- map_dfr(dawum_json$Institutes, as_df_tibble) %>%
rename("institute" = "Name")
df_institutes$id <- names(dawum_json$Institutes)
df_long <- df %>%
pivot_longer(cols = starts_with("Results."), names_to = "partycode", names_prefix = "Results\\.", values_to = "value") %>%
left_join(df_parties, by = c("partycode" = "id")) %>%
left_join(df_parliaments, by = c("Parliament_ID" = "id"), suffix = c("_party", "_parliament")) %>%
left_join(df_taskers, by = c("Tasker_ID" = "id")) %>%
left_join(df_institutes, by = c("Institute_ID" = "id")) %>%
mutate(field_end = as.Date(Survey_Period.Date_End),
field_start = as.Date(Survey_Period.Date_Start),
date = as.Date(Date),
value = value/100,
Date = NULL,
Survey_Period.Date_Start = NULL,
Survey_Period.Date_End = NULL,
Surveyed_Persons = as.numeric(Surveyed_Persons)) %>%
rename("parliament" = "Shortcut_parliament",
"n_surveyed" = "Surveyed_Persons",
"party" = "Shortcut_party") %>%
janitor::clean_names() %>%
filter(!is.na(value)) %>%
select(date, field_start, field_end, parliament, party, value, institute, tasker, n_surveyed, method_id)
if (cache_data) {
write_file(readr::read_file("https://api.dawum.de/last_update.txt"), here("dawum_api","cache.txt"))
write_rds(df_long, here("dawum_api", "dawum.rds"))
}
}
return(df_long)
}
select_parties <- function(df, parties = c("SPD", "CDU/CSU", "CDU", "CSU", "Grüne", "AfD", "FDP", "Linke", "Freie Wähler")) {
tidyr::pivot_wider(filter(df, party %in% parties),
names_from = party, values_from = value) %>%
mutate(Sonstige = 1 - rowSums(.[9:ncol(.)], na.rm = T)) %>%
tidyr::pivot_longer(cols = -c(date:method_id), names_to = "party", values_to = "value") %>%
filter(!is.na(value)) %>%
select(date:parliament, party, value, institute:method_id) %>%
return()
}
subset_parliament <- function(df, parliament = "Bundestag") return(filter(df, parliament == parliament))
subset_latest <- function(df) filter(filter(df, date == max(date)), n_surveyed == max(n_surveyed))
subset_date_range <- function(df, min_date = "2017-01-18", max_date = Sys.Date()) arrange(filter(df, date >= as.Date(min_date) & date <= as.Date(max_date)), date)
calc_cis <- function(df, alpha = 0.05) {
mutate(df, std.err = sqrt((value * (1 - value)) / n_surveyed),
conf.low = value - qnorm(1 - alpha / 2) * std.err,
conf.high = value + qnorm(1 - alpha / 2) * std.err) %>%
select(date:value, std.err, conf.high, conf.low, everything()) %>%
return()
}
plot_single_poll <- function(df, colors) {
require(ggplot2)
df %>%
mutate(party = factor(ifelse(party %in% c("CDU", "CSU"), "CDU/CSU", party),
levels = c("SPD", "CDU/CSU", "Grüne", "AfD", "FDP", "Linke", "Freie Wähler", "Sonstige")),
title = paste0(institute, "-Umfrage (", lubridate::day(field_start),".", lubridate::month(field_start),". - ", lubridate::day(field_end),".", lubridate::month(field_end),".",lubridate::year(field_end),").")) %>%
ggplot(aes(party, value, fill = party)) +
scale_y_continuous("Anteil Sonntagsfrage", labels = scales::label_percent(1), limits = c(0,0.4)) +
scale_x_discrete(NULL) +
scale_fill_manual(values = colors) +
geom_col() +
geom_errorbar(aes(ymin = conf.low, ymax = conf.high), color = "darkgrey", width = 0.5) +
facet_wrap(~title)
}
plot_polling_average <- function(df, k = 5, colors) {
df %>%
mutate(party = factor(ifelse(party %in% c("CDU", "CSU"), "CDU/CSU", party),
levels = c("SPD", "CDU/CSU", "Grüne", "AfD", "FDP", "Linke", "Freie Wähler", "Sonstige"))) %>%
mutate(value = zoo::rollmeanr(value, k = k, fill = NA), .by = party) %>%
ggplot(aes(date, value, color = party)) +
scale_color_manual("Party", values = colors) +
geom_hline(yintercept = 0.05, color = "black", linetype = "longdash") +
geom_line()
}