-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathafd_topics.Rmd
89 lines (78 loc) · 2.35 KB
/
afd_topics.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
---
title: "AfD Topics"
author: "Anton Könneke"
output: github_document
---
```{r setup, include=FALSE}
pacman::p_load(quanteda,
readtext,
seededlda,
tidyverse,
quanteda.textstats,
quanteda.textplots,
LSX)
afdt <- readtext(file = "texts/afd_articles.csv")
```
Format dates
```{r}
Sys.setlocale(locale = "de_DE")
afdt <- afdt %>%
mutate(day = str_extract(date, "^\\d+"),
mon = str_match(date, "(?<=\\d{1}\\.\\s)\\w*")[,1],
year = str_extract(date, "\\d{4}$"),
date_fmt = paste(year, mon, day, sep = "-"),
date_fmt2 = as.Date(date_fmt, format = "%Y-%B-%e")) %>%
filter(!is.na(fulltext))
Sys.setlocale(locale = "en_US")
```
prepare data
```{r}
afdt_corp <- corpus(afdt, text_field = "fulltext")
afdt_tok <- tokens(afdt_corp, remove_punct = TRUE, remove_numbers = TRUE, remove_symbol = TRUE)
afdt_tok <- tokens_remove(afdt_tok, pattern = c(stopwords("de")))
afdt_tok <- tokens_remove(
afdt_tok,
pattern =
as.character(tokens(
paste0(unique(afdt$author), collapse = " "),
remove_punct = TRUE,
remove_numbers = TRUE,
remove_symbol = TRUE
))
)
afdt_dfmat <- dfm(afdt_tok) %>%
dfm_trim(min_termfreq = 0.8, termfreq_type = "quantile",
max_docfreq = 0.1, docfreq_type = "prop")
```
topic model
```{r}
ldas <- lapply(seq(5, 20, 5), function(x){
textmodel_lda(afdt_dfmat, k = x)
})
lapply(ldas, function(x) terms(x, 10))
lda15k <- ldas[[3]]
```
plot
```{r}
bind_cols(afdt,
as.data.frame(lda15k$theta)) %>%
pivot_longer(starts_with("topic")) %>%
filter(year >= 2020 & !is.na(date_fmt2)) %>%
mutate(month = lubridate::floor_date(date_fmt2, "month"),
topic_n = as.integer(str_extract(name, "\\d+")),
facet = case_match(topic_n,
c(1:3) ~ 1,
c(4:7) ~ 2,
c(8:11) ~3,
c(11:15)~4,
.default = NA)) %>%
summarise(value = mean(value, na.rm = T), .by = c(month, name, facet)) %>%
arrange(month) %>%
mutate(value = zoo::rollmean(value, k = 3, fill = NA, align = "center"),
.by = name) %>%
filter(facet == 1) %>%
ggplot(aes(month, value, color = name))+
geom_line()+
facet_wrap(~facet)+
theme_bw()
```