-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathLandscape Phase 1 Screening R1.Rmd
231 lines (177 loc) · 6.58 KB
/
Landscape Phase 1 Screening R1.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
---
title: "Pilot Round 1"
author: "Meng Liu"
date: "2022-09-07"
output: html_document
editor_options:
chunk_output_type: console
---
Overview:
We finished Screening R1 in July 2023. The following steps were taken to prepare for the next stage (i.e., resolving inconsistencies):
1. downloaded the two Google Sheets (sheet1 and sheet2) and joined them back to the full dataset
2. analysed the screening results and identified which records require round 2 screening (records where the two coders disagree)
3. marked the records that require Screening R2 so a Google Doc could be set up for crowdsourcing
The next step is to resolve all inconsistencies.
# Load libraries
```{r}
# Attach the packages used throughout this document.
# ggthemr was only needed for earlier theming experiments; install once if required:
#devtools::install_github('Mikata-Project/ggthemr')
library(pacman)
# p_load() installs any missing packages before attaching them
p_load(tidyverse,rio)
# Use a minimal ggplot2 theme for every figure below
theme_set(theme_minimal())
```
# Load data
load the full record from the previous stage:
```{r}
# Read the full bibliographic record exported at the previous stage.
# Encoding is set explicitly — the metadata presumably contains non-ASCII
# characters (TODO confirm against the WoS export).
data <- read.csv("full_record_20220907.csv", encoding = "UTF-8")
```
# Screening
## Load Sheets
```{r}
# Both screening sheets share the same layout, so read them with one helper
# instead of two copy-pasted pipelines.
# read_screening_sheet(): read a screening CSV, shorten the two verbose
# column names, and drop the worked "Example" row shown to screeners.
read_screening_sheet <- function(path) {
  read_csv(path) %>%
    rename(screening = `screening (include, exclude, uncertain)`,
           require_full_text = `require_full_text(1,0)`) %>%
    filter(unique_id != "Example")
}

sheet1 <- read_screening_sheet("Screening R1/Round 1 Screening - Sheet1 (screener_id ODD).csv")
sheet2 <- read_screening_sheet("Screening R1/Round 1 Screening - Sheet2 (screener_id EVEN).csv")
```
## Correct typos
```{r}
# Screener typos in the free-text `screening` column are normalised to the
# three canonical labels: include / exclude / uncertain.
# fix_screening_typos(): apply the same corrections to any screening sheet so
# both coders' sheets are cleaned identically.
fix_screening_typos <- function(df) {
  df %>%
    mutate(screening = case_when(
      screening == "exlcude" ~ "exclude",
      screening == "incldue" ~ "include",
      screening == "unsure" ~ "uncertain",
      TRUE ~ screening
    ))
}

# Inspect the distinct values first so any new typo is noticed before recoding
sheet1 %>%
  count(screening) %>% pull(screening)
sheet1 <- fix_screening_typos(sheet1)

# Repeat the same for sheet2.
# NOTE(review): the original code commented out the "incldue" fix for sheet2;
# applying the full set of corrections is a no-op when a typo is absent and
# keeps the two sheets' cleaning consistent.
sheet2 %>%
  count(screening) %>% pull(screening)
sheet2 <- fix_screening_typos(sheet2)
```
## Match Screening Results
```{r}
# Join the two coders' decisions on unique_id; sheet1 columns get the .x
# suffix and sheet2 columns the .y suffix.
sheets <- sheet1 %>%
  left_join(sheet2 %>% select(unique_id, screening, screener_id, note, require_full_text),
            by = "unique_id")
# Derive the joint decision: agreement keeps the shared label; disagreement
# is flagged "inconsistent". A missing decision from either coder is also
# flagged "inconsistent" — the original ifelse() propagated it as NA, which
# silently dropped such records from the decision counts (downstream they
# were still routed to round 2 via the is.na() branch).
sheets <- sheets %>%
  mutate(decision = case_when(
    is.na(screening.x) | is.na(screening.y) ~ "inconsistent",
    screening.x == screening.y ~ screening.x,
    TRUE ~ "inconsistent"
  ))
```
### Visualise the Results
```{r}
# Summarise the joint decisions as shares, then draw a labelled pie chart.
decision_pct <- sheets %>%
  count(decision) %>%
  mutate(pct = n / sum(n))

fig <- ggplot(decision_pct, aes(x = "", y = pct, fill = decision)) +
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  ggrepel::geom_label_repel(aes(label = paste(str_to_title(decision), scales::percent(pct))),
                            position = position_stack(vjust = 0.5)) +
  theme_void() +
  theme(legend.position = "none") +
  labs(fill = "Decision", title = "Round 1 Screening", x = "", y = "",
       subtitle = paste0("n = ", nrow(sheets)))
fig
# Save the figure explicitly (rather than relying on last_plot())
ggsave("r1_screening_results.jpeg", plot = fig, width = 8, height = 6)
```
## Integrate to the Full Dataset
```{r}
# Re-read the full record so this chunk is reproducible on its own.
data <- read.csv("full_record_20220907.csv", encoding = "UTF-8")

# --- Extract the pilot screening results ------------------------------------
# Pilot rows are those that already carry a round-1 decision in the full file.
pilot <- data %>%
  filter(!r1_screening1 == "") %>%
  select(unique_id, starts_with("r1_")) %>%
  # drop pilot-tagging columns that are not part of the screening outcome
  select(-starts_with("r1_tag"),
         -starts_with("r1_coder"),
         -starts_with("r1_require_more"),
         -starts_with("r1_key")) %>%
  mutate(unique_id = as.integer(unique_id)) %>%
  # normalise the same screener typos that appeared in the R1 sheets
  mutate(r1_screening1 = case_when(r1_screening1 == "unsure" ~ "uncertain",
                                   TRUE ~ r1_screening1),
         r1_screening2 = case_when(r1_screening2 == "unsure" ~ "uncertain",
                                   r1_screening2 == "exlude" ~ "exclude",
                                   TRUE ~ r1_screening2))

# --- Rename the R1 sheet columns with the r1_ prefix so they match pilot ----
results <- sheets %>%
  rename(r1_screening1 = screening.x,
         r1_screening2 = screening.y,
         r1_screener1 = screener_id.x,
         r1_screener2 = screener_id.y,
         r1_screen_note1 = note.x,
         r1_screen_note2 = note.y,
         r1_require_full_text1 = require_full_text.x,
         r1_require_full_text2 = require_full_text.y,
         r1_screening_decision = decision) %>%
  select(unique_id, starts_with("r1_")) %>%
  mutate(unique_id = as.integer(unique_id))

# --- Combine pilot + R1 and deduplicate -------------------------------------
# rbind() (not bind_rows()) so a column mismatch fails loudly.
combined <- rbind(results, pilot)
# Report any duplicated unique_id before keeping only the first occurrence
combined[duplicated(combined$unique_id), ] %>% pull(unique_id)
combined <- distinct(combined, unique_id, .keep_all = TRUE)

# --- Integrate back into the full record ------------------------------------
wos <- data %>%
  # remove all the manual coding cols to prepare for merging; every such
  # column is prefixed r1_, so one starts_with() covers the tag/coder/
  # require_more/key columns the original dropped again individually
  select(-starts_with("r1_")) %>%
  mutate(unique_id = as.integer(unique_id))
full_record <- left_join(wos, combined, by = "unique_id")

# Flag records that need round-2 attention: inconsistent or uncertain
# decisions, plus records never screened at all (NA decision).
full_record <- full_record %>%
  mutate(action_required = case_when(
    r1_screening_decision %in% c("inconsistent", "uncertain") ~ "Y",
    is.na(r1_screening_decision) ~ "Y",
    TRUE ~ "N")
  )
#saveRDS(full_record,"full_record_20230714.rds")
# the full record was saved as RDS to prevent data loss as some metadata from
# WoS were too long to be preserved as-is in Excel

# Build the round-2 screening sheet with just the columns screeners need
r2_screening <- full_record %>%
  select(unique_id, action_required, title, author_keywords, abstract, keywords_plus, doi, starts_with("r1_"))
r2_screening %>% count(action_required)
#export(r2_screening,"r2_screening_sheet.csv")
```
# Calculate IRR for pilot + R1
```{r}
# Inter-rater reliability across pilot + R1, using the meta_rate programme
# by Reza Norouzian.
source("meta-rate.r")
# meta_rate() expects two data frames with columns study.name and code;
# select() renames while selecting, replacing the select()+rename() pairs.
screener1 <- full_record %>%
  select(study.name = unique_id, code = r1_screening1)
screener2 <- full_record %>%
  select(study.name = unique_id, code = r1_screening2)
meta_rate(screener1, screener2)
```
# Tabulation of screener contribution
```{r}
# Tally how many records each screener handled, counting both screener slots.
n_screeners <- 57  # size of the screener roster (previously hard-coded below)

# count_by_screener(): tally one screener column and standardise its name
count_by_screener <- function(df, col) {
  df %>%
    count({{ col }}) %>%
    rename(screener = {{ col }})
}

tb1 <- count_by_screener(full_record, r1_screener1)
tb2 <- count_by_screener(full_record, r1_screener2)

# Sum the two slots per screener, dropping rows with no screener recorded
contribution <- rbind(tb1, tb2) %>%
  filter(!is.na(screener)) %>%
  group_by(screener) %>%
  summarise(n = sum(n), .groups = "drop")

# Left-join onto the full roster so screeners with zero records still appear
# (with NA counts) in the exported sheet.
tibble(screener = seq_len(n_screeners)) %>%
  left_join(contribution, by = "screener") %>%
  export("screener_contribution.csv")
```