-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path0-tidy.R
66 lines (51 loc) · 1.69 KB
/
0-tidy.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
library(tidyjson)
library(tidyverse)
library(magrittr)
# Read the raw export into `json_df`. The "jsonl" argument selects the reader's
# JSON Lines mode (presumably one JSON document per line -- TODO confirm against
# the tidyjson version in use). Both tables below are derived from this object.
json_df <- read_json("data/raw/db.json", "jsonl")
# ------------------------------------------------------------------------------
# Separate data into two "tidy" tables, each with their own observational unit
# Player table: one row per JSON document, with the identifier, display name,
# platform name and the individual "stats" counters lifted into columns.
# Column order follows the argument order below, so keep it stable.
players <- spread_values(
  json_df,
  id       = json_chr("uniqueId"),
  name     = json_chr("displayName"),
  platform = json_chr("platform", "name"),
  # "createdAt" is deliberately not extracted yet -- its meaning is unclear:
  # start_date = json_dbl("createdAt"),
  shots    = json_dbl("stats", "shots"),
  saves    = json_dbl("stats", "saves"),
  mvps     = json_dbl("stats", "mvps"),
  goals    = json_dbl("stats", "goals"),
  assists  = json_dbl("stats", "assists"),
  wins     = json_dbl("stats", "wins")
)
# anytime::anytime(1468013823) # Convert createdAt if we decide to use it
# This takes a few minutes
# Rank table: tag every document with the player's id, step into its
# "rankedSeasons" object, and spread everything found there into columns.
rank <- spread_all(
  enter_object(
    spread_values(json_df, id = json_chr("uniqueId")),
    "rankedSeasons"
  )
)
# ------------------------------------------------------------------------------
# Reshape the rank table so that it follows "tidy" methodology
# Go long, split the column name, then go wide again on the stat name.
# The gathered column names are assumed to have three dot-separated parts
# (<season>.<game_type>.<stat>), which `separate()` splits on the literal dot.
rank <- rank %>%
  gather(key, val, -document.id, -id) %>%
  separate(key, into = c("season", "game_type", "stats"), sep = "\\.") %>%
  spread(stats, val) %>%
  # Give the two stat columns snake_case names
  rename(matches_played = matchesPlayed, mmr = rankPoints)
# ------------------------------------------------------------------------------
# Remove duplicates and NAs to make the data easier to analyze
# Players: drop rows containing any NA, then keep a single row per `id`
# while retaining every other column.
players <- players %>%
  drop_na() %>%
  distinct(id, .keep_all = TRUE)

# Rank: drop rows containing any NA, then remove rows that are exact duplicates.
rank <- rank %>%
  drop_na() %>%
  distinct()

# Persist both cleaned tables.
saveRDS(players, file = "data/players.Rds")
saveRDS(rank, file = "data/rank.Rds")