|
| 1 | +--- |
| 2 | +title_meta : Unit 2 |
| 3 | +title : Estimating measurement error in continuous variables |
| 4 | +description : "Evaluating measurement error without a gold standard" |
| 5 | +attachments : |
| 6 | + slides_link : http://jpsmonline.umd.edu/pluginfile.php/4810/mod_folder/content/0/SURV730-Unit-1-slides-2016-summer.pdf?forcedownload=1 |
| 7 | + |
| 8 | +--- type:NormalExercise lang:r xp:50 skills:7 key:39a225af5a |
| 9 | +## Association measures for categorical variables |
| 10 | + |
| 11 | + |
| 12 | +* Yule's Q is explained on the Wikipedia page about "Goodman & Kruskal's gamma": <https://en.wikipedia.org/wiki/Goodman_and_Kruskal%27s_gamma#Yule.27s_Q> |
| 13 | +* The phi coefficient has its own Wikipedia page: <https://en.wikipedia.org/wiki/Phi_coefficient> |
| 14 | +* John Uebersax's page has a good overview of various association measures and their interrelatedness: <http://www.john-uebersax.com/stat/agree.htm> |
| 15 | + |
| 16 | +*** =instructions |
| 17 | +- Look up the `phi` (φ) and `Yule` (Yule's Q) association measures online; |
| 18 | +- Verify that the base code to the right gives the same estimates as are given for `tab_marijuana` in the slides; |
| 19 | +- Adjust the code to the right to estimate association between `gndr` and `dshltgp` using the `tab_gender_GP` table; |
| 20 | +- Verify that you get the same results as in the slides. |
| 21 | + |
| 22 | +*** =hint |
| 23 | +- Replace `tab_marijuana` with `tab_gender_GP` in the code to the right; |
| 24 | +- In the call to `glm`, which uses Poisson regression to get at the log-odds ratio coefficient from a table, you may need to adjust the variable names `Question.1` and `Question.2` to correspond to the names in the `tab_gender_GP` table. |
| 25 | + |
| 26 | +*** =solution |
| 27 | +```{r} |
| 28 | +# These are all from psych library: |
| 29 | +tab_gender_GP %>% cohen.kappa |
| 30 | +tab_gender_GP %>% tetrachoric |
| 31 | +tab_gender_GP %>% phi |
| 32 | +tab_gender_GP %>% Yule |
| 33 | +
|
| 34 | +# Different ways of getting the log-odds ratio from a table |
| 35 | +
|
| 36 | +# Calculate it directly using the vcd package |
| 37 | +tab_gender_GP %>% vcd::loddsratio(.) |
| 38 | +
|
| 39 | +# Calculate it by hand |
| 40 | +my_log_odds_ratio <- function(tab) { |
| 41 | + odds_ratio <- (tab[1, 1] * tab[2, 2]) / (tab[1, 2] * tab[2, 1]) |
| 42 | + log(odds_ratio) |
| 43 | +} |
| 44 | +tab_gender_GP %>% my_log_odds_ratio |
| 45 | +
|
| 46 | +# Using a Poisson model for the counts |
| 47 | +# (useful when you want to use svyglm from the survey package |
| 48 | +# to get design-based complex sampling standard errors) |
| 49 | +tab_gender_GP %>% as.data.frame %>% |
| 50 | + glm(Freq ~ gndr * dshltgp, data = ., family = poisson) %>% |
| 51 | + summary |
| 52 | +
|
| 53 | +# As a loglinear model |
| 54 | +# (useful when you want to test margins, can also use the survey |
| 55 | +# package's loglinear function to adjust for complex sampling) |
| 56 | +# Note that the log-odds ratio is 4 times the loglinear interaction parameter. |
| 57 | +tab_gender_GP %>% loglin(margin = list(1:2), param = TRUE) |
| 58 | +
|
| 59 | +``` |
| 60 | + |
| 61 | +*** =sample_code |
| 62 | +```{r} |
| 63 | +library(psych) |
| 64 | +
|
| 65 | +# These are all from psych library: |
| 66 | +tab_marijuana %>% cohen.kappa |
| 67 | +tab_marijuana %>% tetrachoric |
| 68 | +tab_marijuana %>% phi |
| 69 | +tab_marijuana %>% Yule |
| 70 | +
|
| 71 | +# Different ways of getting the log-odds ratio from a table |
| 72 | +
|
| 73 | +# Calculate it directly using the vcd package |
| 74 | +tab_marijuana %>% vcd::loddsratio(.) |
| 75 | +
|
| 76 | +# Calculate it by hand |
| 77 | +my_log_odds_ratio <- function(tab) { |
| 78 | + odds_ratio <- (tab[1, 1] * tab[2, 2]) / (tab[1, 2] * tab[2, 1]) |
| 79 | + log(odds_ratio) |
| 80 | +} |
| 81 | +tab_marijuana %>% my_log_odds_ratio |
| 82 | +
|
| 83 | +# Using a Poisson model for the counts |
| 84 | +# (useful when you want to use svyglm from the survey package |
| 85 | +# to get design-based complex sampling standard errors) |
| 86 | +tab_marijuana %>% as.data.frame %>% |
| 87 | + glm(Freq ~ Question.1 * Question.2, data = ., family = poisson) %>% |
| 88 | + summary |
| 89 | +
|
| 90 | +# As a loglinear model |
| 91 | +# (useful when you want to test margins, can also use the survey |
| 92 | +# package's loglinear function to adjust for complex sampling) |
| 93 | +# Note that the log-odds ratio is 4 times the loglinear interaction parameter. |
| 94 | +tab_marijuana %>% loglin(margin = list(1:2), param = TRUE) |
| 95 | +
|
| 96 | +
|
| 97 | +``` |
| 98 | + |
| 99 | +*** =pre_exercise_code |
| 100 | +```{r} |
| 101 | +library(psych) |
| 102 | +library(vcd) |
| 103 | +library(dplyr) |
| 104 | +
|
| 105 | +load(url("http://daob.nl/files/SURV730/tab_marijuana.rdata")) |
| 106 | +
|
| 107 | +load(url("http://daob.nl/files/SURV730/table_gender_GP.rdata")) |
| 108 | +
|
| 109 | +options(digits = 4) |
| 110 | +
|
| 111 | +``` |
| 112 | + |
| 113 | +*** =sct |
| 114 | + |
| 115 | +```{r} |
| 116 | +test_function("cohen.kappa", args = "x", # `x` is the data argument of psych::cohen.kappa |
| 117 | + not_called_msg = "You didn't call `cohen.kappa()`!", |
| 118 | + incorrect_msg = "You didn't call `cohen.kappa(x = ...)` with the correct argument, `x`.") |
| 119 | + |
| 120 | +test_error() |
| 121 | + |
| 122 | +``` |
| 123 | +
|
0 commit comments