| 
 | 1 | +---  | 
 | 2 | +title_meta  : Unit 2  | 
 | 3 | +title       : Estimating measurement error in continuous variables  | 
 | 4 | +description : "Evaluating measurement error without a gold standard"  | 
 | 5 | +attachments :  | 
 | 6 | +  slides_link : http://jpsmonline.umd.edu/pluginfile.php/4810/mod_folder/content/0/SURV730-Unit-1-slides-2016-summer.pdf?forcedownload=1  | 
 | 7 | + | 
 | 8 | +--- type:NormalExercise lang:r xp:50 skills:7  key:39a225af5a  | 
 | 9 | +## Association measures for categorical variables  | 
 | 10 | + | 
 | 11 | + | 
 | 12 | +* Yule's Q is explained on the Wikipedia page about "Goodman & Kruskal's gamma": <https://en.wikipedia.org/wiki/Goodman_and_Kruskal%27s_gamma#Yule.27s_Q>  | 
 | 13 | +* The phi coefficient has its own Wikipedia page: <https://en.wikipedia.org/wiki/Phi_coefficient>  | 
 | 14 | +* John Uebersax's page has a good overview of various association measures and their interrelatedness: <http://www.john-uebersax.com/stat/agree.htm>  | 
 | 15 | + | 
 | 16 | +*** =instructions   | 
 | 17 | +- Look up the `phi` (φ) and `Yule` (Yule's Q) association measures online;  | 
 | 18 | +- Verify that the base code to the right gives the same estimates as are given for `tab_marijuana` in the slides;  | 
 | 19 | +- Adjust the code to the right to estimate association between `gndr` and `dshltgp` using the `tab_gender_GP` table;  | 
 | 20 | +- Verify that you get the same results as in the slides.  | 
 | 21 | + | 
 | 22 | +*** =hint  | 
 | 23 | +- Replace `tab_marijuana` with `tab_gender_GP` in the code to the right;  | 
 | 24 | +- In the call to `glm`, which uses Poisson regression to obtain the log-odds ratio coefficient from a table, you may need to change the variable names `Question.1` and `Question.2` so that they correspond to the variable names in the `tab_gender_GP` table.  | 
 | 25 | + | 
 | 26 | +*** =solution  | 
 | 27 | +```{r}  | 
 | 28 | +# These are all from psych library:  | 
 | 29 | +tab_gender_GP %>% cohen.kappa  | 
 | 30 | +tab_gender_GP %>% tetrachoric  | 
 | 31 | +tab_gender_GP %>% phi  | 
 | 32 | +tab_gender_GP %>% Yule  | 
 | 33 | +
  | 
 | 34 | +# Different ways of getting the log-odds ratio from a table  | 
 | 35 | +
  | 
 | 36 | +# Calculate it directly using vcd package  | 
 | 37 | +tab_gender_GP %>% vcd::loddsratio(.)  | 
 | 38 | +
  | 
 | 39 | +# Calculate it by hand  | 
 | 40 | +my_log_odds_ratio <- function(tab) {  | 
 | 41 | +  odds_ratio <- (tab[1, 1] * tab[2, 2]) / (tab[1, 2] * tab[2, 1])  | 
 | 42 | +  log(odds_ratio)  | 
 | 43 | +}  | 
 | 44 | +tab_gender_GP %>% my_log_odds_ratio  | 
 | 45 | +
  | 
 | 46 | +# Using a Poisson model for the counts   | 
 | 47 | +#    (useful when you want to use svyglm from the survey package  | 
 | 48 | +#      to get design-based complex sampling standard errors)  | 
 | 49 | +tab_gender_GP %>% as.data.frame %>%   | 
 | 50 | +  glm(Freq ~ gndr * dshltgp, data = ., family = poisson) %>%  | 
 | 51 | +  summary  | 
 | 52 | +
  | 
 | 53 | +# As a loglinear model   | 
 | 54 | +#     (useful when you want to test margins, can also use the survey  | 
 | 55 | +#      package's loglinear function to adjust for complex sampling)  | 
 | 56 | +# Note that the log-odds ratio is 4 * the loglinear interaction parameter.  | 
 | 57 | +tab_gender_GP %>% loglin(margin = list(1:2), param = TRUE)  | 
 | 58 | +
  | 
 | 59 | +```  | 
 | 60 | + | 
 | 61 | +*** =sample_code  | 
 | 62 | +```{r}  | 
 | 63 | +library(psych)  | 
 | 64 | +
  | 
 | 65 | +# These are all from psych library:  | 
 | 66 | +tab_marijuana %>% cohen.kappa  | 
 | 67 | +tab_marijuana %>% tetrachoric  | 
 | 68 | +tab_marijuana %>% phi  | 
 | 69 | +tab_marijuana %>% Yule  | 
 | 70 | +
  | 
 | 71 | +# Different ways of getting the log-odds ratio from a table  | 
 | 72 | +
  | 
 | 73 | +# Calculate it directly using vcd package  | 
 | 74 | +tab_marijuana %>% vcd::loddsratio(.)  | 
 | 75 | +
  | 
 | 76 | +# Calculate it by hand  | 
 | 77 | +my_log_odds_ratio <- function(tab) {  | 
 | 78 | +  odds_ratio <- (tab[1, 1] * tab[2, 2]) / (tab[1, 2] * tab[2, 1])  | 
 | 79 | +  log(odds_ratio)  | 
 | 80 | +}  | 
 | 81 | +tab_marijuana %>% my_log_odds_ratio  | 
 | 82 | +
  | 
 | 83 | +# Using a Poisson model for the counts   | 
 | 84 | +#    (useful when you want to use svyglm from the survey package  | 
 | 85 | +#      to get design-based complex sampling standard errors)  | 
 | 86 | +tab_marijuana %>% as.data.frame %>%   | 
 | 87 | +  glm(Freq ~ Question.1 * Question.2, data = ., family = poisson) %>%  | 
 | 88 | +  summary  | 
 | 89 | +
  | 
 | 90 | +# As a loglinear model   | 
 | 91 | +#     (useful when you want to test margins, can also use the survey  | 
 | 92 | +#      package's loglinear function to adjust for complex sampling)  | 
 | 93 | +# Note that the log-odds ratio is 4 * the loglinear interaction parameter.  | 
 | 94 | +tab_marijuana %>% loglin(margin = list(1:2), param = TRUE)  | 
 | 95 | +
  | 
 | 96 | +
  | 
 | 97 | +```  | 
 | 98 | + | 
 | 99 | +*** =pre_exercise_code  | 
 | 100 | +```{r}  | 
 | 101 | +library(psych)  | 
 | 102 | +library(vcd)  | 
 | 103 | +library(dplyr)  | 
 | 104 | +
  | 
 | 105 | +load(url("http://daob.nl/files/SURV730/tab_marijuana.rdata"))  | 
 | 106 | +
  | 
 | 107 | +load(url("http://daob.nl/files/SURV730/table_gender_GP.rdata"))  | 
 | 108 | +
  | 
 | 109 | +options(digits = 4)  | 
 | 110 | +
  | 
 | 111 | +```  | 
 | 112 | + | 
 | 113 | +*** =sct  | 
 | 114 | + | 
 | 115 | +```{r}  | 
 | 116 | +test_function("cohen.kappa", args = "object",  | 
 | 117 | +              not_called_msg = "You didn't call `cohen.kappa()`!",  | 
 | 118 | +              incorrect_msg = "You didn't call `cohen.kappa(object = ...)` with the correct argument, `object`.")  | 
 | 119 | + | 
 | 120 | +test_error()  | 
 | 121 | + | 
 | 122 | +```  | 
 | 123 | +
  | 
0 commit comments