Skip to content

Commit 7056979

Browse files
authored
Change to two-sided t-test
1 parent a9bd2cc commit 7056979

File tree

3 files changed

+51
-42
lines changed

3 files changed

+51
-42
lines changed
67 Bytes
Binary file not shown.

analysis_code/SARA_analysis_documentation.tex

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
\author{Tianchen Qian, Mashfiqui Rabbi, Susan Murphy}
2020

21-
\date{2018.09.25}
21+
\date{2019.02.05}
2222

2323
\maketitle
2424
In this document, we describe the statistical methods to conduct the
@@ -80,8 +80,8 @@ \subsubsection{Test statistic and critical value}
8080
\begin{equation}
8181
T=\frac{\hat{\beta}}{\{\widehat{\text{Var}}(\hat{\beta})\}^{1/2}}.\label{eq:test-primary1}
8282
\end{equation}
83-
To conduct one-sided hypothesis test with significance level $0.025$,
84-
the critical value is $c=t_{n-1-q}^{-1}(1-0.025)$. If $T>c$, we
83+
To conduct a two-sided hypothesis test with significance level $\eta$,
84+
the critical value is $c=t_{n-1-q}^{-1}(1 - \eta / 2)$. If $|T|>c$, we
8585
reject $H_{0}$. Here, $n$ is the sample size, $q$ is the length
8686
of vector $Z_{it}$ (including the added intercept), and $t_{n-1-q}^{-1}(\gamma)$
8787
denotes the $\gamma$-quantile of the $t$-distribution with $(n-1-q)$
@@ -170,8 +170,8 @@ \subsubsection{Test statistic and critical value}
170170
\begin{equation}
171171
T=\frac{\hat{\beta}}{\{\widehat{\text{Var}}(\hat{\beta})\}^{1/2}}.\label{eq:test-primary2}
172172
\end{equation}
173-
To conduct one-sided hypothesis test with significance level $0.025$,
174-
the critical value is $c=t_{n-1-q}^{-1}(1-0.025)$. If $T>c$, we
173+
To conduct a two-sided hypothesis test with significance level $\eta$,
174+
the critical value is $c=t_{n-1-q}^{-1}(1 - \eta / 2)$. If $|T|>c$, we
175175
reject $H_{0}$. Here, $n$ is the sample size, $q$ is the length
176176
of vector $Z_{it}$ (including the added intercept), and $t_{n-1-q}^{-1}(\gamma)$
177177
denotes the $\gamma$-quantile of the $t$-distribution with $(n-1-q)$

analysis_code/primary_and_secondary_analysis.R

Lines changed: 46 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,12 @@
3232
#
3333
# 1. Implement general F-test for linear combinations in SARA_exploratory_analysis_general_F_test().
3434

35+
####################################
36+
# Update by Tianchen Qian, 2019/2/5
37+
#
38+
# 1. Changed from one-sided test to two-sided test in all t-tests
39+
40+
3541
library(rootSolve) # for solver function multiroot()
3642

3743
##########################################
@@ -62,7 +68,7 @@ binary_outcome_moderated_effect <- function(
6268
outcome_var = "Y",
6369
avail_var = NULL,
6470
prob_treatment = 1/2,
65-
significance_level = 0.025)
71+
significance_level = 0.05)
6672
{
6773
############## description ###############
6874
##
@@ -93,7 +99,7 @@ binary_outcome_moderated_effect <- function(
9399
## avail_var.............variable name for availability variable
94100
## NULL (default) means always-available
95101
## prob_treatment........probability of treatment (default to 1/2)
96-
## significance_level....significance level for the hypothesis testing (default to 0.025)
102+
## significance_level....significance level for the hypothesis testing (default to 0.05)
97103

98104

99105
############## return value ###############
@@ -106,7 +112,7 @@ binary_outcome_moderated_effect <- function(
106112
## alpha_se..............standard error for alpha_hat
107113
## beta_se_ssa...........standard error for beta_hat, with small sample correction (hat matrix)
108114
## alpha_se_ssa..........standard error for alpha_hat, with small sample correction (hat matrix)
109-
## test_result_t.........(one-sided) t-test result for each entry in beta_hat, which is a list consisting of test_stat, critical_value, p_value
115+
## test_result_t.........(two-sided) t-test result for each entry in beta_hat, which is a list consisting of test_stat, critical_value, p_value
110116
## test_result_f.........F-test result for beta = 0, which is a list consisting of test_stat, critical_value, p_value
111117
## varcov................estimated variance-covariance matrix for (beta_hat, alpha_hat)
112118
## varcov_ssa............estimated variance-covariance matrix for (beta_hat, alpha_hat), with small sample correction (hat matrix)
@@ -319,11 +325,11 @@ binary_outcome_moderated_effect <- function(
319325

320326
############## part 5 :: p-value with small sample correction ###############
321327

322-
# t test (one sided, because we are using significance_level instead of significance_level/2)
328+
# t test (two-sided -- note the use of significance_level/2)
323329

324330
test_stat <- beta_root / beta_se_ssa
325-
critical_value <- qt(1 - significance_level, df = n - p - q)
326-
p_val <- pt(abs(test_stat), df = n - p - q, lower.tail = FALSE)
331+
critical_value <- qt(1 - significance_level/2, df = n - p - q) # two-sided
332+
p_val <- 2 * pt(abs(test_stat), df = n - p - q, lower.tail = FALSE) # two-sided
327333
names(test_stat) <- names(p_val) <- Xnames
328334
test_result_t <- list(test_stat = test_stat,
329335
critical_value = critical_value,
@@ -334,7 +340,6 @@ binary_outcome_moderated_effect <- function(
334340
test_stat <- as.numeric( t(beta_root) %*% solve(asymp_varcov_ssa[1:p, 1:p] / sample_size) %*% beta_root )
335341
n <- sample_size
336342
critical_value <- qf((n-q-p) * (1-significance_level) / (p * (n-q-1)), df1 = p, df2 = n-q-p)
337-
# browser()
338343
p_val <- pf(test_stat, df1 = p, df2 = n-q-p, lower.tail = FALSE)
339344
test_result_f <- list(test_stat = test_stat,
340345
critical_value = critical_value,
@@ -369,7 +374,7 @@ SARA_primary_hypothesis_1 <- function(
369374
outcome_var = "Y",
370375
avail_var = NULL,
371376
prob_treatment = 1/2,
372-
significance_level = 0.025
377+
significance_level = 0.05
373378
) {
374379
############## description ###############
375380
##
@@ -392,17 +397,17 @@ SARA_primary_hypothesis_1 <- function(
392397
## avail_var.............variable name for availability variable
393398
## NULL (default) means always-available
394399
## prob_treatment........probability of treatment (default to 1/2)
395-
## significance_level....significance level for the hypothesis testing (default to 0.025)
400+
## significance_level....significance level for the hypothesis testing (default to 0.05)
396401

397402
############## return value ###############
398403
##
399404
## This function returns a list of the following components:
400405
##
401406
## beta..................estimated beta (marginal treatment effect)
402407
## beta_se...............standard error for beta, with small sample correction
403-
## test_stat.............(one sided) t-test statsitic for testing beta = 0
404-
## critical_value........(one sided) critical value for t-test with the input significance level
405-
## p_value...............(one sided) p-value for t-test
408+
## test_stat.............(two-sided) t-test statistic for testing beta = 0
409+
## critical_value........(two-sided) critical value for t-test with the input significance level
410+
## p_value...............(two-sided) p-value for t-test
406411

407412
# make sure dta is sorted by id_var then day_var
408413
dta <- dta[order(dta[, id_var], dta[, day_var]), ]
@@ -437,7 +442,7 @@ SARA_primary_hypothesis_2 <- function(
437442
outcome_var = "Y",
438443
avail_var = NULL,
439444
prob_treatment = 1/2,
440-
significance_level = 0.025
445+
significance_level = 0.05
441446
) {
442447

443448
############## description ###############
@@ -463,17 +468,17 @@ SARA_primary_hypothesis_2 <- function(
463468
## avail_var.............variable name for availability variable
464469
## NULL (default) means always-available
465470
## prob_treatment........probability of treatment (default to 1/2)
466-
## significance_level....significance level for the hypothesis testing (default to 0.025)
471+
## significance_level....significance level for the hypothesis testing (default to 0.05)
467472

468473
############## return value ###############
469474
##
470475
## This function returns a list of the following components:
471476
##
472477
## beta..................estimated beta (marginal treatment effect)
473478
## beta_se...............standard error for beta, with small sample correction
474-
## test_stat.............(one sided) t-test statsitic for testing beta = 0
475-
## critical_value........(one sided) critical value for t-test with the input significance level
476-
## p_value...............(one sided) p-value for t-test
479+
## test_stat.............(two-sided) t-test statistic for testing beta = 0
480+
## critical_value........(two-sided) critical value for t-test with the input significance level
481+
## p_value...............(two-sided) p-value for t-test
477482

478483
# make sure survey_completion_var is binary
479484
stopifnot(all(dta[, survey_completion_var] %in% c(0, 1)))
@@ -533,7 +538,7 @@ SARA_exploratory_analysis <- function(
533538
outcome_var = "Y",
534539
avail_var = NULL,
535540
prob_treatment = 1/2,
536-
significance_level = 0.025
541+
significance_level = 0.05
537542
) {
538543
############## description ###############
539544
##
@@ -556,17 +561,17 @@ SARA_exploratory_analysis <- function(
556561
## avail_var.............variable name for availability variable
557562
## NULL (default) means always-available
558563
## prob_treatment........probability of treatment (default to 1/2)
559-
## significance_level....significance level for the hypothesis testing (default to 0.025)
564+
## significance_level....significance level for the hypothesis testing (default to 0.05)
560565

561566
############## return value ###############
562567
##
563568
## This function returns a list of the following components:
564569
##
565570
## beta..................estimated beta (moderated treatment effect)
566571
## beta_se...............standard error for beta, with small sample correction
567-
## test_stat_t.............(one sided) t-test statsitic for testing beta = 0
568-
## critical_value_t........(one sided) critical value for t-test with the input significance level
569-
## p_value_t...............(one sided) p-value for t-test
572+
## test_stat_t.............(two-sided) t-test statistic for testing beta = 0
573+
## critical_value_t........(two-sided) critical value for t-test with the input significance level
574+
## p_value_t...............(two-sided) p-value for t-test
570575
## test_stat_f.............F-test statistic for testing all beta's = 0
571576
## critical_value_f........critical value for F-test with the input significance level
572577
## p_value_f...............p-value for F-test
@@ -610,7 +615,7 @@ SARA_exploratory_analysis_general_F_test <- function(
610615
outcome_var = "Y",
611616
avail_var = NULL,
612617
prob_treatment = 1/2,
613-
significance_level = 0.025,
618+
significance_level = 0.05,
614619
F_test_L,
615620
F_test_c = NULL
616621
) {
@@ -635,7 +640,7 @@ SARA_exploratory_analysis_general_F_test <- function(
635640
## avail_var.............variable name for availability variable
636641
## NULL (default) means always-available
637642
## prob_treatment........probability of treatment (default to 1/2)
638-
## significance_level....significance level for the hypothesis testing (default to 0.025)
643+
## significance_level....significance level for the hypothesis testing (default to 0.05)
639644
## F_test_L, F_test_c....test for H_0: F_test_L %*% beta_hat = F_test_c,
640645
## where dim(beta) = p * 1, dim(F_test_L) = p1 * p, dim(F_test_c) = p1 * 1.
641646
## If F_test_L is passed in as a vector, it will be treated as a row vector.
@@ -647,9 +652,9 @@ SARA_exploratory_analysis_general_F_test <- function(
647652
##
648653
## beta..................estimated beta (moderated treatment effect)
649654
## beta_se...............standard error for beta, with small sample correction
650-
## test_stat_t.............(one sided) t-test statsitic for testing beta = 0
651-
## critical_value_t........(one sided) critical value for t-test with the input significance level
652-
## p_value_t...............(one sided) p-value for t-test
655+
## test_stat_t.............(two-sided) t-test statistic for testing beta = 0
656+
## critical_value_t........(two-sided) critical value for t-test with the input significance level
657+
## p_value_t...............(two-sided) p-value for t-test
653658
## test_stat_f.............F-test statistic for testing F_test_L %*% beta_hat = F_test_c
654659
## critical_value_f........critical value for F-test with the input significance level
655660
## p_value_f...............p-value for F-test
@@ -677,6 +682,7 @@ SARA_exploratory_analysis_general_F_test <- function(
677682
p <- length(beta_hat)
678683
beta_hat <- matrix(beta_hat, ncol = 1)
679684
varcov_beta_hat <- result$varcov_ssa[1:p, 1:p]
685+
680686
## general F test for F_test_L %*% beta_hat = F_test_c ##
681687
if (is.vector(F_test_L)) {
682688
F_test_L <- matrix(F_test_L, nrow = 1)
@@ -760,7 +766,10 @@ if (0) {
760766

761767
if (0) {
762768
# output of the above example
763-
# Version: 2018/10/22
769+
# Version: 2019/2/5
770+
771+
# All the p-values for t-tests are doubled compared to the previous version.
772+
# This is as expected, because we changed from one-sided to two-sided test.
764773

765774

766775
> ### try out the three analysis functions ###
@@ -780,7 +789,7 @@ if (0) {
780789
[1] 1.984984
781790

782791
$p_value
783-
[1] 5.112921e-15
792+
[1] 1.022584e-14
784793

785794
>
786795
> # primary hypothesis 2
@@ -798,7 +807,7 @@ if (0) {
798807
[1] 1.984984
799808

800809
$p_value
801-
[1] 0.3619496
810+
[1] 0.7238992
802811

803812
>
804813
> # exploratory analysis
@@ -818,7 +827,7 @@ if (0) {
818827
[1] 1.985251
819828

820829
$p_value_t
821-
[1] 1.285891e-09 2.581879e-04
830+
[1] 2.571782e-09 5.163757e-04
822831

823832
$test_stat_f
824833
[1] 95.53464
@@ -846,7 +855,7 @@ if (0) {
846855
[1] 1.985251
847856

848857
$p_value_t
849-
[1] 1.285891e-09 2.581879e-04
858+
[1] 2.571782e-09 5.163757e-04
850859

851860
$test_stat_f
852861
[1] 95.53464
@@ -873,7 +882,7 @@ if (0) {
873882
[1] 1.985251
874883

875884
$p_value_t
876-
[1] 1.285891e-09 2.581879e-04
885+
[1] 2.571782e-09 5.163757e-04
877886

878887
$test_stat_f
879888
[1] 40.83885
@@ -905,7 +914,7 @@ if (0) {
905914
[1] 1.984984
906915

907916
$p_value
908-
[1] 0.01703565
917+
[1] 0.03407131
909918

910919
> SARA_primary_hypothesis_2(dta2, control_var = c("Y_lag1", "at_tapcount_lag1"), survey_completion_var = "Y", avail_var = "avail")
911920
$beta
@@ -921,7 +930,7 @@ if (0) {
921930
[1] 1.984984
922931

923932
$p_value
924-
[1] 0.3034204
933+
[1] 0.6068408
925934

926935
> SARA_exploratory_analysis(dta2, control_var = c("Y_lag1", "at_tapcount_lag1"), moderator = "Y_lag1", avail_var = "avail")
927936
$beta
@@ -939,7 +948,7 @@ if (0) {
939948
[1] 1.985251
940949

941950
$p_value_t
942-
[1] 0.1047124 0.2798671
951+
[1] 0.2094249 0.5597343
943952

944953
$test_stat_f
945954
[1] 4.588463
@@ -949,4 +958,4 @@ if (0) {
949958

950959
$p_value_f
951960
[1] 0.01252342
952-
}
961+
}

0 commit comments

Comments
 (0)