27
27
# 2. Modified the output of SARA_exploratory_analysis() to include test results
28
28
# of both t-test and F-test.
29
29
30
+ # ###################################
31
+ # Update by Tianchen Qian, 2018/11/1
32
+ #
33
+ # 1. Implement general F-test for linear combinations in SARA_exploratory_analysis_general_F_test().
30
34
31
35
library(rootSolve ) # for solver function multiroot()
32
36
@@ -207,7 +211,6 @@ binary_outcome_moderated_effect <- function(
207
211
exp_Zalpha <- as.vector(exp(Zalpha ))
208
212
exp_negAXbeta <- as.vector(exp(- A [it ] * Xbeta ))
209
213
exp_2Zalpha <- as.vector(exp(2 * Zalpha ))
210
- # browser()
211
214
Mn_summand [it , 1 : p , 1 : p ] <-
212
215
- as.numeric(Y [it ] * exp_negAXbeta * A [it ] * cA [it ]) * (Xdm [it , ] %o% Xdm [it , ]) * avail [it ]
213
216
Mn_summand [it , 1 : p , (p + 1 ): (p + q )] <-
@@ -243,7 +246,7 @@ binary_outcome_moderated_effect <- function(
243
246
}
244
247
Sigman <- Sigman / sample_size
245
248
246
- # Compute the asymptotic variance matrix
249
+ # Compute the asymptotic variance matrix ( this is on the scale of \sqrt{n}(\hat{\beta} - \beta) )
247
250
248
251
asymp_varcov <- Mn_inv %*% Sigman %*% t(Mn_inv )
249
252
asymp_var <- diag(asymp_varcov )
@@ -331,6 +334,7 @@ binary_outcome_moderated_effect <- function(
331
334
test_stat <- as.numeric( t(beta_root ) %*% solve(asymp_varcov_ssa [1 : p , 1 : p ] / sample_size ) %*% beta_root )
332
335
n <- sample_size
333
336
critical_value <- qf((n - q - p ) * (1 - significance_level ) / (p * (n - q - 1 )), df1 = p , df2 = n - q - p )
337
+ # browser()
334
338
p_val <- pf(test_stat , df1 = p , df2 = n - q - p , lower.tail = FALSE )
335
339
test_result_f <- list (test_stat = test_stat ,
336
340
critical_value = critical_value ,
@@ -350,7 +354,8 @@ binary_outcome_moderated_effect <- function(
350
354
test_result_f = test_result_f ,
351
355
varcov = asymp_varcov / sample_size ,
352
356
varcov_ssa = asymp_varcov_ssa / sample_size ,
353
- dims = list (p = p , q = q )))
357
+ dims = list (p = p , q = q ),
358
+ sample_size = sample_size ))
354
359
}
355
360
356
361
@@ -566,6 +571,8 @@ SARA_exploratory_analysis <- function(
566
571
# # critical_value_f........critical value for F-test with the input significance level
567
572
# # p_value_f...............p-value for F-test
568
573
574
+ # # Note: all tests are using standard error estimates with small sample correction
575
+
569
576
570
577
# make sure dta is sorted by id_var then day_var
571
578
dta <- dta [order(dta [, id_var ], dta [, day_var ]), ]
@@ -591,7 +598,116 @@ SARA_exploratory_analysis <- function(
591
598
}
592
599
593
600
601
+ # # In a future update, it may be desirable to merge SARA_exploratory_analysis_general_F_test()
602
+ # # with SARA_exploratory_analysis().
603
+ SARA_exploratory_analysis_general_F_test <- function (
604
+ dta ,
605
+ control_var ,
606
+ moderator ,
607
+ id_var = " userid" ,
608
+ day_var = " Day" ,
609
+ trt_var = " A" ,
610
+ outcome_var = " Y" ,
611
+ avail_var = NULL ,
612
+ prob_treatment = 1 / 2 ,
613
+ significance_level = 0.025 ,
614
+ F_test_L ,
615
+ F_test_c = NULL
616
+ ) {
617
+ # ############# description ###############
618
+ # #
619
+ # # This function does exploratory analysis for SARA:
620
+ # # it estimates moderated treatment effect, and test for no treatment by using an F-test.
621
+ # #
622
+ # # For more details, refer to the writeup for SARA analysis.
623
+
624
+ # ############# arguments ###############
625
+ # #
626
+ # # dta.............the data set in long format
627
+ # # control_var...........vector of variable names used to reduce noise (Z in the model),
628
+ # # could be NULL (no control covariates)
629
+ # # moderator.............vector of variable names as effect modifiers (X in the model),
630
+ # # could be NULL (no effect modifier)
631
+ # # id_var................variable name for subject id (to distinguish between subjects in dta)
632
+ # # day_var...............variable name for day in study (from 1 to max_days)
633
+ # # trt_var...............variable name for treatment indicator
634
+ # # outcome_var...........variable name for outcome variable
635
+ # # avail_var.............variable name for availability variable
636
+ # # NULL (default) means always-available
637
+ # # prob_treatment........probability of treatment (default to 1/2)
638
+ # # significance_level....significance level for the hypothesis testing (default to 0.025)
639
+ # # F_test_L, F_test_c....test for H_0: F_test_L %*% beta_hat = F_test_c,
640
+ # # where dim(beta) = p * 1, dim(F_test_L) = p1 * p, dim(F_test_c) = p1 * 1.
641
+ # # If F_test_L is passed in as a vector, it will be treated as a row vector.
642
+ # # If F_test_c is unspecified, it will be default to 0.
643
+
644
+ # ############# return value ###############
645
+ # #
646
+ # # This function returns a list of the following components:
647
+ # #
648
+ # # beta..................estimated beta (moderated treatment effect)
649
+ # # beta_se...............standard error for beta, with small sample correction
650
+ # # test_stat_t.............(one sided) t-test statsitic for testing beta = 0
651
+ # # critical_value_t........(one sided) critical value for t-test with the input significance level
652
+ # # p_value_t...............(one sided) p-value for t-test
653
+ # # test_stat_f.............F-test statsitic for testing F_test_L %*% beta_hat = F_test_c
654
+ # # critical_value_f........critical value for F-test with the input significance level
655
+ # # p_value_f...............p-value for F-test
656
+
657
+ # # Note: all tests are using standard error estimates with small sample correction
658
+
659
+
660
+ # make sure dta is sorted by id_var then day_var
661
+ dta <- dta [order(dta [, id_var ], dta [, day_var ]), ]
662
+
663
+ result <- binary_outcome_moderated_effect(dta = dta ,
664
+ control_var = control_var ,
665
+ moderator = moderator ,
666
+ id_var = id_var ,
667
+ trt_var = trt_var ,
668
+ outcome_var = outcome_var ,
669
+ avail_var = avail_var ,
670
+ prob_treatment = prob_treatment ,
671
+ significance_level = significance_level )
594
672
673
+ n <- result $ sample_size
674
+ q <- length(result $ alpha_hat )
675
+
676
+ beta_hat <- result $ beta_hat
677
+ p <- length(beta_hat )
678
+ beta_hat <- matrix (beta_hat , ncol = 1 )
679
+ varcov_beta_hat <- result $ varcov_ssa [1 : p , 1 : p ]
680
+ # # general F test for F_test_L %*% beta_hat = F_test_c ##
681
+ if (is.vector(F_test_L )) {
682
+ F_test_L <- matrix (F_test_L , nrow = 1 )
683
+ }
684
+ p1 <- dim(F_test_L )[1 ]
685
+ if (is.null(F_test_c )) {
686
+ F_test_c <- matrix (rep(0 , p1 ), ncol = 1 )
687
+ }
688
+ if (dim(F_test_L )[1 ] != dim(F_test_c )[1 ]) {
689
+ stop(" The dimensions of F_test_L and F_test_c are not coherent." )
690
+ }
691
+
692
+ tmp <- F_test_L %*% beta_hat - F_test_c
693
+ test_stat_f <- (t(tmp ) %*% solve(F_test_L %*% varcov_beta_hat %*% t(F_test_L )) %*% tmp )
694
+ # test statistic is computed in the same manner as in Liao et al. (2016)
695
+ test_stat_f <- as.numeric(test_stat_f )
696
+ critical_value_f <- qf((n - q - p1 ) * (1 - significance_level ) / (p1 * (n - q - 1 )), df1 = p1 , df2 = n - q - p )
697
+ # critical value is computed as in Section 5 of Boruvka et al. (2018)
698
+ p_value_f <- pf(test_stat_f , df1 = p1 , df2 = n - q - p , lower.tail = FALSE )
699
+
700
+
701
+ output <- list (beta = result $ beta_hat ,
702
+ beta_se = result $ beta_se_ssa ,
703
+ test_stat_t = as.numeric(result $ test_result_t $ test_stat ),
704
+ critical_value_t = result $ test_result_t $ critical_value ,
705
+ p_value_t = as.numeric(result $ test_result_t $ p_value ),
706
+ test_stat_f = test_stat_f ,
707
+ critical_value_f = critical_value_f ,
708
+ p_value_f = p_value_f )
709
+ return (output )
710
+ }
595
711
596
712
597
713
@@ -624,6 +740,11 @@ if (0) {
624
740
# exploratory analysis
625
741
SARA_exploratory_analysis(dta , control_var = c(" Y_lag1" , " at_tapcount_lag1" ), moderator = " Y_lag1" )
626
742
743
+ # this will give the same F-test result as SARA_exploratory_analysis()
744
+ SARA_exploratory_analysis_general_F_test(dta , control_var = c(" Y_lag1" , " at_tapcount_lag1" ), moderator = " Y_lag1" , F_test_L = diag(2 ))
745
+
746
+ SARA_exploratory_analysis_general_F_test(dta , control_var = c(" Y_lag1" , " at_tapcount_lag1" ), moderator = " Y_lag1" , F_test_L = rep(1 , 2 ))
747
+
627
748
628
749
# ## create fake availability indicator, and try the three analysis functions with availability ###
629
750
set.seed(123 )
@@ -680,7 +801,7 @@ if (0) {
680
801
[1 ] 0.3619496
681
802
682
803
>
683
- > # exploratory analysis
804
+ > # exploratory analysis
684
805
> SARA_exploratory_analysis(dta , control_var = c(" Y_lag1" , " at_tapcount_lag1" ), moderator = " Y_lag1" )
685
806
$ beta
686
807
Intercept Y_lag1
@@ -708,6 +829,61 @@ if (0) {
708
829
$ p_value_f
709
830
[1 ] 1.81756e-23
710
831
832
+ > # this will give the same F-test result as SARA_exploratory_analysis()
833
+ > SARA_exploratory_analysis_general_F_test(dta , control_var = c(" Y_lag1" , " at_tapcount_lag1" ), moderator = " Y_lag1" , F_test_L = diag(2 ))
834
+ $ beta
835
+ Intercept Y_lag1
836
+ 0.3493801 - 0.2138751
837
+
838
+ $ beta_se
839
+ Intercept Y_lag1
840
+ 0.05310572 0.05948923
841
+
842
+ $ test_stat_t
843
+ [1 ] 6.578955 - 3.595191
844
+
845
+ $ critical_value_t
846
+ [1 ] 1.985251
847
+
848
+ $ p_value_t
849
+ [1 ] 1.285891e-09 2.581879e-04
850
+
851
+ $ test_stat_f
852
+ [1 ] 95.53464
853
+
854
+ $ critical_value_f
855
+ [1 ] 0.6631817
856
+
857
+ $ p_value_f
858
+ [1 ] 1.81756e-23
859
+
860
+ > SARA_exploratory_analysis_general_F_test(dta , control_var = c(" Y_lag1" , " at_tapcount_lag1" ), moderator = " Y_lag1" , F_test_L = rep(1 , 2 ))
861
+ $ beta
862
+ Intercept Y_lag1
863
+ 0.3493801 - 0.2138751
864
+
865
+ $ beta_se
866
+ Intercept Y_lag1
867
+ 0.05310572 0.05948923
868
+
869
+ $ test_stat_t
870
+ [1 ] 6.578955 - 3.595191
871
+
872
+ $ critical_value_t
873
+ [1 ] 1.985251
874
+
875
+ $ p_value_t
876
+ [1 ] 1.285891e-09 2.581879e-04
877
+
878
+ $ test_stat_f
879
+ [1 ] 40.83885
880
+
881
+ $ critical_value_f
882
+ [1 ] 5.186928
883
+
884
+ $ p_value_f
885
+ [1 ] 6.113323e-09
886
+
711
887
>
712
888
>
713
889
> # ## create fake availability indicator, and try the three analysis functions with availability ###
0 commit comments