Skip to content

Commit b555041

Browse files
committed
Initial commit
0 parents  commit b555041

10 files changed

+12445
-0
lines changed

Diff for: .gitattributes

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
# Auto detect text files and perform LF normalization
2+
* text=auto

Diff for: Alignment_Test.R

+323
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,323 @@
1+
# measurement alignment test with Fisher & Karl (2019)
2+
3+
# package load
4+
library(lavaan)
5+
library(sirt)
6+
library(foreach)
7+
library(parallel)
8+
library(doParallel)
9+
library(psych)
10+
library(MASS)
11+
12+
# set some constants
13+
fits <- c('rmsea.scaled','srmr','cfi.scaled')
14+
var.help <- c('help1','help2','help3','help4','help5','help6','help7')
15+
16+
# data load
17+
data <- read.csv('example.csv')
18+
DATA <- data
19+
20+
# how many countries?
21+
table(data$country)
22+
# eight countries
23+
24+
# sort by countries
25+
data <- data[order(data$country),]
26+
27+
# extract country names
28+
countries <- labels(table(data$country))[[1]]
29+
30+
#####
31+
# first, let's start with MGCFA
32+
# let's focus on help for now for alignment
33+
cfa_model.help<- '
34+
help =~ help1 + help2 + help3 + help4 + help5 + help6 + help7'
35+
36+
# invariance test
37+
# configural invariance
38+
# use WLSMV (ordinal scale)
39+
fit.help.configural <- cfa (cfa_model.help, data, group = 'country',
40+
estimator='WLSMV')
41+
fitMeasures(fit.help.configural)[fits]
42+
#rmsea.scaled srmr cfi.scaled
43+
#0.06855601 0.03272451 0.95127722
44+
45+
# metric invariance
46+
fit.help.metric <- cfa (cfa_model.help, data, group = 'country',
47+
estimator='WLSMV', group.equal='loadings')
48+
fitMeasures(fit.help.metric)[fits]
49+
#rmsea.scaled srmr cfi.scaled
50+
#0.04358127 0.03944788 0.97292655
51+
# changes
52+
fitMeasures(fit.help.metric)[fits]-fitMeasures(fit.help.configural)[fits]
53+
# -0.024974735 0.006723362 0.021649334
54+
# acceptable
55+
56+
# scalar invariance
57+
fit.help.scalar <- cfa (cfa_model.help, data, group = 'country',
58+
estimator='WLSMV', group.equal=c('loadings','intercepts'))
59+
fitMeasures(fit.help.scalar)[fits]
60+
#rmsea.scaled srmr cfi.scaled
61+
#0.05909330 0.04892335 0.93664870
62+
# changes
63+
fitMeasures(fit.help.scalar)[fits]-fitMeasures(fit.help.metric)[fits]
64+
# 0.015512027 0.009475471 -0.036277857
65+
# both rmsea and cfi changes exceeded threshold. alignment necessary
66+
67+
#####
68+
# Then, let's perform measurement alignment
69+
70+
# help
71+
# extract cfa parameters
72+
par.help <- invariance_alignment_cfa_config(dat = data[,var.help],
73+
group = data$country)
74+
# do alignment
75+
# following the suggested threshold values in Fisher & Karl (2019)
76+
mod.help <- invariance.alignment(lambda = par.help$lambda, nu =
77+
par.help$nu, align.scale = c(0.2, 0.4), align.pow = c(0.25, 0.25))
78+
79+
# test performance
80+
mod.help$es.invariance['R2',]
81+
# loadings intercepts
82+
# 0.9979870 0.9996975
83+
# 99% absolbed -> great result
84+
85+
# item-level test
86+
cmod <- invariance_alignment_constraints(mod.help, lambda_parm_tol = .4, nu_parm_tol = .2)
87+
summary(cmod)
88+
# lambda noninvariance item = 0%
89+
# nu noninvariance item = 5.4%
90+
# acceptable
91+
92+
#####
93+
# Monte Carlo simulation
94+
95+
# define function for simulation/iteration
96+
# simulation function
97+
# times: how many time to repeat the same test?
98+
# n: n for each test (e.g., 100, 200, 500)
99+
# data: data to be tested
100+
# model: CFA model
101+
# lv: name of the latent variable e.g., help
102+
# n.include: number of groups. in this case, 8 (countries)
103+
# gruops: names of groups to be tested. e.g., data$country
104+
# items: items (variable names) to be tested. an array
105+
# par: parameters for cfa (created by invariance_alignment_cfa_config)
106+
# seed: random seed
107+
simulation <- function(n,data,model,lv,
108+
n.include,groups,items,par,seed=1){
109+
110+
# create a matrix to return results
111+
# correlation and R2s
112+
cor.mean <- 0
113+
cor.var <- 0
114+
R2.loading <- 0
115+
R2.intercept <- 0
116+
117+
# begin simulation
118+
set.seed(seed)
119+
G <- n.include # number of groups
120+
I <- length(items) # number of items
121+
122+
# lambda, nu, and error_var to be created for simulation
123+
err_var.cle <- matrix(1, nrow=G,ncol=I)
124+
125+
# Create stimulated data
126+
# enter group mu and sigma
127+
data$Y <- rowMeans(data[,items])
128+
mu<-scale(aggregate(x=data$Y,
129+
by = list(groups),
130+
FUN=mean, na.rm=T)[,2])[,1]
131+
sigma <- (aggregate(x=data$Y,
132+
by = list(groups),
133+
FUN=sd, na.rm=T)[,2])
134+
N <- rep(n,G)
135+
136+
# do simulation for data creation
137+
dat <- invariance_alignment_simulate(
138+
par$nu,par$lambda,err_var.cle,mu,sigma,N
139+
)
140+
141+
# do CFA. parameter extraction for alignment
142+
par.simul <- invariance_alignment_cfa_config(dat = dat[,items],
143+
group = dat$group,
144+
estimator = 'WLSMV')
145+
146+
147+
#cfa.test <-cfa(model,dat,estimator='WLSMV',group='group')
148+
#ipars <- parameterEstimates(cfa.test)
149+
150+
# then, do alignment
151+
mod1.simul <- invariance.alignment(lambda = par.simul$lambda, nu =
152+
par.simul$nu, align.scale = c(0.2, 0.4), align.pow = c(0.25, 0.25),
153+
optimizer='nlminb')
154+
155+
# do CFA. scalar invariance model for further calculation
156+
cfa.simul <- cfa(model,dat,estimator='WLSMV',group='group',
157+
group.equal=c('loadings','intercepts'),meanstructure=T)
158+
159+
# get group mean
160+
params.simul <- parameterEstimates(cfa.simul)
161+
alpha.simul <- params.simul[(params.simul$op=='~1')&(params.simul$lhs==lv),'est']
162+
163+
# group mean correlation (Muthen 2018)
164+
correlation <- corr.test(alpha.simul,mod1.simul$pars$alpha0,method='spearman')$r
165+
166+
# get group intercept
167+
psi.simul <- params.simul[(params.simul$op=='~~')&(params.simul$lhs==lv),'est']
168+
correlation.psi <- corr.test(psi.simul,mod1.simul$pars$psi0,method='spearman')$r
169+
170+
cor.mean <- correlation
171+
cor.var <- correlation.psi
172+
173+
# R2. The extent to which non-invariances in loadings and intercepts were absorbed?
174+
# ideally >= 75-80%
175+
R2.loading <- mod1.simul$es.invariance['R2',1]
176+
R2.intercept <- mod1.simul$es.invariance['R2',2]
177+
178+
# make matrix
179+
to.return <-cbind(cor.mean,cor.var,R2.loading,R2.intercept)
180+
to.return <- data.matrix(to.return)
181+
182+
return(to.return)
183+
184+
}
185+
186+
187+
188+
# use five cores
189+
# and repeat each test 500 times
190+
cores <- 5
191+
times <- 500
192+
193+
# create threads to distribute tasks
194+
cl <- parallel::makeCluster(cores,type='FORK')
195+
doParallel::registerDoParallel(cl)
196+
197+
# start simulation with n = 100
198+
# time measure as well
199+
200+
start_100 <-Sys.time()
201+
now <- foreach (i = seq(1,times)) %dopar%{
202+
# do simulation
203+
#simulation <- function(n,data,model,lv,
204+
# n.include,groups,items,par,seed=1)
205+
simulation(100,data,cfa_model.help,'help',
206+
n.include, data$country,var.help,par.help,i)
207+
# message(sprintf('%d',i))
208+
}
209+
end_100<-Sys.time()
210+
elapsed_100 <- end_100 - start_100
211+
# merge result
212+
for (i in 1:times){
213+
if (i == 1){
214+
simulate_100 <- now[[1]]
215+
}else{
216+
simulate_100 <- rbind(simulate_100,now[[i]])
217+
}
218+
}
219+
# save n = 100
220+
write.csv(data.frame(simulate_100),file='simulate_100.csv',row.names = FALSE)
221+
222+
# results for n = 100
223+
print(describe(simulate_100),digits=4)
224+
225+
# do the sam ewith n = 200
226+
start_200 <-Sys.time()
227+
now <- foreach (i = seq(1,times)) %dopar%{
228+
# do simulation
229+
#simulation <- function(n,data,model,lv,
230+
# n.include,groups,items,par,seed=1)
231+
simulation(200,data,cfa_model.help,'help',
232+
n.include, data$country,var.help,par.help,i)
233+
# message(sprintf('%d',i))
234+
}
235+
end_200<-Sys.time()
236+
elapsed_200 <- end_200 - start_200
237+
# merge result
238+
for (i in 1:times){
239+
if (i == 1){
240+
simulate_200 <- now[[1]]
241+
}else{
242+
simulate_200 <- rbind(simulate_200,now[[i]])
243+
}
244+
}
245+
# save n = 200
246+
write.csv(data.frame(simulate_200),file='simulate_200.csv',row.names = FALSE)
247+
print(describe(simulate_200),digits=4)
248+
249+
# then n= 500
250+
start_500 <-Sys.time()
251+
now <- foreach (i = seq(1,times)) %dopar%{
252+
# do simulation
253+
#simulation <- function(n,data,model,lv,
254+
# n.include,groups,items,par,seed=1)
255+
simulation(500,data,cfa_model.help,'help',
256+
n.include, data$country,var.help,par.help,i)
257+
# message(sprintf('%d',i))
258+
}
259+
end_500<-Sys.time()
260+
elapsed_500 <- end_500 - start_500
261+
# merge result
262+
for (i in 1:times){
263+
if (i == 1){
264+
simulate_500 <- now[[1]]
265+
}else{
266+
simulate_500 <- rbind(simulate_500,now[[i]])
267+
}
268+
}
269+
# save n = 500
270+
write.csv(data.frame(simulate_500),file='simulate_500.csv',row.names = FALSE)
271+
print(describe(simulate_500),digits=4)
272+
273+
# terminate multiprocessing
274+
parallel::stopCluster(cl)
275+
276+
# in all cases, cor ≥ 95%. Good
277+
278+
279+
#####
280+
# calculate factor scores based on aligned loadings and intercepts
281+
282+
# function to implement factor score calculation
283+
# from adjusted lambda and nu
284+
# basically, x = lambda*X + nu
285+
# so, X = inv (lambda) (x - nu)
286+
aligned.factor.scores <- function(lambda,nu,y){
287+
#calculate inverse matrix
288+
lambda1 <- ginv((lambda))
289+
#create matrix for nu
290+
ns <- nrow(y)
291+
nus <- matrix(nu,nrow=ns, ncol=length(nu), byrow=T)
292+
# y - nu
293+
y_nu <- y - nu
294+
F <- lambda1 %*% t(as.matrix(y_nu))
295+
}
296+
297+
# calculate score
298+
# do calculation for each country, and then merge
299+
300+
for (i in 1:(n.include)){
301+
if (i == 1){
302+
# first country
303+
# create new matrix
304+
data.aligned <- data[data$country==countries[i],]
305+
# calculate factor score
306+
Fs <- aligned.factor.scores(mod.help$lambda.aligned[i,],
307+
mod.help$nu.aligned[i,],
308+
data[data$country==countries[i],var.help])
309+
data.aligned$help <- t(Fs)
310+
}else{
311+
# other than the first country
312+
# append
313+
current <- data[data$country==countries[i],]
314+
Fs <- aligned.factor.scores(mod.help$lambda.aligned[i,],
315+
mod.help$nu.aligned[i,],
316+
data[data$country==countries[i],var.help])
317+
current$help <- t(Fs)
318+
data.aligned <- rbind(data.aligned,current)
319+
}
320+
}
321+
322+
# save aligned result
323+
write.csv(data.frame(data.aligned),file='aligned.csv',row.names = FALSE)

0 commit comments

Comments
 (0)