From 50508838721383f1886425737861cab9c0046931 Mon Sep 17 00:00:00 2001 From: zackbatist Date: Fri, 14 Feb 2025 14:55:44 -0500 Subject: [PATCH] Built site for gh-pages --- .nojekyll | 2 +- ethics-protocol.pdf | Bin 51458 -> 51465 bytes listings.json | 17 +- notes.html | 4 +- notes/methodology-notes.html | 48 +- posts.html | 27 +- posts.xml | 21 +- posts/weeknotes-2025-W07.html | 820 ++++++++++++++++++++++++++++++++++ research-protocol.pdf | Bin 70379 -> 70374 bytes search.json | 569 +++++++++++------------ sitemap.xml | 80 ++-- 11 files changed, 1240 insertions(+), 348 deletions(-) create mode 100644 posts/weeknotes-2025-W07.html diff --git a/.nojekyll b/.nojekyll index a15b57f..9f23ca0 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -8c7c0315 \ No newline at end of file +b5089ff3 \ No newline at end of file diff --git a/ethics-protocol.pdf b/ethics-protocol.pdf index 3b39d0c5ac9401c81dfef2ac016594ab5c86412d..a81053502e9fbe2830f7618c4125728c479b1363 100644 GIT binary patch delta 6461 zcmai2Ra6uVu$G2p=}?wNy3>`A?k?%>Mrvu6Mi!A0q@}wT%=P%Sh~CJfB$n| z{-^u)oip?D#hE!X;mK&R$!K+DN+<#;ynnU+72~3Llx#7{F(#M8w>}EtTqd~dwu4r- zze9{PU6(sg#kbcj@Ue}mdmOiOtSI7*dTq>qIj667a^CA`U@)z~6;1-zq{}U|TE*x- z9*g}^6wRsprmeC56ac*b5k*U>48$QMkfe%KBnsu9?9u#FsEobwwvkZ^)uvK0vARL9 z=>&L6YE$_m4BY|GD4ci_u;Q9~XaQxza0Nz9vhfo>MA0}{DjJ115Uyf;QBu)l_8=*g zlxf8Y3n5-D9*gjG6#ZEp`5FgG{CNvt^FFQ=+IG7Xwtu;m;$@y0^PNZJ-K}8h1Yh-$ zQ9^`u^unht!FTZmJ3o9~!L}L4FC{<&8vt838!vB9TT9pfMebHk0O7ZEDT*pg|2W~M zLJ5tZb)yS!CXMu=ZnU*_cN8nFU-Uklk*zpIl(?drH^@x8s5YvXAKWjtF38g>tY zik)#~Bn)v50M%1wgND^xt%SJ^B>m@cg}lcFXxlq$66U3Qw}&?izS1#4k2r_kB#>ZX z+MdXnNzKb!x6sq5kzgNMd8G2n1*h7(e&~Z?Ni<@0fxY&gL#%CnXx1XtZDCJa_fa*kDqU{Q(k9NL9$E=lr)uZkS{6f+`r_`y6Kn2Cc0skkb z-=b;Cic^myVbt+CX`mxpGHqiAIb)tyxu&!81&S5_V(3DoklD?Z+7*x-r1)oPbTnfj z?pO8ZG|!8FP#ehe!9$ zmrr-~D_|eD9*Z9i(3-zDeg-khU%aDt*AdAd)4?ML9{Xjh>0Ynx1T?UrYYK3mD|-3! 
z24Td=hwqI{3&SSAkm+_uj2$h9sZRVk*gqIO)}^=f1esPAf6!%R;rch`L5cZ{uQfK3 z*q$E~n3mOdHdtCB{9BP@5p?(@3gxdcV9a^g=qf@e-JbkdP%i4rn9q72DI#W1(9{?b z(H#>}>%%cUzOa2P4BdDUprVOA0w(0(?W5bG;Fv@e#Gv2z`^-v};$9Cli;X+Yr086ExlDJZcY*o(P?mB^joe;K&6vvoH+_$m@3g)x z5pGG*8MVN7b4{*`-6Ne{5aRu*l==daln!-PDEL1{elc}k;w6nfe&q08;8S)PNd{p@ zGk#XcGgbKA1KFKdwkxwOeEahDy73ZT#4CfEARa_RP5rG8Vt!yM3hU!po5K=?>Fk5t zc^+rx^r#Noe6*5bCDjVrH?gPJR0K=C6pgU%lo#AeiY_^jX=rfk{k_;q@)?7kM=yOYy>tJ& z^LNP3h-Sbr8EX-9Di12yX97r0e_hSTBIOsyyw2r`$BwEN5eAw68m4@JHiYg4h(u;a zBF}J&>a`<)ddazU$@gh=@_EfDh)h`l`8*2NIFD#l*$JkPxkbRpM*EX#NXWN4402P8_M=+ldRQUsY8 z-r7*0y_k(=zM0@C5XTO1Szu|W_``8awO4iHv*0MBo6&FJso*H7o6+Vf9?nAQrx(sj zFAX@RMg{+FtSHW)hM7Rd|zvBMFxTv0(+ zwMZguC8bC;FWwU(sR)d3V${OMdV({?T}?)C`HNWxX6w4HIQ^x+^_1tm;AQ8c=C<=e zwfw2%C@qkvd{YQB99>Q>+H37Rqo8jKx}IwfJTzE9TpZ4VFVGl26DW5XMjn(cWxB=< zw9OQTiLtyGny?f1ICYEAVH*qOLAm!P&Y2>~b|Z_!Hm>g6{In>e3}g6Qn2;5j)M~&z z>G6(!=yy}-4@W?_(bGOP%#yTg`Ec}nd~{U>)P@Dp?OywxYml>a-OnPcVA(cx^N=F+ zrp)RArTm?%!oPGAbkwlC0KikP)0xIi9exyaB^gihe=D5tS)l^|85f#r&b_?A&U=02 zJfCtVRl5py6He66#dv1h1LVB2Q<;Qjw#?I`75IWT3>XliF{PtD``-0S%vM&F!=y~@ zCq}A6S=9VqDT*{Zy3`YAGV}a4YbF2fqaGn??Z*c*{KNa5TPGp#{UKS*ZntBgv&Jrz zhVe?nQ&cq4rFy?2Lm{F4O=pyt{fi+6ox)OzH&~9MLAdiTl^AU?=a+|>8OFAj(S$$* zqI#tDrgzKg2C;>ksIgfnhH&G8(1#}3?#V&ub7jSTUyhCiZ33?7$W+3h*=Zhbh&iNB zwn;TXAr{^P82+>GHr}2GyTyvh^St<12Wvf6sPKPuNHKPf_-FsEJ(At0YlmO2pod$y z(Fv*zMQzU@(*Yi-@&1EdwMP_pn%zHJM7_4(X17+lAT> z>F!Jtc6#IG=_Rs!$a@CX*F)4py||~~Zf2F7c9^=_-`L4EG=0a+yfcHyrTS*-TN>@n zBgUdsl}+F)XFOFng3I3qPij1gW+93@JYmG&JzEQAML?=y-@fWB;VB&&)Bch%WU5C7||I ztF4xnSKvCDw0HT$Wn0i=MXlYRJ8NiPNWh21HTTl37sMT@L0MeV?dugT+lY0|-Eqk@ zzEFpxas&d{Q7Fg-x4l^aD=`)a&acEVt7Z-@V&Ruj|KgnJZMX4Z`Sc6n8TQL3fiv#w zFuf>Os=)Nw8nnG)QT@U5?BLqla^TcBInevYL<`hJIpTi0`)&Q7(D~u80N6U|i%N}g`T{5! 
z&d}K1wvGaQ_RO*G_`O>-I_@U^<>Jao?9u%@I4j}IKfyA!KIDpDbolBt+38_!0>XE~ z8OSI4t#b(9xBr}A1>s^)G*=j>4w#wAKAA*aXP&Kkbv&`{a!-0tv;wu4Nz@9XE|`ViJA z0#r~^|CT|PxYtZK$Gd!Mw6eSTVo8F-VyJ_n9ovm99|575t zrp@v^WU+}8>$86f$oYvPIMG8#xqXr2%OF(Gf;Jh|zMSx`UKfNnEOqU~i*cZT+hk&j z8Hq~#(T?+dG=yhR*ra5G_MBi2z?b}Bts@^snfUfThkVQT&C6Jc=E0iieNtUg)l$Xj zdDJ>xj%VFUUzsLDPNc{|ke7`CW+m4Cy;G17ciifDx6AxH4y^NrMz8p7U7x#!{ch-c z_*7AERXy6VLrV||wTt0P_Xg|vGc}*O((a{d3Yn~$lJk+>ih$Q&iB$*6{$?G%>J0Lt zwoY@0TOUNX!P2vz@Tz@br>t1liaUMT@eql;aVt09wiU0O} zi&@?4z~_5fZ{;NH1M!SiRp#KPlT>C-PQoI$#<4dMHwj!GGbQ>1INy4v0!;4f-pkeX z_CyC1GS45CzHMl72&c|)`z`z0*)<&>ODL9E3yia}I;(42t+s-&TUsLRC_GHbhI2p@ zMwFfo7`j?5RB`)(4fIyLjeg%R4z$?66jpF096Q(z&aAIYAHTBat`Kinc=q4?lSsX6 zBtlAw$8aDB{vmw-ZU#S^nUA|x zz_@JVk}KFvtGJ#N z)U)*%xjRNPx1HO2e9^ibDGycssxzFRE@eU=t+*yU!x_Dc{e-LGr`*g= z0g+$~HU#{4AWA!+PL+gyz_O0`J&n%DgMt?E01Pq4`|UW0CKzO%zzwYtXMrAJGn49! zJ}vCZ1`>e@Fm2&pCnTAIBqkokNy;6n&p+o0a#dsX7ai@FY5ADd zYpCd~(MqxFNS-JA5@&rvSVZ<7-jCjcc_K*g$8ZH)fVHP7`WZ+ZHaVFN_sA#l<{u?a z)Wx9-i&jL27lKIhB433k*ND=|BW%OM>A8Vt*|HWWBk-67y*~BKL^4ZG`-+%LB21!1 z#T6+iP2=s*m|YG;qv8g@!*o(Dp%|BrEQ^<5 z9mWhQTBrRbJp}BGmY|eCCz6&Dw4x3c`Rd(gZ7TUuu7bXh(nhy}sb+of7}-z>lI5q` zNBAglW!mANQRN@uu!DeQifIhQ;R#!SJZh0U2PrN(C?P>zk`JRzzE`$D^83kR^e6k+ zv0p|3CHO@==JWXdQ^CoffXq?!FHq>qNMCV=UtOsF>Ys$oXI9qfauFzUw42w(z92yI z{E1TNo_-X1Uu8C&;x|Ihi|{;tRN0# zR)^G)fs1|tZC--XAgZtCBI$qc0*K+7%)mfnXmWhv;sIg8rizGfGV(Cb3PlcJtojsykW@H#b*dTgBT?TW<@w1<=C#Kx$Xbq+gcYEHiF&kbeYQ--BwG{R}dR@Nu zRJN%%8R3)D#T|C)-MB{cW2$`Bj0oREXc1W#rdF#76Kpk{)%C1}DbDnD+)^&*083Zy z(}QRJXbJ|Io0~x2JJzxfGxB7wuL zB{xDPH-{N#PkpP$EVZCaF;j!`qqH=eSaTJ)aMjIhT|rO^2R|cSB$-@SgWpyibP8r3 z!4_W2%M`NH#-g*h<*t&Jev#?YE&L&{{bA?g@n~d))_h&?ZSnbXP=FY#ggBGqwChxQ zOrm`K>$hJa@wxo84c2`hWvzxRBvg>YB3}1drj_F|x7YvmDy=nS&#Qk?;@KFr208 z)(GFOuGM$7XrcbY#gO|*I$Ce(4`(4NYSvjZcoLfe?7!?E(@)#*;M)LW=sTK0*Uwm7 z8!cu=A;FHj?uy%i$E&nC(~CDOTd*I&vWWyD4FAYhrQa$oae|?bp4N%4fPP)p_5%Zs zslT;{Bjx5L?LgGUrT;5h)$RMv@cyRxXQ*&lvxmk|>QW5)ZJtcH>+{JUy>aZtX_M0g 
zTK&a8&YrfHp>!?6bWI=E@G?0dIUJXm8_&mQZ;=0948Qw9(t^CgyaGZ3*8Boi!j^9Y z1%&y)mR7b_*0xpxmUe=Cmf~XnUq=ehfWm+OpG*j_fUMT5nA39rM{V}`a8AfFt1FxW;vvP()pDU25lIEBN+2Y!2LXH2Rh++L}fj?W*%8jNdT0=9Sx=3 zrh5^`OTbZwjw)}q(yzH+2cWD&Gp@su`bd~k_4Rc=o^&07mQ@FI?zaOnaoRKI4f4x! z-uNpd`9Sr!0W!?*V+6V)seP`QQg}$6%0P68N;C>Z>r@761V5xufb1+EUAY^@HWBl$ zCQ7O%#!MnUh9r0t>_Zskoz2E* z!0Xz9o@={xXfw9$TTZ){OML~Kqs2^I^|-#^INPr@sw2bYvt^O|&2~)Q1wza*=0>=l z(=|feI|a?0J6T2~#PCU@l#_(nqGf7l*O8T&G<_QGX2{=loe>s|1S71o>>?p1o-U)T dvnSV4iGiAT)G3RDGyq}VHvlFkISqNh{{Y78d$9lj delta 6436 zcmai2RZtv?vV`Dni@Q6)J-E9DSafkGEU;K$aRMYk77gwW!C?syAh^3jaDrQKdFP&b zKllB-zpk43>7K5c>WN803QI!zUZ#pD2rE~mhjNO}S7)B|(p0ZQ@;l8o)6A zm`pMhiXKh$YuGuD8_f^(6-0MtD%CsK2PPxBN}%M^$k+z1SuJaB*5kBfW6;!}p;ED+ z&rUuw0aR21qxMXtf1CjC_BveJ*+)Kh=>P`~FzwuJy}ZG8Ah-V@4{K*kF;SS3ItwB{ z%uSsVS~%}cC+A{>KVLlyIpY({*0K5YFzF%f#TsNx7rL?b3~PUS{vis0Q)fV%JGel$ zK9wnv`MFid?`3D4DR^|CmHD z{~j*&K_ZGp>ZVWDGOQ!48y(~Q0qPIIbff<1?H~MjjSp%2d_xt$7tEt!f2lMy*&YF9 z&?i8&Lg28X$`qwuqiifv(YcHQd?%ZYewopYjdThNV=DDD3q^%*>MKVYRR@jk+ZAs1 zX?tY_INMM9G^f?p@zPRFa6RNjP>|^4gY<&4--BDgwn6JF0?Sw`2=-k=94aIwh55f? z`YoHoR-Aie@T1PI0D(^IN#RRtBusf1Av&gfYPB}}%OOhCH_u%4ZD(V=?9|j-_S~@uS>Xo#Sd6;w6$f@Y6|-A%a^0_P1Zq|9HvR;+ zSab!u-fCuN@{&ztLS4k9Xx`@&GG~JV{2G}U_Qn_jSH&wt_EtarnI-DkxJ~rP4*E1z z<*IefpB1!}(Pj zVDjtB0HJ%S*9B~)TQci*PZmEqrf@d#jUtf_a#-=V5G%2aP$`6#zfdrNV-iXgcnAg};7YVi8$tkp^TI)F7+k^VAD~h7Ggv3#Kpy&T zJPCu?$y>2O_6@wi1krq|>gTV&`3yMkzQhV{Bn8~ay?xcv6UmW`?#;tJ`ny`QW;R?fFoc@9#G@7(iol=2t|aDA5FtNbBSPZEw=wN$z=1mLS8`B*)wrl7Kpr}pGluh z&Xf}Xfu#Kb+{`;K6ZEr{cN}zCjy+C?5(E7%H~qn2$+(lw#dZiFPP(%e4+7Yq7h1P? 
z%&e^zT~a?PU?SEujW0U4^qs@!ZuXcvuRN?W1`HHDwclS{ophXWK+}G`J-Y!vRy>Vd z6lyd&9+Ou}pN12&`&&|-E=)lJeNX$Gi81QZo0}GX&z4~k4$|Dva@H@RTB>s+~^ePXvx`GL0-#p9vgMhpn^n5vIH_3Ru~ZHjl|A|DqyKU}p^eE>{(_ zLH?GQFyV?OCnbb2PdG&^*+7V_tr=GOYmH;aYZ)JXKC0*g5SnGaiOtUVQ(Vy-MZ4Gc za-AHH+@OpUM?gL-L$1Ryot)5vLTrW?*2;(@5R*ACC@+a45ScmO>MJh?VP^aZoJN_) z{9BM{j>O-nBq{IGN(O|b%kGT+)Tehq&54TO&?&=5a*?-g+M*f#=JQ%>#SxOOx>ft5rTZ7Hh9?$wN&L3^Og;x9;>n!F3*&gAC?6-1HBKd zbeOou1M1%759kt%fD_bOtK(YITS2O8;Hxp#h$8W9XywGh+fzUju%I}#edPshV0ZlF zB|yynjLy@~k*892u%%PX`{?+Cv*V-=0GsZDUr3_8r^Y%7K9ClYU6otm7_ruL$}r8{ zzv?LmPN=W{yj>ryy2(8nh1Hm)WUF{s8L@RxX{&O%@mO+*=D9uJAU!QIF@DAInkhjuOEygQ}~%~^+CD;(V{Qc#Bk2Xcy`k~QkmIJEo4Om3Iq z3}NX2qdL?~w0a1DzCA&)7@;|ff5XjlCC1^Bfo)ltPt*Wh-(i0r^gfFg<<4=$(s^u> z;Ut8~Tzj`IswYge64!Wbg2zkD4q-+r9wBXX%dGaz2o@Qo3XU%$XCkVVA0ToCr1lDW z_9iHHGX}_C9u`)JpXs2H8|k%_4SqKMTc?f&icq{j@I~-SW=;ZC-|q77ZS8S*nc4+f z8KaxhzjPDyHPNmh*jJv|GYvcX{D{cP$6(@rE8K=`P`>|K`#RmR;k@`A`<#1=#emA_ zQ^klm2J{t(Zu@QeXJH)}Z+|$nizS5yV)f?ga((niOzfeV(=OcuJtKwpBquips{l;q z;DgS$J`5G+)%TG`pI7V4T0aEtGRh?{wA4SZK`osHz(L}(qNCbc(SfG@tO?M?uE7&) zf~4x2@iBfeD$X@Z6@66vYJMRxpw%t2W;88Ld!m9~K*(B$0&e~0EIyiP{!|}l9n07F)TmqQ=dp4-(f9U`li~Z;PMV$G^iov;(Udd4~e&x!@ z_>val;lt)}(pAZ}yIVGFD=xl=qHk#U`V_OTX?BkJ2%z5%cpJ)ww+h3BuMcuc7(DY$ z3#p~?ee`iOZd#m2);&d{>t8uc2~X))O79=MprR1B^0X_)wVYe%^yozCXj(xDXRLN@ zj>69@kGKV!JmG}}t*ZB5@lO}$B~h;k8yZh}uKBWqGQVjQdRRl&0XIc{H_zF?>G1O` zraG|Hs-d}Nd}q~Q@`KX&FlDYnRsDr3)%4%4N1cKbaCNY{K+0og-Rf0Qt_C0?4q5ut8sv(KRGWo?dRwe-HD!@j|V`_BV*EKm&Nqekkk$}wga(W@_Jxh=GP-%AeW#c z=MwA<2{|H$9evubW9d{mlq zb*XIp)lUn0MR>NF_WRd*62_`fs%eVD=3nDzgVW2zzmkkc&|ZVMmv$y2iASC7GnUHO zCW_zV7SQq+D4}IfCw&kk!iXRVEH+6BGw5++`(gb%RZ+3;gp?M)prTxh_G}_|ZVub5 z>!fy3+gIz+)JqkYuFiC5%#~moCv87~h8nl{6x)^@=@Ur?1@+i>3ALFmJ6LWIOhFGG zy;fb;Ta;7Pu^qB3J9;TurT*TxM}>-;RK@Ieu->nns(wUIOrHDoxwNw4~!9mad4dWEBafOf|f$Jf&Wl?86t4DK-vDW2o2q>ivbi*Jqd~=cH{3J38;X4<6bnbrSpdA{m$+KY~b=FR0E;!oZ@*En50ntD5xSveqL%)QZLMSJ5m%D7Q8-tJ1vz z$BUsF!YMFPq!t=~;IS`nVnHlzyY#BG{n|;JGE<;YdUd7P4E2UAN}B+fuaD*^JYDn^ 
zu~Ve}TGb^_V!%DQ^-Q|2znwOYmbA9nTo$8f0L_k%G@MUI_ymgax1Y*G4&gj1|tIL>^m&Wj>xSv(&z zH}iTu>w49CJ3ux0RGS)$M6?22eykpiulYo}ip#De2WSW=U} z?>_8)=*+Uv8nOQF^_}cidt|Eq_{1z<%&-G=b94aZw{)2X%`b-DHkx+C&@@+#Wge3^ z2k5^$b~=aH^iP&l{&{kSSF8`FAkPRm|FsWPs$DmmNT16*BX0-QFtC(D<&Uzh#fQ4z zO}Aw<%()j(YkoO4X|^Z~Os!p5q?tD;GZ|_I!)SckBWM=RC=qLlhb!W~*f&ea)wCIw z2h2m)D+J#ixf{ZB{&F`3XK2NSVpPl!L}OBhlptx%x#1rV20uRIi5IINLvlxJ9UAC? zcR(T%5_G+mB}k_1(?Dvg^UTq9gf__OT}|L3oM90JlJC$Za=q23G7%T-{;J{YRFK#t ztPvglO#xPx8FA|FQVFiqT${3QUjeiip1|@tIicDkCNIa5($WqZSpC@b^wzu$e$Cg% zx*W-{3hwwbNBhCK&9&JxGKcX>srt27|D)dt)T_qg07_+bTLG zffmvpI`^(Zx?trqlqiqAET?4t!}=dK)~*i#QXU z&sKMF7VI=ekfLYobmw3E#WYw|BqS!I>C8LaK6ebtAVygx3d@Q(X(zbu(d6){a z9e>eE82aWE#O!LZ)kO9w{r%QUc^;mSE)*8}fwD*qqFo+r?i{!_Zxc7nFohoT_$@(! zW5k=Yt+6p7v+3O09E)8TPvn+rJ?A;c8BHEWmgb>-EWwg0!UEB(H_*+C#Wf-zDHd*3 z%6P;u3Rn|zc3D^~`^8aMscA4pZE*1rB_&ce4aDRIT?Y4D+sC_jO>FuGn7PX_LQ6)N z?E4ie+mY`?nRrrT-BX|(hEYqNQW;-wNOF7X`X)yK6B&YLbZ7K4i6o?U8k#!k;GDij^w`3BXjSoVvSw zO~X(Cvu9PhS^U-RciY z8HWdn{>~#sM+@`at2KPvN;i_AajZq!w=DiQC4-jcX$^mHSa|Hm#jkhMp|bL zI#&ekD}pFp`1zXF7;Y4($)J#(e-fo$q!nHJDh6&*e0}=L(_SIV)LVyWo;F+Hv`05VaF~-<&>a$M%g#W5 zr&5EkoL?AEGW+HY^a6p>L|Kj^dpj$!DvXEuTw2|gWqw+~hc3w9MuJTJR+XPW9T0_p z9FeDI^&NeAMu31)p3~Z;rbn+ylCJt7xSQpTWL{SY?Kc?-1GEV4L_L(l{5&6Zp-r0D z(*+F#Vs7LLLeO);^kHbBoola#f~Ys*<_9@!BK&{@r3BePXdm)i!E7SPOtDO*10fR+ zEQ;sjNv+Rbki(0kp3sBlAe`LGvOnpYy{EtUGKJWok2G?lq(}pVzD`UGutslmNlHpDrDPJJB(&t%1oS6B6yPzQ-SF9R+7jGxvStQ^-yqfWfDr)XGYe zJGl(rV>eZ(@K@*U-D}JYj@1xdrgwg5i^exRtKX|VMCO%k+xtXqjqMKxNpl`oD~|!d zj$%r7hj#CT;c!FXV0KEcQrOr34^eX@l8xpUJ8z!px7N z%qpBH5iNMM?}O06GwD+N)rIvNW!q{v`hqPo3JK^f(JhffoX8b>z{Yp~bIz!w_4%gX@W$x3pe&Hw#h1lJlzwaGe*@rnrVn)SG zjzJQ1j&GeH-C<>(faAP+@*ZK6V{wxw65A!VkESJerWzM30=vJ*x;H%nxixcmstkP9 zIqQGbGFwu=|L9<>+c`!|PKe(VTVQD-2{!%%{Z-os=w3)C?7CWov72AEFso|>xHbQ# zYJF!8vCMv|d%U`R>q(I{#$jGdwMF2Gbdn?7l=p zr}z^$IbHrP8efc@uB5elbV*CCz1uq`k)0)NuH*D=_(oX;LT%rij$&%^pj2u_mN0ps zPc7PT;oHf)K@~_!21LUk^;@+^Q-t0gs~z?|sgIotwYU@Cf+GEgiy>DLbhO@bA1;H{ 
zHElBI@T9i|IDR=im#uW--Ys*wKKs!@Tf}I*KWzj!-LI&td(dXb<5`~42o3c}(;MxK zF@}AdBf9!qwfUSsAlLGdqYcKwr1vbOspF20qzftIWCsz(-({(OsW<7@GRfdE z#ReMYGRxLp=NxH7NnMy7Hp&YFWawM-(Dm}j)baqsc~pAzXkdE0%ltRfd4&A*SX93w zyaz)#%A%diLV;!Bh>oGCj$y(FVYmm;?&K16FNwa3J-5z$_tt#&`i`5H3r5tbKO=?q zE4cQmwYg+lXf%el66Zff90P+fsGa?7U)5c-a48y%*&Nh?HsGMDCa#4I;5OJR zuZdp718C7EF|(Z7uFah*x$1HNotes cases, brainstorming - + -Feb 12, 2025 +Feb 14, 2025 Methodology notes diff --git a/notes/methodology-notes.html b/notes/methodology-notes.html index a69c594..5140bea 100644 --- a/notes/methodology-notes.html +++ b/notes/methodology-notes.html @@ -283,6 +283,9 @@

On this page

  • Interviews
      +
    • Structured interviewing
    • +
    • Group interviews
    • +
    • Unstructured interviewing
    • Transcribing
      • Transcript notation
      • @@ -341,7 +344,7 @@

        Methodology notes

        Modified
        -

        February 12, 2025

        +

        February 14, 2025

        @@ -691,18 +694,45 @@

        Theoretical sampling<

        Data Collection

        Interviews

        -
          -
        • structured, semi-structured

        • -
        • lunk to more detailed transcription protocol

        • -
        • See (Yin 2014: 110-113)

        • -
        • See Becker (1998)

        • -
        -

        See Fontana and Frey (2000)

        -
        +

        See (Yin 2014: 110-113) See Becker (1998)

        From Charmaz (2000: 525):

        A constructivist approach necessitates a relationship with respondents in which they can cast their stories in their terms. It means listening to their stories with openness to feeling and experience. … Furthermore, one-shot interviewing lends itself to a partial, sanitized view of experience, cleaned up for public discourse. The very structure of an interview may preclude private thoughts and feelings from emerging. Such a structure reinforces whatever proclivities a respondent has to tell only the public version of the story. Researchers’ sustained involvement with research participants lessens these problems.

        +

        Fontana and Frey (2000) spend some time writing about the emergence of an “interview society”, whereby interviews are commonly used to seek various forms of biographical information. They cite (holstein1998?), who noted that “the interview has become a means of contemporary storytelling, where persons divulge life accounts in response to interview inquiries”. They then go over a brief history of interviewing in the context of sociological research, which largely tracks the values underlying positivist and postmodernist transitions as you might expect.

        +
        +

        Structured interviewing

        +

        From Fontana and Frey (2000: 649-651):

        +

        Interviewers ask respondents a series of preestablished questions with a limited set of response categories. The interview records responses according to a preestablished coding scheme.

        +

        Instructions to interviewers often follow these guidelines:

        +
          +
        • Never get involved in long explanations of the study; use the standard explanation provided by the supervisor.
        • +
        • Never deviate from the study introduction, sequence of questions, or question wording.
        • +
        • Never let another person interrupt the interview; do not let another person answer for the respondent or offer his or her opinions on the question.
        • +
        • Never suggest an answer or agree or disagree with an answer. Do not give the respondent any idea of your personal views on the topic of the question or the survey.
        • +
        • Never interpret the meaning of a question; just repeat the question and give instructions or clarifications that are provided in training or by the supervisors.
        • +
        • Never improvise, such as by assing answer categories or making wording changes.
        • +
        +

        The interviewer must establish a “balanced rapport”, being casual and friendly while also directive and impersonal. Interviewers must also perfect a style of “interested listening” that rewards respondents’ participation but does not evaluate their responses.

        +

        From Fontana and Frey (2000: 651):

        +
        +

        This kind of interview often elicits rational responses, but it overlooks or inadequately assesses the emotional dimension.

        +
        +
        +
        +

        Group interviews

        +

        From Fontana and Frey (2000: 651-652):

        +

        Can be used to test a methodological technique, try out a definition of a research problem or to identify key informants. Pre-testing a questionnaire or survey design.

        +

        Can be used to aid respondents’ recall of specific events or to stimulate embellished descriptions of events, or experiences shared by members of a group.

        +

        In formal group interviews, participants share views through the coordinator.

        +

        Less formal group interviews are meant to establish the widest range of meaning and interpretation on a topic, and the objective is “to tap intersubjective meaning with depth and diversity”.

        +
        +
        +

        Unstructured interviewing

        +

        From Fontana and Frey (2000: 652-657):

        +

        The essence of an unstructured interview is establishing a human-to-human relation with the respondent and a desire to understand rather than to explain.

        +

        (fontana?) then goes on with some practical guidance on how to engage in unstructured interviews, largely concerned with how to access a community and relate with respondents.

        +

        Transcribing

        This section describes how I transcibe interviews and accounts for the decisions to encode certain things and not others. It goes on to explains the procedures for transcribing spoken dialog into textual formats, including the notation applied to encode idiosyncratic elements of conversational speech.

        diff --git a/posts.html b/posts.html index 1519290..bcbf026 100644 --- a/posts.html +++ b/posts.html @@ -296,7 +296,18 @@

        Blog

        - + + +Feb 14, 2025 + + +Week notes (2025-W07) + + +week notes + + + Feb 7, 2025 @@ -307,7 +318,7 @@

        Blog

        week notes - + Jan 31, 2025 @@ -318,7 +329,7 @@

        Blog

        week notes - + Jan 25, 2025 @@ -329,7 +340,7 @@

        Blog

        week notes - + Jan 24, 2025 @@ -340,7 +351,7 @@

        Blog

        AI / LLM, Methods, QDA - + Jan 18, 2025 @@ -351,7 +362,7 @@

        Blog

        week notes - + Dec 18, 2024 @@ -362,7 +373,7 @@

        Blog

        website - + Dec 9, 2024 @@ -373,7 +384,7 @@

        Blog

        meeting notes, general thoughts - + Dec 9, 2024 diff --git a/posts.xml b/posts.xml index 1014a98..4ef3796 100644 --- a/posts.xml +++ b/posts.xml @@ -10,7 +10,26 @@ quarto-1.6.39 -Fri, 07 Feb 2025 05:00:00 GMT +Fri, 14 Feb 2025 05:00:00 GMT + + Week notes (2025-W07) + https://zackbatist.info/CITF-Postdoc/posts/weeknotes-2025-W07.html + This week I continued my methodological readings. Today I got some requests for minor modifications to my REB protocol, which will be rather painless to implement.

        +

        I also attended the Public Health day, a student conference where grad students presented their practicum research. I really enjoyed engaging with the students at the poster session, I actually learned quite a lot.

        +

        Next week I’ll try to round off my methodology readings in anticipation of my REB application being approved by the end of this month.

        + + + + ]]>
        + week notes + https://zackbatist.info/CITF-Postdoc/posts/weeknotes-2025-W07.html + Fri, 14 Feb 2025 05:00:00 GMT +
        Week notes (2025-W06) https://zackbatist.info/CITF-Postdoc/posts/weeknotes-2025-W06.html diff --git a/posts/weeknotes-2025-W07.html b/posts/weeknotes-2025-W07.html new file mode 100644 index 0000000..f0db803 --- /dev/null +++ b/posts/weeknotes-2025-W07.html @@ -0,0 +1,820 @@ + + + + + + + + + + +Week notes (2025-W07) – CITF-Postdoc + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
        +
        + +
        + +
        + + +
        + + + +
        + +
        +
        +

        Week notes (2025-W07)

        +
        +
        week notes
        +
        +
        + + + +
        + + +
        +
        Published
        +
        +

        February 14, 2025

        +
        +
        + + +
        + + + +
        + + +

        This week I continued my methodological readings. Today I got some requests for minor modifications to my REB protocol, which will be rather painless to implement.

        +

        I also attended the Public Health day, a student conference where grad students presented their practicum research. I really enjoyed engaging with the students at the poster session, I actually learned quite a lot.

        +

        Next week I’ll try to round off my methodology readings in anticipation of my REB application being approved by the end of this month.

        + + + +
        + +
        + + + + + + \ No newline at end of file diff --git a/research-protocol.pdf b/research-protocol.pdf index ac9cd74cce557a9c7bccc32ed30559a8241396d1..0741cb3e65393bf97f0180fde4ef53aa70d9e43b 100644 GIT binary patch delta 8879 zcmaiaMN}LB(pCljJ1mB)>j{AH2nz$8U7XB~Z2=x@1_@$614S`{uRbD#N|N#_!!rYrq_B~= ztjdvtx1wUo*0o6!Z?c!a8P?^Lx5869&>* zquq+wDoTjc&Re^1v-eA03j}a@u zxt^&9v3_~$ip$kul+s8~KXEP*?=TvRtf%ievVGGc}-6Sg}qWqIZTud{et zoUp2sZNa`;NoD|F3cA}N+(=7-niUFz_eey2x0>LQ%6VFVUx%bURDg*ZbY=IFUcR2l0wSSO74Qk z@n<{D#LD;^Fh7;1D^yuU{#brjDd^Y+`O35tG!$fr)5WCe?>@+QUGwbbp*5o(Ui|QP z`l+}3y*RoZ@-7`#o1;c#ArEoDWvNlY~D3eZ8% zMufkDH+&l@bnVmIH~^Ma!>>osKYrv4mCd%FWR3`PeE+B%+LqHn?^4*w65 z)4?E!-p_q@jNt7%rnePyhHx7>SqeC(a*CNt8mp&G*QBa^$EZZX`|aB5tC_y9SkGJu zhgmlsLzR&Nab3Ztn^E!^=Vzyk5uc0YHq!fx&Z{5v zN)&`wexIwtEWIK^_S2;ez{FQu-mR_i%Z~?0F+1yOC~T*Z@LF8a-lW%)V|&Pa313#? zs!bC0{)e`2yBpc)rU}FkWjmxfcT?EK)mi@1P^+`8p|knIn>Y9{Ltg66vuldHmj9~1 zn5A+ic@h|9k@%5L*H{2KESx$2j8tM7-a=d4ZXjyOH1GB07@ZWHV@wE~OVS3I4@g2# zOb7>96bD#VVRWTW@u`HISVI9*V(AnX6b%BHtBnkQS5C;M6=##O`JbQ|0@b}`D!{5G zsHzAD*n(9QCqBQ5)Pkmnuev7*uysu2kkA!_LbDhTJRB&?HQmbbRL6dI%wI3L5gxe z_*}f=y#fIIE*4!1p;J*g4s+jxLlxzec=-n+m9rUOD>$vhnu{BWn;$7X$#YW?;|233 z#sg&*i*ra9Q@p&?y)j8lPH18g*;G^SV&25J`;T~b5y=Lf($G%TdOy|*lZTCKabN7X z&E!LYGl{_2U?8dPP}*odE>5LIiAtuiT^hV!SN<~FKJN$*3O5De4YkhtCZulRiH0A9DeCR*L z`aW{Y z7R8sm1?+K><|hYpJ2A2$iIh0pn6Oe{lggkyv&tytI>;59aplqs9!q0V4jF77rTyJ>>vF%PEfxpS{;7k*ZB01mK+#0SVS=B+{r*E-u2r*;a zj?Uz{z}1vu+p7JOuR!^AEYWFT_Comu(GRx%#YIh$i}s5^UKIsBCGihE_eFxB%p>bJ z4Oh)zk*)3V*w`zPT%(X0$f?#7+oR8W)yDh!8S7F-Jzap2eW6D9wV!vJ0qy$5>wMnQ znP|C#;!n23&hpL%!kEYPhR1d8rRH~?&2C#XzXJ!7VWss|KUEcF9KX)A)6@L=)cT6* z%9qA3OS5k{*4>%8ly$y$+n+YzwHx)%>J2PXk*Qskq`~0QXu2;n5QZdCns^4u96U-k zmi+2c&)IowPr@cDbXjcr&>1;0a)q1_)Ohg%J93pcL6u((VY)5K@_zwXuKB2a5+NOa zL8Row<^`gDdg{wJH8#Z&I-ZlOQL-9s1G+$;YAwxbCd6sPoZp zfA5u?v82LuonA`IH3pHwVO*_oocKXxDX}CR62WQ znHxLV6M^pB+NO=o11C0#f82==i|wA0Z+~BQ^$v?JE|wi)``gU^J&{E(Js$rbSB<2D 
z=1P2uB!?*co@x3cizf8H85~H6`wxRX3B*pv0dS2Ogi?tMw)2c92XJ$FcLZuBY1MtD8Bhx97ZZ2eUr`{NB4afboVkj_1)fW+eqKst|>i-f(ecG zG58|>@!M8nd3AT8LQFyj>l(D>?dBG|sd7weWYu@M4ro%1uIpUnvO6}JAfZ!7Q&rWN zWnG_hWN`pR6`EU1s+RaM(kuBf2FSj7vw<{3u8k~UUs;~cxmX$)^ZFen21A5FNz$dE zv>qX>IU3i4y1C4Fc%Q_3--4$Vqd{W0(`0lS9$NqDTL2oAC=^WqIU(q8DH%mjRjyQC z*|;`+;uh)%P>`#_|9CC-MDNrV+a3KjQq(}?a4<93diEkcKmmJ z{Hb-4LP9qS; zJ9f9j>d!yF=yJv+QPY4QaajN91l)dRiyc||JXhUaJ+bj)pfQ&|Zjj}p2^_&q$vtfb zjM&+q5cdDNZ5Q>e+}EX2-nV*Tw%0QFV62T^%mjzWd2iC0?U;Tx{`03b4!4a^ml=t9 zU7qgNAV*e(W1&$^7!8NP(0JIYe7an(lUx(WncFea8c{AQ0~7;DT=QLwfq6E&UBjZg zf%HxE1;5)Ei|SftTH@w*8`vO4eJ~_|a)SXcjHm|oybGL|v@78Dh`1CpD37U_{^7JpkDQ{m=bUOkH2*OUt3t)kC z6uJZUlPtSPm~gaw=qkUb;luENY~s{NClm?MHP;xc5=iJO68G?6tUQRcrfe7tpq|mP z4308@Ra=*?j#XR|2^&YUuO}c#W(wzzD76&|vtvdtMMZEI&g7m5=04g>1T-ZBqm~%0 z3>|!$=pe5@4%WV^Y?x`ehn@$ladCj?ly%m3ditF2^Z@N-i%96D84RiK0$8+wgmLb{ zpkE6TGH}Mh-}6YU%rPO%F68+~J5b2TgQ0o6K?l%#vcG5~*GO!{u@=SAfkSXKzu@R* zz87^ABlCt+mqpUIi_vpB1qX!}-fyI`6<}$@drO1=LK&$1p~m_e%Sda3`wP7_aFkzV zC=TJ^7Z@R^e%TTWpE%!*!9koj@h5c&Op2~jv`IyvUw!X!9vLzORtoV<=1aPMkyPCZ zQ_lC8P!M$-k&ui`_@WrrQyxYhKk^#TfezqI>0Thp?nDO=q{6MRRHd*CZDMO!O!yJb z#=`bHO%qo?hnc4A0~O}mNx2J6Imeu`a&Z!lk*DH*PV)|}CycXn%q)`?kue;;<3wCN zwcNht9@p4t42%vW=DNdSrNV`J?_?QA9j@QA2&;XH4}g7qfg?uCh@-4VyoUUB07(PN z<5~O;$v!TK`g_Qy6Mo8R;h7za*r4^^Ll!2#j-o#ie#fz?Lv*PskZ zy^0cBaj4p27#N8f4#i)pRRM62B?>7;hIs%-GIAWGD9(8{K0#G*NA1{C&uYx6RQ%@lJnkMLSc%_xV6ha_O>sYGCe#*%#=xThkuFj_4s za|r3+j>|9$=m4;F!ws1azo3usegEk2@~4hh!l=4M781_Iv=@X8)EXaruChZ|tUO|? 
zus<`(I6XKx?($1PSGwChs9kB95V>s;G4kmw%Mp3myD*vk?rJbLdDs+I3}%MCmW!q@ zDrSYSJfI24e9pX-uvc269w;0wFQ=A9ww7v*&$gw-GH5Nci-C0z=V&uK1 zbz_1=DYP1-3IxUsCrhs#KRdQf@8e1YDt<%I$%KQeRWu~Fp9wO+sBTEQlg&=ouK#}4MBLqjENf45-n z%@(2**`%G%PaAT01h7W({xbvkd@zs$4Ba=?d=sz)$~GW(#Q$>}=3+W=)>t6#W!O!H z7ex=@5m83T@^xlU&TPgT#g668KWxwpsFwv1RM2vhRDt5lBzIYYg3vbwBl)|N2^Qpb z+Ph~PYqWRh_Mx1a^BL-Ee(LJ89Tn;XZz;A#Ym22?f_|Z>Ft&u`sKoO4+dxh(dwqJU z)#c%j&nZG!Jv)mhk(i5NHz*ncS8ooNfqK_vVE|Je#ub#>LFlLU;HYW^#bdcub465kdMr)KqBPhOhta8;P zSvLJ*u%i5^wDRO&u}av$P;PqXtDboursvU+whQJT(eyCsj3*eDj6(0}hDmU=;jVAD zezfYUm}`vm>(25>Ryhd>gj>62tZfGRm_avY-*En=JGGQDyvFm|7LvW0a$;ZCW zW{5g=?U$-RtWQW)IWQ$@q9OBBR0vG|W*AY8#_Py(fjT~Zzv@DbJy1@oE!Af2x_p#Z z3u;O}D=J5a(fjN#!teyI;{j`YEBK^h#XzfPRnONvm~Tee48{&zQqUKDy!FkLCfP6( z#^-YsB9dw$X`~ZLeW8d+D89wz1?blz7loRA*>?7{3{4(g{o^@OfZekDFcM8LJ!q`DOyfQaL*)Ksv05XeLTZLl7wjxm2{#hGECUnrCYn&&S) z9vz%se1(r-Q9Fr%@;^H2w=1MgrvJAy*}<`6$@~}OGZqYo3Axgy9(=R_2a&V4CD;7j zbj01!K>CKabr|&t3xSt&#Um0{FEb3?SnHdP>x-YIP^Fn`tBeW6 zKTW^S(vNw3;!af$a*&Q_WnLz&IsJ3|St{V0M$e#(AH^!omtFQN;2xo?$;sU7F{(fe zH={N7f{bqAtM1qKnApPQnK?WV?nfez#?{xaz4AP6h_N|rLy*!Rla{Ml=Nmz!nntm# zXW2J#&*y?HNXAzQ{QkXrbEJuvB%{*%3^C&`7blsk59ZxTvl%R%mlO2Z4;OD<;eJlQ z(3ie%>fV8RCYCn#5p~&Z&&q*tYcm(kH-na*d@npm?vi|UlxqehnXX}&vky`J7Wc!{ z84IcNG{D*TW5|U`xJOK-ELogoC*_bqO{VKy?)B=+t{BEa|6L}B>&y(**ry_0rI>us zKWhnoYMdf`qL}krACT^>s7%^=W*Zfb)LJ-5{51<#%?2(;B)ryis1I`7^%aJ%thZwQ7fyp5Eq=7gW!MuEW7;h; zOD|XA9)~4LYa!7eP*`;|KD+g^ww~R<1Mp-3nX(tgUXb)<})aeO;0lVY1#5ZNL{=ilbDO}7n4q2J3&AQQ1pS$l}MrTI%( zMoP(gp_SsZ-$)3;PPXZr7bq9uF;4%A`!YVLq zrAKdc!^Kak1ph&+Jmb-8*n?tQ88|;d4PmbybJMyTtb{%9NL*kYS47*+9MWpBtAjd} z?{XwetbT?w&1)m z4zbk=wk5YLt#FI5n>FL{Abx)98;TWqFRVM421HwsO>y?-Ool)R+un@{;p{D1`@J|b zF2xI+>zISrRXRn^iDgYYNvCeAJNetNf|a(d2X$5KwL80wW3S!RPKitje`NzxZu}%3 zON0y9qR&a(=&K$u<6pVUpZ6qqDI3JyA@c$@@b=}8XhgzLDTrJovkR5?bxxI52almb zxt%04EJ?d{#cEH0NKi4YUQnX6m02z-`{+4 zb;D*Q`ZxQZeR^`BD|^GO`|rNz5@M`rdDmt6xnj7Ju`f6L6|#%O2)e9<|{7k;w@SFlBeh_rqVQ7y2V8hxKV@K4!?@e{)l}12#4wo^CHpF 
ze46st<|$^IMJzEg!(<%}%?YBp;zoYO`6LHc41F_;3_VW)KlV@}LSFkwoM4wxV(U8L zTdxotFU(gjBF{I$TjKHEWg|V>X1NS+6v%J3G}qo(aP=wVdZi;M5zIRAq5tZ&`OT2U zL*&>I;gBy=0^mC7!n!!UoF{eudKh@PqmrsNu{c-GFJ&&#>y7hN^;FQ!&(06Bolt~J zbRX`&a7X?a_gY|1ft=~{OuBZVTlFTV^m)u8cWp>7I!>o$@uq54W>yiBI#DqwC{F z5fEWjaYEyu&!f?ATRgW*cPeTkULQQDI{8JrPa;yefLARF$cI;g#{Hy*5F{Z%e;uhi z4b*QIR*RR|@nWX|ao(ILabBU>wEnj3=D7883nH5HAo8a37AE2@T-_5K^b@(xKPwQVMD6V#R7!S?EHba$ zp}>)%I?7{z2tL0`G$`A;HUx+or7FQ7kA?8k=PCcQSp7H+E{1@!jn&|zdf>MQPtqSc zm6Zj5SsV#l1lH50FSeme#gS8}O-|obukT=hGw6Y`raLvT^vLHdzY4EU8j|A35gZau zp4gdCFyr3-43@@fZqAfJXCV0L>(bj~LAKHb;U?kJAt7xe;#|)ecrhaLx#%ah6ZvZC zT-)$-K8;{n@wbZjd0#&ktUrlLj1sCGjM2Z!&&-R&6`U>PvbwOrE{!zt+V~M6)!x=JjAeR2nLJB*YX`g($bgBci*a7!JFVl}vd zj%ctc`!7(cnRGsbl0-tdVzMMI9b5^lY#eg<`-=NhfiQbRIDZpoD7=W}#6$NQMg8NZ z(te6^%@}QJjeHZ`_E*NOt~IloB*lm~Hf!~dYyS?JIR*j>L`=WL$yCBQKq*#Bwo()n z1T8g7f|i+TPFdRL4-~<{k1VNyDxz=hsyaMtbeqZ;r$c;+E+)sX3&_hD2|L^yMZ&_C z+k!8U$6G##v#7d~bR~c6$MwtT^5^q3TQ5J+q?T%yu)zdvme-@>;zBChkv%>`eXf$$ zlMYnB%~>Kx$?%zD_B2W7~LES z@&T(%TR}FrT-q7#=MH`_F-ma78HZIdWebfz>ivn7@k@JQ;nxBP&Tf zgrVmQUsAKUQp-3{FJ`m*H(R%0q1`R{krtCXG5y1pq5{8hmQR;2>kXO_yJ905>~>g-1mhwFpxJ{|n!#!-co#6zQrjani;pMZoeL;aXT)od8-<;K}t z=(>b$pJS8354P)LFl~9VXoSy_ zABjgZV6Z2P{@z(Yvn3hXE}(0e>-(|!BvM`Mh>nPtY=plS07ib-`;FxSA=8e_75H91 zhsFHnHA7vu@(L%H`_iJV@~^mrMRS6rZcgH(MmheLR`Yzb{`Oz2i10rvDg_^zsuka= z=k0)GdN0^KO-E}TnjCg0ODlferUnhj_sp}$vAxW3p-m&*gmN40+Z zdUDvLSWMmr)d{`jT@UYz5L;z~s=P+)dMq-^8e&Z&L?)-w77f)+h8kjOh*nh@tmEJ( zpvvd{@9R;m03q<~2D*;&L0HpI&GYALy-6RMrnp{RopX11vYM4_K1s#ZcfA#ln=^;` z!ZB9630AvH)sx4#UQW9{g^enS%}?k^84Fc_xt-bnO$V6UrIKJklSvA4aDvz^xXplO zKnpVqPM{gL2`>+?sVSc^AJ7;C;xiZi|5v2`!hn{7@cqA!BJPDzEhS@u-#16!B#X!> zBxJCX+c|!OMc{y`ILsnAc+#kf6ULJAM;G!&1Z}FaOyl8?o)MX|KHyO@H(rHeCy#ul zowuKa+#R=>C^^`7X!hbQZ2AD_ze}G{1#bA<_T_y1?XPheBgVFAwQlf@_ky(`H$pvw zV#n2cMD|RGreFN}(0So})nQC+4wSn}ur$pQjXg+w9eHC!Mec{=8E1oA#sk0b0;DSk znpfwcy7Cjg+9U!!h=%ugiDUhFq3QOhv!cX!!8A{?*&tjZAVH}ylH#y(mcKD0yl{&< 
z?N+}IK}zq12qs=uAF*wYQt=d%%$o)(P4PUKuqfNz3H`e&aYn5GThnfWXCJ2pIUxM!2@S`k3VXdQ#gHl$YLDi%WR$pS6C#cicIF?|VaL+5-og?Y zZ-tdMc_Qgnt zp2N*PdB-Q;q2kMkUqv+qo3P-wzPe+%1!)h3IBLh7hVplbAxfO=|0F|oQis_ENU?{*>emW9C`v5(R3PB-N?Eu9M&6*O0km55Cl$})KY zZ3=Qh;wmkF_sG|!)n0)G9f$?od3>i|^zCFt|b-2XQ-{f*m z@P`BgjP1~8u{Pdx*%c{r9keX!<_nThi0$-u!f-XKC{CqHjzce={W!-Y+)x58UY&iO z{9c!+-I+8=#=kDa=$Ha~d(#|BxxE<`s{A21tE~$u%>mujYK`GMH!#ogbGIQKJ$6P1 z=whu4%~g|4cUISG1$iK0j9MVEQo%Y1}C~=#mo$W0Xnc7Q+-BO;nRor6R;Q9j_R3D$$kOQ!Y*0 zZ;zfvpUb%`t-VHD>K8A*&LC{8JJ3ZwDR{*t6E~-_E_Is#ds?F(h8+>hry~7?ouQbN z_n%cxhVcPYQCnOD;s2Cu_`4n>qe-3uN5Y^T<$e1vfm46`0t1p|jho))b-wkUAa-Uy zLHXZrIoa9PZR1VK{#w)P{syPdT9ik{hc87PH$^(gKzZhuI8CiooxF3C>}UtD@_N*% zrSq-LPoT}8=;iA5-8`VJ^*Vg|)pt*e{^z*I-LKi4=J7hM z=mDv+1E4v1eXWxofP6mSCi(1Ww=nzmfTQx*!RYQ_qO-Mp(YzI7-uiE{{SK#* z`eUb+NgJ=HT7N#pYp@#GFhpI7Ef@-gfS}y{ z?0|Np!4}~+L^2|Hdx2DO!M$8v{P#GERXB^=g650Xv_mR+g2Fp1==7vgjUiHv@ytmW z`4|eHy@qL~9^k&H(7G{tQQL*5#nU#0pMMHFk5?-~z}A-2RP<11m#Rv_l&Y%2iGzM4 z6`>alMFxdNq}@53b5Wp#A~S_XM7EKLNCAnkv$%_x<*_SYY?$aN=@!cx^M0@iW#I^6 zW?4)fgcdTD=1NR+LboAPr_s<%tl!@s&x?^X}&Ok;rFDGzK*!QY$Py=F(`l7U{1^ z)+Dyzu0dFSmEECF9FG1UVn(<+`(UUnEmrS`LeG{QLyTS9Ka#KCu+2&u@!lU{n-7)q>M_uZ&9?(2C zW$5hZCM)PtTw4i3Lb5dJ^JwTjL1CG9WAUXeTvr1=p{2h+OnjK1zuGDGExuhQ_t*|u zwD31mw^OC_?PKpS}VZhFES!qGA;at@ZWFw>VPH{W_yY`$|uWY z%%~O$TyT=u(6e_2?Pvh6k(PWag?p^%53!ZQmm4u-W@O1DQd54*329fNJDMS(!X^HbvuhG=C^ zxNP>6aE;851&Su*`8b=4!>30pS$7Ac?UTl?ho$nQjs^||joF49W`a(49n2}{l?G;4 z`EQlQ54V6v%{tKe^TucgNcrk`>+)3jygl?JHdS6{8N7ZLqE&Wo>ig3~cA0XeRi{F$ za0aSiTT{?g5#c_abKLBG((G92xGF8`XBTI><3%2>x~#`vF2MS$)(GP6zP@mzHan+t z3N6qQ5iQa`DJhAmw0(Wb;glE7I_gT!=}X#U5sAAh#ArY*%;O0GG^~`(krGP6hJVdX z@=Z37Sjie=os5Lk6R^@;*E7i_mC#BS#+Q=I5zwm=h?8u=Oi+3x8tOBsN9@H3jnt1(3qqzK)Jz%8)=bIQQAiYruVyo)>$uPr+L)7mFKVUc;UB0{~>oOnhBOK z=^>g1kc`Z;R2yZ!93Y z{ICjj>zgezW{fst@I~}ifyvimWV9t(plm&~Js&v{!<2pZ7pA?b@})Stv2#1}4@4=d zcbS2f@*@S4GAS7=GQIat71#&wtl-?gZ)-nt4}ESGW6in>LBerA3Z3?yFqqNz*>h)v z?aB5I+PpM-KAX*%cXGX1606Gjxs@k-umZ0zoIkyc|4l9oE081_O9HfV7g86GAZa)W 
z<#DYRC!I7E%j)P(;~j;)1z4|z5QCUS1V;qV9-@eM>Kn#swmj$4DKWPe7M7-3+XW=I z#7hQctQijnF_bpf8?>kgqTOd5%TuHbPrJlA`LSz`?B})llo@x_Uid!OzZn)O%((UM zOxUW@nQ^4HA9?3CH2^~@W1kanF6N%ZuuoY}cila6pDvEaXaAD<;(0y|aC|@J@PAWe zUzDI%S4Z$no8#1o2u{sF{8!6-n40j;ruk=5clYjUBKsRI9Ol&7a}`TK0UoXMKK!F?&As z|Hm5s53H|Kwg5Nx|Fxr8L)+Bl!GxD=lOV^b<80W+m*ESWuT%*|QcZ-4QCVEi;t7+H zxDg~t6eFWG>%Q$0czPpiwT+irUM{t)HIBfYgj!XeUH%v$@U=tbz! zuomchqDW}eQhCz2h=W?RVU1x;Vc)P)0e@OTqJ1^F6S}#OPv62|?Ms~c{k}fP5P_j} zutqG>u*C}UMkHL5NJF~J+dU*2x@PFn-YTJRiplfUwQ0m6)xvUcBX-OUe;sRQ{tyxj zh$|uEQbr1*PSFL}I-tr%(r}GOEC%(oxH)PFVoMM>g<1utcc#eka$k& zVBpBBx1xb1gjl)}SO-xH<4ICAAQXxN%e~|%AVLHe?QH}}i3!)g{rghDC^9SR5m{zMK0M1RVIM-@dzmXl)^Ll~fC!^Y8pW260Z`UyX=l*lKN zn~@ooULXkR-7LFuw>-Usk<@70a2*LN^C(6gPE0|j0+N0Ps4A;M+{C7R^K2kQ303?l#KbeJ*Hi;FB#(_a&BS;PAfMS(e}5lgWH|27 zjUILnL}0JT*kFm~D)O-(5i<{Ss0Csm5Q5j>+pj`PA%RgqLxX3z(JL499wu8BoiowR zbQ_jU<}fnW$A?Aqd$|l7goaiZg{RG4gLx0(QCJzLlv7_bfn~xCf)Xq1ZOyH^+mF;e zVKn_IH@FsV6~ZzCh<1bx%cE0~z036vyyK3M^q&;Oq~sy;ztOrmoR&5cO`FKny1Bd@ zkLt=+-8-v@7>8*+kv>!sLawa1Avx9n~rQZkW z)gaAA52=>x+)Pv)E;mTxc5maM{%5V9Yza?W?-ZH;z_uq~X9+^x$z7bG6r=lH|)w9S|ug}Q!;Hq?cl2p@hf9%+)iL zBk8t;_iD5ZjBVpj2^?a+czPeSwDqf{fg2$*aRkw{ct&XgS8kc+IL@9cGh0JzY245ZnBFN4+LTkCU?Fur+?gJ0mA!>?V(FQ&54u>Y1e?Z($pI( z?f@OW=pQym+WIL(rsR$Cqv)9Lgwn24>gtQ6gMyxQ^82NdsTD1CtEOvK z?GKawu0E#$BLFw|TshIPAb!Mz@xF zb2GD#_Aa)tK4AS=l?hqb>d0r8~lHyWh+)CK(5Rcy7 zpZjrqXcTL_DcPYLr?DvE9w+;>F+cphjFOL!ux80dSKxKDSC>cIYJim3d_~+?nB@9j z+%=O|gaeJ955l<+3;R)!;!(hT^UQI~oYQ)C)V5U3u!iJI)#P;;3afZejU8z)(*l#> zj@!;Nk$uDCOZE41H^vxzydSm8g}*6VZ-T;lWrf?ZGllovLdw+8geO8*McQ^z^lVaBhlqO-6sT&(Mnfj> zRc*-M+zA1Zz#1)QY2r-(II!XI_55=CsV!qpP+E2EMbqC(%`RO*2LVOo1GNV^Z<*8x zvg2GbtgIPr^X5km`pA3RAv%^Oeh?i(P*0T`!&zJfObtY?{+!S9*^ybd{Kvw19^^*o z?;KG;I6<7<>8U>W{wIpxFhJV)Wz$9g>TQA9hx14cA1$LkxLo5@&7N{0-nryfGL!P_ ztAX(9!K##eP}?UrvC#u5M1n${jiO}G^bbn{%<{>f{&s+H-rVG$VRt`@j_{PG+OqSU zjxwL=$$OF21ASurZ*@1Ki5dHzfAm|4#v6VCyLiNfYsN3*AVYBWeUy^tK=_{ui#0V`D%rp^zWKs;#o;H z&kvr-L|;t-;w|q+g9xWi!K;;$F_G1l7(4|Z7^e!wayTNt#7Q}3B5ny*v-*qYpG1*( 
zwmQto1HG0<5y%qXuz`u1(HRNJugpM}N=ir?%GGSRn;f$oCm+S)&Z1pIVNg6*Gx7RF z<^a^NruYi?8#A_D)Hb9A$B`alS>q`q zhdNuD6~3QShOXveN~E9J+@ikE$+yh>T>{uy2*I(-{uqf|PKdZ} z_#rncde)K_w?6-q8oq26b$jf$Q!39oSb%QDrIEylBQ^J(%Bc}e?zf1-gWp@rnC&YZ zLuYEW;u;H@kE=aj$Dw?JHB8eNC@Cg<+txM(_zMqEG^S>|hwMRK*k)Xwo>a~i`hJB) zDtobVNskfR`F7x>W!opPCdaNn*duxEHFR?H8kd({xmkAl`}s>eCT6$r(EW{_80Il; z>zeJ#J5*)cx*}Yadz;Z+CY%Cn!87ps!2@S$wW~vOgE0R6XG~NF&WnuIb|inRW>3tg z>DRO^Pd!Wg^2QOUn9V9x-;{p~R%6IhphxTiAr6?l)fYge%Pc?(p-f42Y%(8!HLm@# zI8BOSs-JvPy&J8Ayx`7MW|Wde(a9fMV`pRvJC^RX17}=df!etmpSD|jF%N+!Lky!^kTZFMz(z86z8}) z+mp=f_oxk;i}LAPI>Gb}ZLZKTA^in&98(u_M_#XUgX?c?epV}KTJ<$@rAB^$n7S7? zfv_!e%@pLN*yy=%4qGdddk|S{TojRPpbTpMa11=GoCFpNS3KPnVN$=EXz8k1o9y=` zAHk~)sp)93PdVPy^NM{d-Sj0!9=7vw2EjWr-@t!N7NU?ap4UW%VQg|aDmdJC5##ck z&12Kw_Uev9cK83@EY{i9UL~3T#a1!k(?aVJRlzKd*o|}|=<68GZEl=gAwRzHlK9mBKX$fLf^S@@n9pEgV zGq;IIO{&dPgZf-9Xg=Ie-(@^Zt;klkw`}z$^6Y(OyBDpAvSCddC9q=?!jnUQWsBWrOTk^KO|zwyj{?MRI;l-2vD`G^U+AHvPdlygVoG1biJqsy_|3 z@!T6+T^}N$4*GfoKE1#RuuUNG_(}g#ZIDc@>0W}nzq7V@kaPfTUD|kfuxiCE1$<@< zq1Kqg#<4n!cvV99p!48fTSMQp*lIf)i6 z(EvF(zzFp@8$qIyVR(+^GMG+L`>$yLTMCw5aQr6T7_3BadofMVDJq>~%$lJqk)Sd0 z8f8cXf&H(Q!-kt;-qE>B%nHZD5x48w2SHJK=hAsU!>-R4R_b51$DvtAgROybEGScJ zbbA-xx5e6tCyrA@Jnkyb01N56_Y|Xyo#PZ`Te$#%`3WP&)NLQe1`BGjJ{L$Y@tNu&@9*2!7q+*cs#-C+x z7ps#%PK_mjT5XIh70`Eb=A&@qoe)x~Pc#A_uEk=J6XZ>$C*e8W^2zLlW{ok;NJxsp zHPYxYuEVa<<`l{%0d75TUu$NXdiUM#yS-3kEQhL_6&n*pV z3j%LH-+QBEpo1};6T+;{dD$s|;t$k+meV9lz}B7Ug%6*j0j1D;t#>6)&-(PiOgWn2 zh31-Mupj4t$%SR?j-?2AGezzDUGQU=b`R8L4LHh|`jfzsD#|7^4gLJen^0)4AC4_6 zxuTH$08s@;XLtH)mW7Od-trdS_$z}hhxJUNC0Pga-923{=9_h?$QaUl5KI`g`e!5A zWHn2367wpV1;AMSzHu-@I~tb0B>9teZOB;M5M35LgGdvf6jdM;f1^X`cAFUIpwBGz zHBpPp92R^uZN7wc#G?v}L16{f*wT`uQLLsE>P9W-Pg5UZahfvdx6a_6*t?{WXV6Q_ zA-x25E%Wf0$wlWY(OXeh@>*?Q(MNZb(L?IdEmZlU0Bhk9vltuEVqR&Xa6ipVW!m@A z+Gg68v75tppt%gNP^&+roh1fe-r&zg!v=Bb^R~|Vcj<;citmjQZPZIJygQ9Ey2X)@ z_&Y{J{fP~D_k_ZaTo5d}XU0s<_l1Ni40ggfB(M}5f)Pr(GKG0@_rb7SqZ0zLk$U`z zr-k(nK!a*q9?eXwE8{``f8psaTRA?i 
zq(YTx!^N3fvF4T-P8rksLv`x*z%^Ve*q>??fO)+~&Q;RjCx;WGPD+ z6_&%)(-$ERixiWQgzpv2+H#HUsHXohhsNWzb^SDcvOqIR{eGE{Y# zcw?M3L;D20Z*Qh`fMWV}{=I@$&64i^9lM4W_{zWh)NQ(TCc%W3zIpln#H>zT9|KC* zrtiLJJoKqKcQDtv2J zz<$VtNRL+N*6|OGM%Uy#pYIXdT5lOa2Tsif095V#eM~lCUG7p6)dxT@aj=+&zyTlR ze|Q8PIDs;(z<@;mI>r$C6M{~6b-iz{sZ~whT-lXd6f2LDn-v1oy^o$MmUG*5B`jy5 z3~K>~2Omw4K`tdtT9=HC_Im%WhDuXLf?kPdnrndj<*(}=VCdX73U^!&)A+y`R6D}o|{~wX+g99rA zE0EfQ14{&q#DlNjnWv~F>ZLlwPo$k|f#i*r0D?CitRBuEAV zbWm|k@d9V>ao5eLk1O!LLqSh@*j*b;$6te3O`U-z7Z_vpFV{=-C^b+ntbPdr5=o~> z1-pck%TJFOlx&{HKq{DT`?bxY)W%Ty%l z>Vg3H48pxtdi4TG1RI8TSXL;{g7a5x6w z*!t-V!-cuw`8Pis#-XN#wL9@wcl)hG*mC2}Z_5~l$hTnee_|yv16TJGuno9~gjils zWDlY1h7(`l@oiaDcC%LYkyQ?mdtsh!D*v>O_0$|CUwLnwexN^lqQC8aMX!mMm9gY6Uzu1XTPXHG7fUNu78M vnFcBiGy{lr0guND_X<}N1_6_bY?OpIT|DR5)J{BD8WcV>1_o(W8MOZaickc; diff --git a/search.json b/search.json index 2331d41..e3cbe19 100644 --- a/search.json +++ b/search.json @@ -80,196 +80,218 @@ ] }, { - "objectID": "posts/weeknotes-2025-W06.html", - "href": "posts/weeknotes-2025-W06.html", - "title": "Week notes (2025-W06)", + "objectID": "posts/weeknotes-2025-W07.html", + "href": "posts/weeknotes-2025-W07.html", + "title": "Week notes (2025-W07)", "section": "", - "text": "This week was a bit slower than usual. The highlight was my meeting with Isabel to go over details about harmonization procedures and to discuss projects that may serve as potential cases. I got a better sense of the community composition and the kinds of challenges that are commonly experienced, and Isabel was able to provide me with contacts to get in touch with once I’m ready to begin collecting data.\nI was less active with regards to my methodology notes this week, having spent much time re-organizing and consolidating them. 
I’m also reading about coding techniques and the foundational principles and debates surrounding grounded theory, but there is a lot of ground to cover and I’m taking it all in before I begin recording my thoughts in a systematic way.\nI did some non-postdoc service this week too. I started a peer-review that I had committed to, participated in the monthly SSLA meeting, and completed the first-pass screening of archaeology-related journals in the Directory of Open Access Journals to verify their inclusion in diamond.open-archaeo." + "text": "This week I continued my methodological readings. Today I got some requests for minor modifications to my REB protocol, which will be rather painless to implement.\nI also attended the Public Health day, a student conference where grad students presented their practicum research. I really enjoyed engaging with the students at the poster session, I actually learned quite a lot.\nNext week I’ll try to round off my methodology readings in anticipation of my REB application being approved by the end of this month." }, { - "objectID": "posts/weeknotes-2025-W04.html", - "href": "posts/weeknotes-2025-W04.html", - "title": "Week notes (2025-W04)", + "objectID": "posts/weeknotes-2025-W05.html", + "href": "posts/weeknotes-2025-W05.html", + "title": "Week notes (2025-W05)", "section": "", - "text": "This week was a bit slower than last. I spent much of it finalizing the content for my IRB application, and the rest preparing for a meeting with a key stakeholder relating to my research.\nThe IRB application is more or less done, just waiting on David and the department head to sign off. It was a major opportunity to re-organize my research protocol and related documents. I shuffled some things over into various placeholder sections in my methodology notes, and pushed a revised research protocol to the website.\nYesterday I posted about on the role of AI in my research. It’s mainly meant to lay out my current state of thinking on AI. 
I’m not fixed to those ideas, and I think there is much more nuance than I do justice to in that post, but putting it on the page helped me consolidate and put aside some scrambled opinions.\nAfter playing around with qc on Sunday, I started to assemble some feedback. I may post a github issue later this week, once I’ve had a chance to consolidate and edit my thoughts.\nI participated in the weekly CITF logistics update, after which I met with Aklil to discuss the overall strategy for her project and strategize on how we might form the focus groups for that work. We’re gonna meet more regularly, just to share some updates on our respective projects which have a lot in common.\nOn Thursday I met with Isabel Fortier, with the intention of discussing data harmonozation initiatives that might serve as potential cases. It was a bit of a tough meeting. During the first 45 minutes I struggled to communicate the purpose of my work, but I think by the end we reached a greater understanding of what this work will entail and the unique perspective it will bring. One surprising outcome that I still need to think through is Isabel’s suggestion that I slow down a bit, immerse myself more in the world of data harmonization. While she is absolutely right that I’ve been rushing through this first month, I do feel pressure to get the project going and to start accumulating data — I felt a similar pressure when starting my PhD, too. So I need to put my eagerness aside so that the data are relevant and of good enough quality. Isabel offered to schedule regular meetings with me, and even to have me work in the Maelstrom office once a week, and I’m extremely grateful for her support! Plus, I’ll get to have lunch with my mom who works in the same hospital complex, one building over :)" + "text": "The first part of my week largely involved reading and taking notes on methodological texts. I focused on case study design and qualitative coding techniques. 
I plan to continue my methodology readings on coding techniques, memoing, interview methods and systematic note-taking, as well filling in gaps in my understanding of grounded theory and related debates. These readings are especially useful in this planning stage, but also serve to fill time while I wait for my IRB approval.\nOn that note, I finally submitted my ethics application on Thursday. I expect an expedited review based on the low-risk nature of the work. I posted the materials I submitted on the ethics protocol page.\nI had my biweekly meeting with David, and he was very encouraging.\nI met with Isabel Fortier again yesterday and we came up a list of six projects that may serve as potential cases. We will discuss them in greater depth next week.\nI finally sent some feedback on qc.\nNext week I also need to fulfill a few commitments not as related to the postdoc: I need to work on a peer-review I had committed to, continue assemmbling constructive feedback for qc, and continue going through the DOAJ for the diamond.open-archaeo initiative." }, { - "objectID": "posts/2025-01-24-ai-in-my-work.html", - "href": "posts/2025-01-24-ai-in-my-work.html", - "title": "On the role of AI in my research", + "objectID": "posts/weeknotes-2025-W03.html", + "href": "posts/weeknotes-2025-W03.html", + "title": "Week notes (2025-W03)", "section": "", - "text": "AI is upon is, and although I would probably be ok if it wasn’t around, I have been (and still am, to a certain extent) tempted to use it in my research. So here I’m gonna articulate some of my thoughts on AI. This isn’t written to convince anyone, or even to convince myself. Just to lay out all my thoughts and take stock of my preconceptions, disapointments, hopes and desires, etc.\nAlso, I’m gonna use AI, LLM and whatever other brand names and marketing buzzwords interchangably here. 
Draw whatever conclusions you want about that.\nI see AI as being potentially useful in a few productive activities I regularly engage in:\n\nTranscribing spoken words into written text\nTranscription is a significant component of processing interview data, and this can be extremely slow work. It’s a lot easier to edit a transcript produced through a computer algorithm rather than start from scratch. I used trint, otter and other similar tools before all the AI hype, and more recently I’ve been using whisper to transcribe voice notes that I record while I’m waiting for the bus or drifting off to sleep. I’m not really sure how they’re much different, to be honest. Is AI just a rebrand of natural language processing in these contexts? Either way, I will most certainly be using some automatic transcrion tool in my research.\nSummarizing, breaking down and simplifying complex bundles of ideas\nI do a lot of reading, and it can be hard to get through everything on my list. I therefore make lots of compromises and refrain from reading some things because I just can’t make enough time to get through everything. I imagine that AI can help summarize some key points across a whole corpus of articles on my to-read pile, and I may try it out once I have time to figure out the right tooling for the job. However, I do gain a lot of value from the process of reading. Specifically, as a scholar of scientific practice, I’m interested in the language and rhetoric authors use to describe and situate their methods and findings, and I’m not sure if automatic summary tools can capture and communicate this nuance in ways that I want.\nGenerating code snippets for data processing and visualization\nThis is arguably the most productive potential application I can imagine. Specifically, I’m thinking about using this to generate R code that processes and visualizies data according to imagined outcomes. 
This is directly relevant to a project I’m working on where I’ve already finished the workflows for scraping and processing the data, I have the questions I want to ask of it, but I don’t have the practical know-how to generate the code that will allow me to address them. ggplot is just so dense to me, and stitching together code snippets from stack exchange is a major pain in the ass that produces a horrible abomination of code that would not pass the muster of any rigorous code review. What’s more, those queries to search stack exchange are already half-formed AI prompts! At least an AI would generate some harmony in the code, and I might learn something by having a tidy and consistent template.\n\nI’m more ambivalent and critical about using AI in these contexts where it’s been really hyped:\n\nAny form of writing, including generating emails and abstracts\nFor me, writing is a creative process and a way of unerstanding. It’s a mechanism through which I come to learn about something. The experience of drafting and revising a document is crucial to my research process. This is especially important for honing my position as a scholar at the intersection of various disciplinary communities, who have distinct language and modes of communication.\nQuerying for truth claims\nTo be clear, the idea that knowledge can be total, absolute and disembodied is deeply flawed, and the popular reception of AI as a neutral observer and reporter of nature makes me sad. That being said, I’m still ambivalent about the potential for specialized, home-grown LLMs as means of parsing, sorting through and obtaining greater value from under-used resources. There are patterns in even the messiest and least formal documents we create, and even if we can’t draw information from these documents, LLMs may be useful to help us reflect on the circumstances of their creation. 
I keep thinking about Shawn Graham’s twitter bots in this context (which were not based on AI, but whatever), which attempted to spit out segments of artificial reports and fieldwork drama, which real archaeologists often related and resonded to. These responses were interesting to me, often expressed as collective fascination, titilation or disgust, and reminiscient of the apprehension one might experience when hearing your own voice played back while standing at the opposite end of a long hallway. Reacting to distortions of your own experience from very different perspectives can be a really powerful reflexive exercise.\nAs a brainstorming tool, or as a rubber duck\nI’ve heard about people using AI chatbots as agents to bounce their ideas off of. Kind of like eliza, but for productive work. While I think it’s intriguing, I don’t know where I’d start. Also, drawing up the prompt and figuring out how to ask the right questions may already be enough to get the ideas flowing. I think I already do this in some ways by drafting little ephemeral notes, usually directed toward a specific person or imaginary audience while anticipating their feedback. It also somehow seems like a perverse way to de-socialize work, and in a world where students and postdocs feel increasingly isolated, I’d much rather solicit and provide feedback among peers. This has been the foundation of some of my most solid friendships and professional partnerships, and should be encouraged.\n\nI also have some previously-unstated opinions in relation to some common critiques of AI:\n\nProcess versus product\nAI seems to be really good at devising formulaic outputs. That is, it’s good at getting things to look like things whose shapes are already well-defined. This can be valuable in various use cases, like writing emails according to a template or translating texts between languages. 
I could imagine it being really helpful for those who are coming into a field where certain skills are taken for granted, such as learning how to write “proper” academic emails as a student who is not fluent in english. Imagine being up against a deadline for a job application, while also being knee-deep in unpaid work to get your name out there; an LLM could be a godsend. So I don’t discount easy outputs as inherently bad. A standard output for one is a week-long struggle for another, so I think this distinction between product and process is a false and misleading dichotomy.\nBad instructions\nSometimes I find it really hard to believe that people could earnestly follow whatever an AI tells them. But I think we’re getting to the point of urban mythmaking, similar to the older wariness about following your GPS into a lake. There’s a story behind every warning sign, even if it’s a projection of what you think might happen if you disregard it.\n“Intelligence”\nOne weird thing about AI branding is the smushing together of some unified idea of what constitutes “intelligence”. We’ve already been through this with “smart” gadgets, which have always just been ways to capture consumer products under a platforms proprietary injected plastic molds and information protocols. AI is literally just a way to sell you a new version of the smart gadget you threw out last year.\nTruthiness, i.e., AI’s ability to sound authoritative while also making false claims\nI cringe at any retort to a screenshot of AI giving a wrong definition of a thing. Accuracy of responses should come secondary to critique of the notion that all forms of knowledge can be presented in terms of absolute, disembodied and universally truths. For example, when people ridicule AI’s inability to identify the capitols of various nation states, I see missed opportunities to challenge the value of any answer that anyone might provide. 
True subversion would be to reject or re-frame the question and the simplicity with which it is addressed.\nOne another related note, I see a lot of weird parallels between myths about truth claims made by AI and by practitioners of qualitative data analysis (QDA) — and, as a qualitative researcher, this is obviously a bit unsettling. Specifically, in both QDA and AI, there is no actual attempt to make absolute truth claims, but the focus is rather on attempting to identify and draw out meaningful elements of elicitations in a corpus, and to trace patterns between them. In my current opinion, the key difference lies in positionality. Any QDA researcher who laim that their cases are representative of all experiences will be laughed out of the room. Meanwhile, AI is lauded for the claims made by their creators that it can derive unambiguous and concrete knowledge from inherently situated and biased data sources. Humility is key while contributing to collective knowledge bases, and AI risks changing the dynamic away from deriving greater value from constructive discourse and toward a system where the loudest voice in the room wins.\nClimate change\nAI uses a lot of energy, and is therefore said to be wasteful. However I think there are certain wasteful components of AI. For instance, generative models that spit out a full sentence to wrap around the answer to a question don’t have to do all that extra work. Also, not everyone is reliant on fossil fuels, and the critique that AI is necessarily bad for the environment is laden with a thick American accent (as is the case with so many of the loudest opinions on the internet).\nThat being said, there are enormous problems with resource allocation in AI, and I’m not trying to dismiss all concerns. I see these concerns as relating to the distribution of power and wealth in society at large, and AI is one aspect of this. 
Sometimes I wonder if comparisons can be made between using AI in selective research contexts and eating a burger or a banana, which each have their own environmental costs. But thinking in this way is a bit of a trap.\n\nI also see that rhetoric, including anxieties about AI, differs in the various communities I participate in:\n\nIn digital-x, where x = {archaeology | humanities | librarianship | whatever}\nThere’s a lot of experimentation going on. Honestly, I don’t know much about it and I tend to scroll past any discussion about AI applications in archaeology that appears in my feed. Part of me sees it as a passing trend, but it could be better framed as a wild frontier, as is the case with many other things in digital archaeology. People are still in the process of taming the landscape, to make it better suit their needs, and maybe I’ll join in once the settlement is established. But I’m not personally motivated by the dynamism of the current landscape, at least in this particular domain.\nEpidemiology, biostats, public health\nI’m still too new in this community to really make sense of this yet. I’ll continue to watch and learn and listen.\nBroader social science and humanities, as well as libraries, archives and museums\nCritique tends to follow broader, more abstract, and more common-sense lines of thought. In my view, much of this does not really account for the material problems and imperfections in which the social sciences and humanities operate. AI is a lifeline for many people in an overworked, overburdened, under-resourced and hyper-competitive environment, and tut-tutting around how other people use AI sometimes comes across as tone-deaf and disrespectful. Some criticisms of AI being used in real, practical circumstances make me second guess critics’ supposed commitments to improving the social experience of research. 
The fundamental problem is inequitable access to financial and material resources, and AI’s prevalence is a major symptom of, or — depending on your perspective — resolution to that. People’s who recognize this have no choice but to post broader and more abstract criticisms, which come across as somewhat hollow when disconnected from real and tangible experiences.\nSenior faculty\nProbably the most ambivalent of all communities are senior faculty, who want AI to be useful and will test the waters without fully committing. Which is fine and very prudent, and honestly I identify most with this perspective, despite my position as a lowly postdoc.\nGrad students\nI engage with many grad students. I share my workspace with grad students and encounter them constantly in my day to day neighbourhood forays, where I overhear and sometimes participate in conversations about AI. In my new work environment (Epidemiology, Biostatistics and Occupational Health), the grad students who I engage with have a relatively positive perception of AI. They seem to find greater value in the ability to automate complex processes, using it as a black box of sorts, with predictable and abstracted inputs and outputs, which they see as especially helpful for coding. Outside of this space I’m encountering way more diversity of thought on AI, and I’m not quite sure how to group these viewpoints to structure a proper reaction. I think this in fact contributes to the multitude of perspectives, since no one really cares that much one way or the other to really have a strong opinion (though I sense an overwhelming dissatisfaction when it comes to AI in consumer contexts; this post is largely about productive uses of AI in research and pedagogy).\nI was also told about students learning RStats by just having AI generate their code. The person who pointed this out to me related this to the growing misconception that to learn stats you first need to learn how to code. 
This in turn relates to the sense that to learn how to do RStats, you just need to memorize a series of steps and copy the text from the slides into the IDE. So, in the end, AI reveals the inadequacy of the teaching mechanisms for programming and stats classes, similarly to how AI has revealed the inadequacy of essay-writing as a pedagogical technique.\nOn the other hand, some students are concerned about dulling their skills, or even not being able to take advantage of opportunities to learn new skills, due to the temptation to automate these tasks. Some upper-year PhD students are glad that they were trained in the fundamentals prior to the AI hype wave. This makes me wonder how students are determining what skills they think they need to know how to do on their own and what is worth running through an LLM. Does it basically operate as a bullshit sensor, where you can smell from a distance that the work is just gonna be tedium and irrelevant? Or is it more out of practical necessity, where you’re stretched so thin that you simply have to rely on these tools to achieve anything meaningful, almost as a mechanism for salvaging one’s work from the claws of austerity? In either case, this points to PhD programs’ inadequacy to match students’ needs and desires, and overwhelming amount of administravia or (seemingly) irrelevant work that students are made to do, which get in the way of their true interests.\n\nMaybe I’ll have more to share some other time." + "text": "I’m trying out a new way to track and communicate my progress on this project. Every week I’ll write a post to track the work I’ve been doing and reflect on my activities. I’ll try to maintain this document throughout the week, tidy it up and post it here on Friday afternoons. However the specific process will probably vary as it grows into the rest of my workflow.\nI’m purposefully trying to not tie this into the personal knowledge management trend. 
It’s for me and my own purposes, and I don’t want to get bogged down with the unabashed managerial phoniness that belies the PKM phenomenon.\nAnyway, I didn’t take notes on my work this past week, but here’s an overview of what I’ve done from memory:\nContinued to take notes on readings produced by the Maelstrom Project and its partners.\nContinued to investigate and maintain notes on potential cases.\nSet up a placeholder document for notes on methodological concerns.\nMet with David for our bi-weekly check-in (meeting notes are private, at least for now). I was also given access to the lab’s private git server but haven’t really had much of a chance to explore what it’s being used for or devise my own plans to make use of it.\nWorked extensively on the ethics protocol. David and I went back and forth deciding on whether this was necessary, given how the project is based on his grant which already has IRB approval. But it’s better to play it safe than sorry, especially when it’s necessary to obtain informed consent. So to this end, I revised the research protocol and responses to the ethics form, and I also drafted an informed consent document. I’ll share all these things once the whole package is put together (probably in a week or two), but my responses to the ethics form already appears on the ethics protocol page.\nI simplified the way private documents are handled in the quarto project and git respository. May still need to so some fiddling, especially for draft blog posts and notes.\nI started drafting a blog post about the potential use of AI/LLMs in my research. Stay tuned.\nOn a related note, I watched this recent video about the use of LLMs in qualitative data analysis, which did not prompt me to draft the post but which is well-timed, nevertheless.\nI worked a bit more on the data management plan, which prompted me to think more about which QDA software I’ll use. 
I started filling in a university form to use cloud services provided by MaxQDA, but stumbled upon qualitative-coding (abbreviated as qc), an open source CLI-based QDA system. It represents a very innovative approach to QDA rooted in computational thinking and plain text social science, while also remaining true to the core tenets and purpose of QDA, which make it unique and difficult to design software for. If this is the sort of thing that appeals to you, I highly recommend you read the docs.\nI had a bit of trouble installing it and getting it running, but I met remotely with Chris Proctor, who develops the tool through his work at the Computational Literacies Lab, based in the Department of Learning and Instruction at University at Buffalo (SUNY). He helped me resolve some issues, gave me a guided tour of the system and we just talked about the overall state of qualitative data analysis and its tooling. I don’t really have the capacity right now to post everything he showed me but I will definitely be posting about my experiences tinkering around with qc in the coming weeks.\nSimilarly, I asked on Mastodon about whether there are any tools that might support automatic generation of transcripts that include support for specialized notation. A few linguists and conversation analysis scholars responded with recommendations to use GailBot, and with discussion about the tool’s capabilities and limitations. I requested access to the software but haven’t heard back from the dev team yet. I also created a thread on the whisper github repo, which I now realize it a bit of a naive place to put it, and it hasn’t yet gotten any responses.\nI attended a talk from the epidemiology seminar series, which went wayyyy over my head.\nDid my usual amount of engagement on masotodon, I suppose. 
And I continued to make new friends in the department too :)" }, { - "objectID": "posts/2024-12-09-hello-world.html", - "href": "posts/2024-12-09-hello-world.html", - "title": "Hello World!", + "objectID": "posts/2024-12-11-technical-specs.html", + "href": "posts/2024-12-11-technical-specs.html", + "title": "Technical specs for this website", "section": "", - "text": "Welcome to the website for my CITF Postdoc! This will serve as a hub for documenting and sharing my work. I decided to do this as a way of managing and sharing always-updated drafts of research protocols with my supervisor and team members, but it is also generally useful for keeping my thoughts organized. I will also use this blog section to write my thoughts as the project progresses." + "text": "I’m using this website as a way to help organize and share key documents and resources. The research protocols are in flux at this stage in the project’s development, and this will make it easier to distribute up-to-date drafts with partners, while simultaneously enhancing transparency.\nThis post outlines the technical specifications for this website and outlines a roadmap for its further development. It will therefore be continually updated as the site evolves." 
}, { - "objectID": "posts.html", - "href": "posts.html", - "title": "Blog", - "section": "", - "text": "Date\n\n\nTitle\n\n\nCategories\n\n\n\n\n\n\nFeb 7, 2025\n\n\nWeek notes (2025-W06)\n\n\nweek notes\n\n\n\n\nJan 31, 2025\n\n\nWeek notes (2025-W05)\n\n\nweek notes\n\n\n\n\nJan 25, 2025\n\n\nWeek notes (2025-W04)\n\n\nweek notes\n\n\n\n\nJan 24, 2025\n\n\nOn the role of AI in my research\n\n\nAI / LLM, Methods, QDA\n\n\n\n\nJan 18, 2025\n\n\nWeek notes (2025-W03)\n\n\nweek notes\n\n\n\n\nDec 18, 2024\n\n\nTechnical specs for this website\n\n\nwebsite\n\n\n\n\nDec 9, 2024\n\n\nReflection on first team meeting\n\n\nmeeting notes, general thoughts\n\n\n\n\nDec 9, 2024\n\n\nHello World!\n\n\nintroduction, website\n\n\n\n\n\nNo matching items", - "crumbs": [ - "Blog" - ] + "objectID": "posts/2024-12-11-technical-specs.html#fundamentals", + "href": "posts/2024-12-11-technical-specs.html#fundamentals", + "title": "Technical specs for this website", + "section": "Fundamentals", + "text": "Fundamentals\nThis website is based on Quarto, a platform for writing and publishing scientific and technical writing. I had used quarto before but without fully understanding it, and now I am starting to see its elegance.\nI had started off using Hugo, but there were too many limitations that Quarto was able to accomodate. You can find an older version of this post reflecting that setup here: #2346852.\nThe site is hosted on GitHub Pages. The repo is located at https://github.com/zackbatist/CITF-Postdoc." 
}, { - "objectID": "notes/methodology-notes.html", - "href": "notes/methodology-notes.html", - "title": "Methodology notes", + "objectID": "posts/2024-12-11-technical-specs.html#generating-pdfs", + "href": "posts/2024-12-11-technical-specs.html#generating-pdfs", + "title": "Technical specs for this website", + "section": "Generating PDFs", + "text": "Generating PDFs\nAs an avid user, one thing I really like about Quarto is the ability to generate PDFs alongside html versions served over the web. I started tinkering with includes but I need to review how Quarto passes info from YAML frontmatter. It is not at all straightforward and I will need to experiment a bit more with this to get the hang of it." + }, + { + "objectID": "posts/2024-12-11-technical-specs.html#archiving-and-version-control", + "href": "posts/2024-12-11-technical-specs.html#archiving-and-version-control", + "title": "Technical specs for this website", + "section": "Archiving and Version Control", + "text": "Archiving and Version Control\nEvery change is tracked using git. I would also like to archive each research protocol in Zenodo once they reach a point of stability. This would ensure that they ca be aassigned DOIs and detailed metadata, which will make them easier to reference.\nHowever, I do not want to rely on Zenodo’s GitHub integration for two reasons: (1) I want this to be as platform-agnostic as possible, and (2) that system relies on GitHub’s release system which operates on the level of the whole repository rather than specific files.\nI might be able to write a custom CI workflow to archive specific files to Zenodo using their API. But, I want to be able to toggle this option, rather than have it occur for every single detected change. Maybe I can accomplish this by pushing the changes that I want to archive to a dedicated branch that the CI workflow is configured to operate on. Or it might be easier to simply do this manually, since I’m not sure I will be using it that often anyway." 
+ }, + { + "objectID": "posts/2024-12-09-first-team-meeting.html", + "href": "posts/2024-12-09-first-team-meeting.html", + "title": "Reflection on first team meeting", "section": "", - "text": "This document is an overview of methodological topics and concerns. It is a place where I think through and justify my methodological decisions, and identify the methods and procedures through which I implement them." + "text": "Last week (2024/12/04) I finally met with David Buckeridge, Tanya Murphy and Aklil Noza in person. The meeting was meant to convey my vision for the project to the whole team, to align perspectives, and to articulate how this will actually work in practice.\nThe gist is that I will be investigating the role of social and cultural factors in data-sharing initiatives such as CITF and other Maelstrom-affiliated projects, and how these relate to, overlap with, or conflict with technical and institutional/administrative factors. To be clear, these are all very important aspects of data-sharing, but we generally recognized that the social and cultural aspects are under-explored relative to their impact.\nWe briefly talked about how we will go about selecting cases, and I emphasized the importance of strategic case selection. This also involves carefully articulating the project’s goals so that the cases will complement them. We agreed that the dataset will likely comprise between 12-15 interviews of around 60-90 minutes in length with representatives from 4-5 cases (one of them being CITF), in addition to representatives of the Maelstrom team. Maelstrom will serve as a “fixed point” that limits the scope of the cases’ breadth and ensures that participants have a common frame of reference. 
It also potentially allows me to “offload” or “consolidate” reference to technical and administrative aspects of data-sharing through targeted interviews with Maelstrom personnel, instead of dealing with those things with the representatives for each case.\nWe discussed timelines and overlap with Aklil’s work, which will be more concerned with focus groups with CITF databank users. There is definitely overlap with the emphasis of my own work and we will coordinate data collection to enhance the potential for analytical alignment.\nAfter the meeting I chatted with Tanya and Aklil who helped familiarize me with the bigger-picture theoretical discourse and tensions in epidemiology. Much of it seemed familiar since these concerns are common across disciplines, but I still need to read more to concretize my understanding. Tanya recommended I read the “Baby Rothman” which is a condensed version of a very long-lived textbook in this field, among a few other papers she sent me.\nOverall, this meeting got me really excited about this project :)" }, { - "objectID": "notes/methodology-notes.html#significant-concepts-and-frameworks", - "href": "notes/methodology-notes.html#significant-concepts-and-frameworks", - "title": "Methodology notes", - "section": "Significant Concepts and Frameworks", - "text": "Significant Concepts and Frameworks\n\nMulticase Studies\nThese notes describe the features, affordances and limitations of case study research, and articules factors correspoding with variable kinds of case studies.\nI do notice a distinction between two schools of thought, which seem to be spearheaded by Stake and Yin. I generally favour Stake’s flexible approach, and it seems well aligned with other methodological works I’ve been reading (e.g. Abbott 2004; Charles C. Ragin and Becker 1992).\n\nStake’s Approach\nIn case-study research, cases represent discrete instances of a phenomenon that inform the researcher about it. 
The cases are not the subjects of inquiry, and instead represent unique sets of circumstances that frame or contextualize the phenomenon of interest (Stake 2006: 4-7).\nCases usually share common reference to the overall research themes, but exhibit variations that enable a researcher to capture different outlooks or perspectives on matters of common concern. Drawing from multiple cases thus enables comprehensive coverage of a broad topic that no single case may cover on its own (Stake 2006: 23). In other words, cases are contexts that ascribe particular local flavours to the activities I trace, and which I must consider to account fully for the range of motivations, circumstances and affordances that back decisions to perform activities and to implement them in specific ways.\nMoreover, the power of case study research derives from identifying consistencies that relate cases to each other, while simultaneously highlighting how their unique and distinguishing facets contribute to their representativeness of the underlying phenomon. Case study research therefore plays on the tensions that challenge relationships among cases and the phenomenon that they are being called upon to represent (C. C. Ragin 1999: 1139-1140).\nStake (2006: 4-6) uses the term quintain1 to describe the group, category or phenomenon that bind together a collection of cases. A quintain is an object, phenomenon or condition to be studied – “a target, not a bull’s eye” (Stake 2006: 6). “The quintain is the arena or holding company or umbrella for the cases we will study” (Stake 2006: 6). The quintain is the starting point for multi-case research.\n1 The term refers to a medieval jousting target: see https://en.wikipedia.org/wiki/Quintain_(jousting)According to Stake (2006: 6):\n\nMulticase research starts with the quintain. To understand it better, we study some of its single cases — its sites or manifestations. But it is the quintain we seek to understand. 
We study what is similar and different about the cases in order to understand the quintain better.\n\nStake (2006: 8) then goes on:\n\nWhen the purpose of a case is to go beyond the case, we call it an “instrumental” case study When the main and enduring interest is in the case itself, we call it “intrinsic” case study (Stake 1988). With multicase study and its strong interest in the quintain, the interest in the cases will be primarily instrumental.\n\nAbbott’s (2004: 22) characaterization of Small-N comparison is very reminiscient of Stake’s (2006) account of the case-quintain dialectic:\n\nSmall-N comparison attempts to combine the advantages of single-case analysis with those of multicase analysis, at the same time trying to avoid the disadantages of each. On the one hand, it retains much information about each case. On the other, it compares the different cases to test arguments in ways that are impossible with a single case. By making these detailed comparisons, it tries to avoid the standard critcism of single-case analysis — that one can’t generalize from a single case — as well as the standard criticism of multicase analysis — that it oversimplifies and changes the meaning of variables by removing them from their context.\n\nIt should be noted that case study research limits my ability to define causal relationships or to derive findings that may be generalized across the whole field of epidemiology. 
This being said, case study research allows me to articulate the series of inter-woven factors that impact how epidedemiological researchers coordinate and participate in data-sharing initiatives, while explicitly accounting for and drawing from the unique and situational contexts that frame each case.\nStake (2006: 23) recommends selecting between 4-10 cases and identifies three main criteria for selecting cases:\n\nIs the case relevant to the quintain?\nDo the cases provide diversity across contexts?\nDo the cases provide good opportunities to learn about complexity and contexts?\n\n\nFor qualitative fieldwork, we will usually draw a purposive sample of cases, a sample tailored to our study; this will build in variety and create opportunities for intensive study (Stake 2006: 24).2\n2 Though Yin (2014: 40-444) is dismissive of such use of the term “sample” since he sees case study research as only generalizable to similar situations, and not to a general population from which a sample is typically said to be drawn. 
I agree with this focus on concrete situations over Stake’s prioritization of theory-building as an end unto itself.\nStake’s (2010: 122) prioritizes doing research to understand something or to improve something, and I generally agree with his rationalization; research helps reframe problems and establish different decision options.\n\n\nYin’s Approach\nAccording to Yin (2014: 16), “a case study is an empirical inquiry that investigates a contemporary phenomenon (the”case”) in depth and within its real-world context, especially when the boundaries between phenomenon and context may not be clearly evident.”\nHe goes on to document some features of a case study: “A case study inquiry copes with the technically distinctive situation in which there will be many more variables of interest than data points, and as one result relies on multiple sources of evidence, with data needing to converge in a triangulating fashion, and as another result benefits from the prior development of theoretical propositions to guide data collection and analysis.” (Yin 2014: 17)\nYin (2014) is more oriented toward what he refers to as a realist perspective, which he pits against relativist and interpretivist perspectives (used interchangably, it seems), and which I might refer to as constructivist. He characterizes relativist perspectives as “acknowledging multiple realities having multiple meanings, with findings that are observer dependent”. His prioriting of a realist approach corresponds with the analysis by Yazan (2015), who compared Yin with Stake and Merriam. 
According to Yazan (2015: 137), Yin evades making statements about his epistemic commitments, and is characterized as post-positivist.\nYin (2014) is very concerned with research design in case study research He posits that, in a colloquial sense, “a research design is a logical plan for getting from here to there, where here may be defined as the initial set of questions to be answered, and there is some set of conclusions (answers) about these questions.” (Yin 2014: 28)\nYin distinguishes between a research design and a work plan. A research design deals with a logical problem, whereas a work plan deals with a logistical problem. Seems reminiscient of Brian Cantwell Smith’s distinction between skeletons and outlines.\nYin lists five components of a research design:\n\nA case study’s questions;\nits propositions, if any;\nits unit(s) of analysis;\nthe logic linking the data to the propositions; and\nthe criteria for interpreting the findings.\n\nInterestingly, I have been instinctively following these steps, and am currently hovering somewhere between components 3 and 4, while dipping back to 2 once in a while too.\nThe problem of defining the unit of analysis is salient to me right now. According to Yin (2014: 32), the unit of analysis may change as the project progresses, depending on initial misconceptions (he uses the example of a unit of analysis changing from neighbourhoods to small groups, as contextualized by the socio-geographical entity of the neighbourhood, which is laden with issues of class, race, etc). In my own situation, the unit of analysis may hover between the harmonization initiative, the people, activities or infrastructures that make it work.\nIn the section on criteria for interpreting the findings, Yin emphasizes the role of rival theories, which is akin to a concern with falsifiability as a means of validating truth claims, and which betrays his positivist leanings. 
This may be compared with Stake’s emphasis on triangulation, which is more concerned with internal cohesiveness. Similarly, Yin cites Corbin and Strauss regarding the role of theory or theoretical propositions in research design, which similarly reveals a concern with rigorous upfront planning and strict adherence to research design as a key aspect of deriving valid findings.\nRegarding generalizability, Yin (2014: 40-41) states that “Rather than thinking about your case as a sample, you should think of it as the opportunity to shed empirical light about some theoretical concepts or principles, not unlike the motive of a laboratory investigator in conceiving of and then conducting a new experiment.” He goes on to state that case studies tend to strive for analytic generalizations that go beyond the specific case that has been studied, and which apply to other concrete situations rather than just abstract theory building.\n\n\nLogistics of case study design\n\nPreparing to select case study data\nYin (2014: 72-23) identifies five desired attributes for collecting case studt data:\n\nAsk good questions — and interpret answers fairly.\n\n\n“As you collect case study evidence, you must quickly review the evidence and continually ask yourself why events or perceptions appear as they do.” (73)\nA good indicator of having asked good questions is mental and emotional exhaustion at the end of each fieldwork day, due to the depletion of “analytic energy” associated with being attention on your toes. (73-74)\n\n\nBe a good “listener” not trapped by existing ideologies or preconceptions.\n\n\nSensing through multiple modalities, not just spoken words.\nAlso subtext, as elicited through choices of terms used, mood and affective components. (74)\n\n\nStay adaptive, so that newly encountered situations can be seen as opportunities, not threats.\n\n\nRemember the original purpose but willing to adapt to unanticipated circumnstances. 
(74)\nEmphasize balancing adaptability with rigour, but not with rigidity. (75)\n\n\nHave a firm grasp of what is being studied, even when in an exploratory mode.\n\n\nNeed to do more than merely record data, but interpret information as they are being collected and to know immedately whether there are contradictions or complementary statements to follow-up on. (75-76)\n\n\nAvoid biases of being sensitive to contrary evidence, also knowing how to conduct research ethically.\n\n\nMaintain strong professional competence, including keeping up with related research, ensuring accuracy, striving for credibility, and knowledging and mitigating against bias.\n\nYin advocates for adoption of case study protocols. He provides an example of a table of contents for case study protocols, which generally comprise four sections:\n\nOverview of the case study\nData collection procedures\nData collection questions\nGuide for the case study report\n\n\n\nTriangulation\nTriangulation is a process of gaining assurance. Also sometimes called crystallization.\n“Each important finding needs to have at least three (often more) confirmations and assurances that key meanings are not being overlooked.” (Stake 2006: 33) Triangulation is a process of repetitous data gathering and critical review of what is being said. (Stake 2006: 34)\nWhat needs triangulation? 
(Stake 2006: 35-36)\n\nIf the description is trivial or beyond question, there is no need to triangulate.\nIf the description is relevant and debatable, there is much need to triangulate.\nIf the data are critical to a main assertion, there is much need to triangulate.\nIf the data are evidence for a controversial finding, there is much need to triangulate.\nIf a statement is clearly a speaker’s interpretation, there is little need to triangulate the quotation but not its content.\n\nStake (2006: 37) cites Denzin (1989) who highlighted several kinds of triangulation, leading to a few advisories:\n\nFind ways to use multiple rather than single observers of the same thing.\nUse second and third perspectives, i.e. the views of teachers, student and parents.\nUse more than one research method on the same thing, i.e. document review and interview.\nCheck carefully to decide how much the total description warrants generalization.\n\nDo your conclusions generalize across other times or places?\nDo your conclusions about the aggregate generalize to individuals?\nDo findings of the interaction among individuals in one group pertain to other groups?\nDo findings of the aggregate of these people generalized to a population?\n\n\n\n\nCross-Case Analysis Procedure\nStake (2006: Chapter 3) lays out a procedure for deriving synthetic findings from data collected across cases. He frames this in terms of a dialectic between cases and quintains. He identifies three tracks (Stake 2006: 46):\n\nTrack 1: Maintains the case findings and the situationality.\nTrack 2: Merges similar findings, maintaining a little of the situationality.\nTrack 3: The most quanitative track, shifts the focus from findings to factors.\n\nAccording to Stake, case reports should be created independently and then brought together by a single individual when working in a collaborative project. 
In keeping with the case-quintain dialectic, this integration must involve strategically putting the cases aside and bringing them back in to identify convergences and divergences, similarities and differences, normalitities and discrepancies among them.\nThere is some detailed discussion about different kinds of statements, i.e. themes, findings, factors and assertions, but I find this a bit too much detail for me to get at at this point in mymethodological planning. In general though, Stake documents a process whereby an analyst navigates back and forth between the general and the situational, presenting tentativr statements that are shored up, modified or discarded through testing compatability of the evidence across cases.\n\n\nSingle cases\nStake (2000) is concerned with identifying what can be learned from a single case. He (2000: 437) identifies three kinds of cases:\n\nIntrinsic case studies as being driven by a desire to understand the particular case.\nInstrumental case studies are examined “mainly to provide insight into an issue or to redraw a generalization.”\nCollective case studies “investigate a phenomenon, population or general condition”.\n\nStake (2000) frames case research around a tension between the particular and the general, which echoes the case-quintain dilemma he described in (Stake 2006: 4-6).\n\n\nSome scattered practical guidance\nStake (2006: 18-22) provides a detailed and realistic overview of common challenges involved in collaborative qualitative research. This could be handy in future work when planning a multicase project involving multiple researchers.\nStake (2006: 29-33) provides guidance on how to plan and conduct interviews in multicase research, including a series of helpful prompts and questions to ask yourself while designing the interview. One thing that stands out is his recommendation that an interview should be more about the interviewee than about the case. 
It’s necessary to find out about the interviewee to understand their interpretations, but what they reveal about the quintain is more important.\nOn page 34, Stake (2006) also provides some practical tips for documenting and storing data, after Huberman and Miles (1994).\nStake (2006: Chapter 4) includes a chapter on procedures for reporting the findings, and I may return to this later on once I need to initiative this phase of work. It addresses concerns about how to articulate comparisons, concerns about generalization, and how to handle advocacy based on findings.\nSee Stake (2006) Chapter 5 for a step-by-step overview of a multicase study analysis. The rest of the volume after that includes three very detailed examples from his own work.\n\n\n\n\nGrounded theory\nThese notes are largely drawn from Charmaz (2000), which I understand to be a fairly balanced and comprehensive overview of the Glaser / Strauss and Corbin debate, and of the situation of specific methods and techniques in relation to these different stances. 
I also value Charmaz’s position as someone who subscribes to her constructivist approach.\nAccording to Charmaz(2000: 509):\n\nEssentially, grounded theory methods consist of systematic inductive guidelines for collecting and analyzing data to build middle-range theoretical frameworks that explain the collected data.\n\nCharmaz(2000: 511) goes on to situate grounded theory in relation to what was the norm prior to its invention:\n\nGlaser and Strauss’s (1967) work was revolutionary because it challenged (a) arbitrary divisions between theory and research, (b) views of qualitative research as primarily a precursor to more “rigorous” quantitative methods, (c) claims that the quest for rigor made qualitative research illegitimate, (d) beliefs that qualitative methods are impressionistic and unsystematic, (e) separation of data collection and analysis, and (f) assumptions that qualitative research could produce only descriptive case studies rather than theory development (Charmaz 1995).\n\nPrior to Glaser and Strauss (1967), qualitative analysis was taught rather informally — they led the way in providing written guidelines for systematic qualitative data analysis with explicit procedures for data analysis (Charmaz 2000: 512)\nGlaser brought his very positivist assumptions from his work at Columbia, and Strauss’ work in Chicago with Herbert Blumer and Robert Park infused a pragmatic philosophical approach to the study of process, action and meaning that reflects symbolic interactionism.\n\nGlaser\nGlaser’s position comes close to traditional positivism, with assumptions of an objective, external reality and a neutral observer who discovers data. and a reductionist form of inquiry of manageable research problems. According to Charmaz (2000: 511), regarding Glaser’s approach:\n\nTheoretical categories must be developed from analysis of the collected data and must fit them; these categories must explain the data they subsume. 
This grounded theorists cannot shop their disciplinary stores for preconceived concepts and dress their data in them. Any existing concept must earn its way into the analysis. … The relevance of a grounded theory derives from its offering analytic explanations of actual problems and basic processes in the research setting. A grounded theory is durable because researchers can modify their emerging or established analyses as conditions change or further data are collected.\n\n\n\nCorbin and Strauss\nStrauss and Corbin assume an objective reality, aim toward unbiased data collection, propose a series of technical procedures, and espouses verification. However, they are postpositivism because they propose giving voice to their respondents,3 representing them as accurately as possible, discovering and reckoning with how their respodents’ views on reality differ from their own, and reflecting on the research process as one way of knowing.\n3 Charmaz uses the term “giving voice” in this specific context. I’m not sure if this is meant to represent Strauss and Corbin’s attitude, and whether this is an accurate representation on their views, but in my mind this should be framed as elevating, amplifying or re-articulating respondents’ voices (and this is a tenet of constructivist grounded theory in general, which derives from Charmaz). My take diverges from the position that we “give voice” to respondents in that it acknowledges (1) that the voices are already there, (2) that respondents are in fact giving us their voices, and (3) that the researcher plays an active editorial role, transforming the respondents’ elicitations into a format that is more amenable to analysis.Corbin and Strauss (1990) “gained readers but lost the sense of emergence and open-ended character of Strauss’s earlier volume and much of his empirical work. 
The improved and more accessible second edition of Basics (Strauss and Corbin 1998) reads as less prescriptive and aims to lead readers to a new way of thinking about their research and about the world.” (Charmaz 2000: 512)\nStrauss apparently became more insistent that grounded theory should be more verificational in nature in personal communications.\nGlaser (1992) responded to Strauss and Corbin (1990), repudiating what he perceived as forcing preconceived questions and frameworks on the data. Glaser considered it better to allow theory to “emerge” from the data, i.e. to let the data speak for themselves.\nCharmaz identifies these two approaches as having a lot in common: hey both advocate for mitigating factors that would hinder objectivity and minimize intrusion of the researcher’s subjectivity, and they are both embedded in positivist attitudes, with a researcher sitting outside the observed reality; Glaser exemplifies these through discovering and coding data, and using systematic comparative methods, whereas Strauss and Corbin maintain a similar distance through their analytical questions, hypotheses and methodological applications. They both engage in “silent authorship” and usually write about their data as distant experts (Charmaz and Mitchell 1996).\n\n\nConstuctivist Grounded Theory\n\nConstructivist grounded celebrates firsthand knowledge of empirical worlds, takes a middle ground between postmodernsm and positivism, and offers accessible methods for taking qualitative research into the 21st century. (510)\n\n\nThe power of grounded theory lies in its tools for understanding empirical worlds. We can reclaim these tools from their positivist underpinnings to form a revised, more open-ended practice of grounded theory that stresses its emergent, constructivist elements. We can use grounded theory methods as flexible, heuristic strategies rather than as formulaic procedures. 
(510)\n\nThree aspects to Charmaz’s argument (510):4\n4 Very much in line with the pragmatist turn of the late ’90s and early ’00s, as also documented by Lucas (2019: 54-57) in the context of archaeological theory, vis-a-vis positivism, postmodernism, and settling on a middle ground between them.\nGrounded theory strategies need not be rigid or prescriptive;\na focus on meaning while using grounded theory furthers, rather than limits, interpretive understanding; and\nwe can adopt grounded theory strategies without embracing the positivist leanings of earlier proponents of grounded theory.\n\nRepudiation of the notion that data speak for themselves, that data do not lie. Recognition that data are constructs of the rsearch process, are framed by the questions we ask informants and the methodological tools of our collection procedures.\nCharmaz (2000: 515) advocates for what seems to be a dialogical approach to coding, between researcher and the data:\n\nWe should interact with our data and pose questions to them while coding. Coding helps us to gain a new perspective on our material and to focus further data collection, and may lead us in unforeseen directions. Unline quantitative research that requires data to fit into preconceived standardized codes, the researcher’s interpretations of data shape his or her emergent codes in grounded theory.\n\nDistinguishes articulates open/initial coding as proceeding line by line to get a general sense of what the data contains. It is meant to keep the researcher close to the data, to remain attuned to the subjects’ views of their realities.\n\nLine-by-line coding sharpens our use of sensitizing concepts — that is, those background ideas that inform the overall research problem. Sensitizing concepts offer eays of seeing, organizing, and understanding experience; they are embedded in our disciplinary emphases and perspectival proclivities. 
Although sensitizing conceots may deepen perception, they provide starting points for building analysis, not ending points for evading it. We may use sensitizing concepts only as points of departure from which to study the data.\n\nMuch of the rest of the Charmaz (2000) paper is an overview of coding and memoing methods, as well as theoretical sampling. The emphasis is on situating these techniques in the Glaser / Strauss and Corbin debate, and it will be better to refer to Charmaz (2014) for in-depth notes on these techniques.\nCharmaz (2000: 521-522) provides an apt account of a significant critique of grounded theory, and poses her constructivist approach as a potential means of resolving it. Specifically, she refers to the notion that grounded theory (as traditionally conceived by both Glaser and Strauss and Corbin) “fractures” the data, making them easier to digest in an analytical sense, but also making it more difficult to engage with in a holistic manner. This is precisely the point of the original approach, to present qualitative data as data — as conceived and valued by quantitative researchers, i.e. as discrete, corpuscular, disembodied, re-arrangable and distant entities. The text of these two large paragraphs is copied here:\n\nConrad (1990) and Riessman (1990) suggest that “fracturing the data” in grounded theory research might limit understanding because grounded theorists aim for analysis rather than the portrayal of subjects’ experience in its fullness. From a grounded theory perspective, fracturing the data means creating codes and categories as the researcher defines themes within the data. 
Glaser and Strauss (1967) propose this strategy for several reasons: (a) to help the researcher avoid remaining immersed in anecdotes and stories, and subsequently unconsciously adopting subjects’ perspectives; (b) to prevent the researcher’s becoming immobilized and overwhelmed by voluminous data; and (c) to create a way for the researcher to organize and interpret data. However, criticisms of fracturing the data imply that grounded theory methods lead to separating the experience from the experiencing subject, the meaning from the story, and the viewer from the viewed. In short, the criticisms assume that the grounded theory method (a) limits entry into subjects’ worlds, and thus reduces understanding of their experience; (b) curtails representation of both the social world and subjective experience; (c) relies upon the viewer’s authority as expert observer; and (d) posits a set of objectivist procedures on which the analysis rests.\nResearchers can use grounded theory methods to further their knowledge of subjective experience and to expand its representation while neither remaining external from it nor accepting objectivist assumptions and procedures. A constructivist grounded theory assumes that people create and maintain meaningful worlds through dialectical processes of conferring meaning on their realities and acting within them (Bury 1986; Mishler 1981). Thus social reality does not exist independent of human action. Certainly, my approach contrasts with a number of grounded theory studies, methodological statements, and research texts (see, e.g., Chenitz and Swanson 1986; Glaser 1992; Martin and Turner 1986; Strauss and Corbin 1990; Turner 1981). By adopting a constructivist grounded theory approach, the researcher can move grounded theory methods further into the realm of interpretive social science consistent with a Blumerian (1969) emphasis on meaning, without assuming the existence of a unidimensional external reality. 
A constructivist grounded theory recognizes the interactive nature of both data collection and analysis, resolves recent criticisms of the method, and reconciles positivist assumptions and postmodernist critiques. Moreover, a constructivist grounded theory fosters the development of qualitative traditions through the study of experience from the standpoint of those who live it.\n\nCharmaz’s (2000: 523) proposal for a re-visioned grounded theory poses research as a materializing process:\n\nA re-visioned grounded theory must take epistemological questions into account. Grounded theory can provide a path for researchers who want to continue to develop qualitative traditions without adopting the positivistic trappings of objectivism and universality. Hence the further development of a constructivist grounded theory can bridge past positivism and a revised future form of interpretive inquiry. A revised grounded theory preserves realism through gritty, empirical inquiry and sheds positivistic proclivities by becoming increasingly interpretive.\n\nCharmaz (2000: 523) addresses realism and truth in constructivist grounded theory, and explicitly relates it to Blumerian situated interactionism:\n\nA constructivist grounded theory distinguishes between the real and the true. The constructivist approach does not seek truth — single, universal, and lasting. Still, it remains realist because it addresses human realities and assumes the existence of real worlds. However, neither human realities nor real worlds are unidimensional. We act within and upon our realities and worlds and thus develop dialectical relations among what we do, think, and feel. The constructivist approach assumes that what we take as real, as objective knowledge and truth, is based upon our perspective (Schwandt 1994). The pragmatist underpinnings in symbolic interactionism emerge here. 
Thomas and Thomas (1928: 572) proclaim, “If human beings define their situations as real, they are real in their consequences”. Following their theorem, we must try to find what research participants define as real and where their definitions of reality take them. The constructivist approach also fosters our self-consciousness about what we attribute to our subjects and how, when, and why researchers portray these definitions as real. Thus the research products do not constitute the reality of the respondents’ reality. Rather, each is a rendering, one interpretation among multiple interpretations, of a shared or individual reality. That interpretation is objectivist only to the extent that it seeks to construct analyses that show how respondents and the social scientists who study them construct those realities — without viewing those realities as unidimensional, universal, and immutable. Researchers’ attention to detail in the constructivist approach sensitizes them to multiple realities and the multiple viewpoints within them; it does not represent a quest to capture a single reality.\nThus we can recast the obdurate character of social life that Blumer (1969) talks about. In doing so, we change our conception of it from a real world to be discovered, tracked, and categorized to a world made real in the minds and through the words and actions of its members. Thus the grounded theorist constructs an image of a reality, not the reality — that is, objective, true, and external.\n\nOn the other hand, Charmaz (2000: 524) frames objectivist grounded theory as believing in some kind of truth:\n\nObjectivist grounded theory accepts the positivistic assumption of an external world that can be described, analyzed, explained, and predicted: truth, but with a small t. That is, objectivist grounded theory is modifiable as conditions change. 
It assumes that different observers will discover this world and describe it in similar ways. That’s correct — to the extent that subjects have comparable experiences (e.g., people with different chronic illnesses may experience uncertainty, intrusive regimens, medical dominance) and viewers bring similar questions, perspectives, methods, and, subsequently, concepts to analyze those experiences. Objectivist grounded theorists often share assumptions with their research participants — particularly the professional participants. Perhaps more likely, they assume that respondents share their meanings. For example, Strauss and Corbin’s (1990) discussion of independence and dependence assumes that these terms hold the same meanings for patients as for researchers.\n\nCharmaz (2000: 525) further embeds constructivist grounded theory as a way to fulfill Blumer’s symbolic interactionism:\n\nWhat helps researchers develop a constructivist grounded theory? How might they shape the data collection and analysis phases? Gaining depth and understanding in their work means that they can fulfill Blumer’s (1969) call for “intimate familiarity” with respondents and their worlds (see also Lofland and Lofland 1984, 1995). In short, constructing constructivism means seeking meanings — both respondents’ meanings and researchers’ meanings.\n\nCharmaz (2000: 524) on the concretization of procedures from what were originally meant to be guidelines:\n\nGuidelines such as those offered by Strauss and Corbin (1990) structure objectivist grounded theorists’ work. These guidelines are didactic and prescriptive rather than emergent and interactive. Sanders (1995: 92) refers to grounded theory procedures as “more rigorous than thou instructions about how information should be pressed into a mold”. Strauss and Corbin categorize steps in the process with scientific terms such as axial coding and conditional matrix (Strauss 1987; Strauss and Corbin 1990, 1994). 
As grounded theory methods become more articulated, categorized, and elaborated, they seem to take on a life of their own. Guidelines turn into procedures and are reified into immutable rules, unlike Glaser and Strauss’s (1967) original flexible strategies. By taking grounded theory methods as prescriptive scientific rules, proponents further the positivist cast to objectivist grounded theory.\n\n\nOn the modes of reasoning behind grounded theory\nKelle (2005) is an overview of the Glaser / Strauss and Corbin split. References to Kelle (2005) have no page numbers since it is published in an online-only journal and does not specify paragraph numbers.\nHighlights a primary impetus behind Glaser and Strauss (1967), which used political analogies to distinguish between “theoretical capitalists” and “proletariat testers”, and unify the field of sociology by de-centering emphasis on theories developed by “great men”.\nA common thread in this paper is sensitivity to the practical challenges of actually doing grounded theory according to Glaser’s approach:\n\nThe infeasibility of an inductivist research strategy which demands an empty head (instead of an “open mind”) cannot only be shown by epistemological arguments, it can also be seen in research practice. Especially novices in qualitative research with the strong desire to adhere to what they see as a basic principle and hallmark of Grounded Theory — the “emergence” of categories from the data — often experience a certain difficulty: in open coding the search for adequate coding categories can become extremely tedious and a subject of sometimes numerous and endless team sessions, especially if one hesitates to explicitly introduce theoretical knowledge. 
The declared purpose to let codes emerge from the data then leads to an enduring proliferation of the number of coding categories which makes the whole process insurmountable.\n\nKelle (2005) basically takes down the original Glaser and Strauss (1967) and subsequent reflection on theoretical sensitivity (Glaser 1978). He highlights fundamental contradictions and oversights with regards to the role of theory in grounded theory, specifically with regards to the notion that such research can be accomplished with inductive purity:\n\nConsequently, in the most early version of Grounded Theory the advice to employ theoretical sensitivity to identify theoretical relevant phenomena coexists with the idea that theoretical concepts “emerge” from the data if researchers approach the empirical field with no preconceived theories or hypotheses. Both ideas which have conflicting implications are not integrated with each other in the Discovery book. Furthermore, the concept of theoretical sensitivity is not converted into clear cut methodological rules: it remains unclear how a theoretically sensitive researcher can use previous theoretical knowledge to avoid drowning in the data. If one takes into account the frequent warnings not to force theoretical concepts on the data one gets the impression that a grounded theorist is advised to introduce suitable theoretical concepts ad hoc drawing on implicit theoretical knowledge but should abstain from approaching the empirical data with ex ante formulated hypotheses.\n\nKelle (2005) recognizes that Glaser identified a series of “theoretical families” to help assist with the practical experience of coding. 
I find it somewhat interesting that many of the terms in these first families are very reminiscent of so-called “natural language”, as used in the wave of cybernetics that was contemporary with Glaser (1978) and which largely dealt with “expert systems”.\n\nIn the book “Theoretical Sensitivity” (1978) GLASER presents an extended list of terms which can be used for the purpose of theoretical coding loosely structured in the form of so called theoretical “coding families”. Thereby various theoretical concepts stemming from different (sociological, philosophical or everyday) contexts are lumped together, as for example:\n\nterms, which relate to the degree of an attribute or property (“degree family”), like “limit”, “range”, “extent”, “amount” etc.,\nterms, which refer to the relation between a whole and its elements (“dimension family”), like “element”, “part”, “facet”, “slice”, “sector”, “aspect”, “segment” etc.,\nterms, which refer to cultural phenomena (“cultural family”) like “social norms”, “social values”, “social beliefs” etc.\n\n\nThis is substantiated by other observations by Kelle (2005) that ad hoc coding actually follows implicit theoretical knowledge:\n\nOne of the most crucial differences between GLASER’s and STRAUSS’ approaches of Grounded Theory lies in the fact that STRAUSS and CORBIN propose the utilization of a specified theoretical framework based on a certain understanding of human action, whereas GLASER emphasises that coding as a process of combining “the analyst’s scholarly knowledge and his research knowledge of the substantive field” (1978, p.70) has to be realised ad hoc, which means that it has often to be conducted on the basis of a more or less implicit theoretical background knowledge.\n\nand that the Glaserian approach is better suited for more experienced, rather than novice sociologists, who will have internalized the theory that they then apply in their coding.\nKelle then goes on to address how grounded theory can or can not be 
applied in alignment with inductivist or hypothetic-deductivist reasoning, and raises abductive reasoning as an alternative means of arriving at legitimate and verifiable conclusions. There is too much detail in the paper to copy here.\nBut here is another nice conclusive gem from the end:\n\nWhereas STRAUSS and CORBIN pay a lot of attention to the question how grounded categories and propositions can be further validated, GLASER’s concept shows at least a gleam of epistemological fundamentalism (or “certism”, LAKATOS 1978) especially in his defence of the inductivism of early Grounded Theory. “Grounded theory looks for what is, not what might be, and therefore needs no test” (GLASER 1992, p.67). Such sentences carry the outmoded idea that empirical research can lead to final certainties and truths and that by using an inductive method the researcher may gain the ability to conceive “facts as they are” making any attempt of further corroboration futile.\n\n\n\nRebuttals by Glaser\nGlaser (2002) constitutes a rebuttal to Charmaz (2000). As Bryant (2003) points out in his response to Glaser (2002), it is very angry, polemical and irrational. I don’t want to go too in depth with the fundamental problems with Glaser’s response (see Bryant’s paper for the details), but the gist is that Glaser never really got the message about data being inherently constructed by researchers’ decisions, actions and circumstances. Glaser seems to continue believing in the inherent neutrality of data as a matter of faith.\nThis being said, Glaser (2002) did highlight the large emphasis on descriptive rather than explanatory potential in Charmaz’s approach. This aligns with my own apprehensions when I try to address the relevance of my work. 
I tend to use the term “articulate” as a way to frame my work as descriptive, but in a way that lends value, and this very fuzzy distinction between the power of identifying the shapes and relationships among things and explaining their causes and effects in a generalizable way (i.e., theories, or explanations), still somehow troubles me. I wonder if Glaser is drawing a false distinction here, and through that, a false prioritization of explanation over description as a desired outcome. This would put my mind at ease, as would dismissing Glaser’s dismissal of people who simply don’t know how to do the “real” grounded theory (which, in his mind, include all feminist and critical researchers).\n\n\nOn the utility of grounded theory\nI completely agree with this statement from Clarke (2003: 555):\n\nTo address the needs and desires for empirical understandings of the complex and heterogeneous worlds emerging through new world orderings, new methods are requisite (Haraway 1999). I believe some such methods should be epistemologically/ontologically based in the pragmatist soil that has historically nurtured symbolic interactionism and grounded theory. Through Mead, an interactionist grounded theory has always had the capacity to be distinctly perspectival in ways fully compatible with what are now understood as situated knowledges. This fundamental and always already postmodern edge of a grounded theory founded in symbolic interactionism makes it worth renovating.\n\nThis is super interesting, and really contextualizes how Strauss imagined grounded theory to be useful for him:\n\nSome years ago, Katovich and Reese (1993:400–405) interestingly argued that Strauss’s negotiated order and related work recuperatively pulled the social around the postmodern turn through its methodological [grounded theoretical] recognition of the partial, tenuous, shifting, and unstable nature of the empirical world and its constructedness. 
I strongly agree and would argue that Strauss also furthered this “postmodernization of the social” through his conceptualizations of social worlds and arenas as modes of understanding the deeply situated yet always also fluid organizational elements of negotiations. He foreshadowed what later came to be known as postmodern assumptions: the instability of situations; characteristic changing, porous boundaries of both social worlds and arenas; social worlds seen as mutually constitutive/coproduced through negotiations taking place in arenas; negotiations as central social processes hailing that “things can always be otherwise”; and so on. Significantly, negotiations constitute discourses that also signal micropolitics of power as well as “the usual” meso/macrostructural elements—power in its more fluid forms (e.g., Foucault 1979, 1980). Through integrating the social worlds/arenas/negotiations framework with grounded theory as a new conceptual infrastructure, I hope to sustain and extend the methodological contribution of grounded theory to understanding and elaborating what has been meant by “the social” in social life — before, during, and after the postmodern turn.\n\nIt also echoes Charmaz’s vision of grounded theory as a powerful tool, and Bryant’s (2003) call to “look at what Glaser and Strauss actually did, rather than what they claimed — and continued to claim — they were doing” to uncover “the basis for a powerful research approach”. Bryant (2003) further cites Baszanger and Dodier (1997), who characterize grounded theory as a method “consisting of accumulating a series of individual cases, of analyzing them as a combination between different logics of action that coexist not only in the field under consideration, but even within these individuals or during their encounters”. 
Bryant (2003) summarizes this by stating that “[t]he aim of such methods is generalization rather than totalization, with the objective of producing “a combinative inventory of possible situations”.\n\n\n\nTheoretical sampling\nSee Charmaz (2000): 519-520.\nFrom Clarke (2003: 557):\n\nUnique to this approach has been, first, its requiring that analysis begin as soon as there are data. Coding begins immediately, and theorizing based on that coding does as well, however provisionally (Glaser 1978). Second, “sampling” is driven not necessarily (or not only) by attempts to be “representative” of some social body or population (or its heterogeneities) but especially and explicitly by theoretical concerns that have emerged in the provisional analysis. Such “theoretical sampling” focuses on finding new data sources (persons or things) that can best explicitly address specific theoretically interesting facets of the emergent analysis. Theoretical sampling has been integral to grounded theory from the outset, remains a fundamental strength of this analytic approach, and is crucial for the new situational analyses." + "objectID": "notes/potential-cases.html", + "href": "notes/potential-cases.html", + "title": "Potential cases", + "section": "", + "text": "Isabel Fortier came up with a shortlist based on consultations to help determine which Maelstrom partner projects may serve as potential cases. We then met on 2025-02-04, when, among other topics, we discussed the shortlist.\nSee the case selection protocol for further details on the parameters that guide how cases are to be determined." 
}, { - "objectID": "notes/methodology-notes.html#data-collection", - "href": "notes/methodology-notes.html#data-collection", - "title": "Methodology notes", - "section": "Data Collection", - "text": "Data Collection\n\nInterviews\n\nstructured, semi-structured\nlink to more detailed transcription protocol\nSee (Yin 2014: 110-113)\nSee Becker (1998)\n\nSee Fontana and Frey (2000)\n\nFrom Charmaz (2000: 525):\n\nA constructivist approach necessitates a relationship with respondents in which they can cast their stories in their terms. It means listening to their stories with openness to feeling and experience. … Furthermore, one-shot interviewing lends itself to a partial, sanitized view of experience, cleaned up for public discourse. The very structure of an interview may preclude private thoughts and feelings from emerging. Such a structure reinforces whatever proclivities a respondent has to tell only the public version of the story. Researchers’ sustained involvement with research participants lessens these problems.\n\n\nTranscribing\nThis section describes how I transcribe interviews and accounts for the decisions to encode certain things and not others. 
It goes on to explains the procedures for transcribing spoken dialog into textual formats, including the notation applied to encode idiosyncratic elements of conversational speech.\nCheck out Silverman (2000), who writes about the nuanced challenges of working with and between verbal and textual media, and what this means for transcription.\n\nTranscript notation\nDerived from the transcription protocol applied for the E-CURATORS project.\n\n\nCleaning audio\nTo clean the audio:\n\nI select a clip that is representative of a single source of background noise, and then filter that wavelength throughout the entire audio file.\nAfter selecting the clip, go to Effect >> Noise Reduction and select Get Noise Profile, then press OK.\nClose the noise reduction menu, select the entire range of audio using the keyboard shortcut Command + A.\nThen go back to the noise reduction window (Effect >> Noise Reduction) to apply the filter based on the noise profile identified for the noisy clip.\nExport the modified audio file to the working directory (File >> Export >> Export as .WAV).\nUse ffmpeg to replace the dirty audio track with the clean one:\n\n ffmpeg -i dirty.mp4 -i clean.wav -c:v copy -map 0:v:0 -map 1:a:0 clean.mp4\n\n\n\n\nObservations\nSee Angrosino and Mays de Pérez (2000)\n\n\nField notes\nSee (Yin 2014: 124-125)\n\n\nRecording video" + "objectID": "notes/potential-cases.html#general-notes", + "href": "notes/potential-cases.html#general-notes", + "title": "Potential cases", + "section": "", + "text": "Isabel Fortier came up with a shortlist based on consultations to help determine which Maelstrom partner projects may serve as potential cases. We then met on 2025-02-04, when, among other topics, we discussed the shortlist.\nSee the case selection protocol for further details on the parameters that guide how cases are to be determined." 
}, { - "objectID": "notes/methodology-notes.html#qda", - "href": "notes/methodology-notes.html#qda", - "title": "Methodology notes", - "section": "QDA", - "text": "QDA\nMy QDA processes are most influenced by Kathy Charmaz and Johnny Saldaña, as well as the practical experiences instilled during my PhD and while working on E-CURATORS.\n\nSensitizing concepts\nFrom Kelle (2005):\n\nHerbert BLUMER invented the term “sensitizing concepts” to describe theoretical terms which “lack precise reference and have no bench marks which allow a clean cut identification of a specific instance” (1954, p.7). Sensitizing concepts are useful tools for descriptions but not for predictions, since their lack of empirical content permits researchers to apply them to a wide array of phenomena. Regardless how empirically contentless and vague they are, they may serve as heuristic tools for the construction of empirically grounded theories.\n\nSee Bowen (2006)\n\n\nCoding\nThese notes are largely derived from my reading of Saldaña (2016), which provides a practical overview of what coding entails and specific methods and techniques.\nCoding as component of knowledge construction:\n\nCoding is an intermediate step, “the ‘critical link’ between data collection and their explanation or meaning” (Charmaz (2001), as quoted in Saldaña (2016): 4)\n“coding is usually a mixture of data [summation] and data complication … breaking the data apart in analytically relevant ways in order to lead toward further questions about the data” (Coffey and Atkinson (1996): 29-31, as quoted and edited by Saldaña (2016): 9)\n\nThis relates to the paired notions of decoding when we reflect on a passage to decipher its core meaning, and encoding when we determine its appropriate code and label it (Saldaña 2016: 5).\n\nCoding “generates the bones of your analysis. 
… [I]ntegration will assemble those bones into a working skeleton” (Charmaz (2014): 113, quoted in Saldaña (2016): 9)\nTo codify is to arrange things in a systematic order, to make something part of a system or classification, to categorize\n\nWhat I sometimes refer to as arranging the code tree\nWhat Saldaña (2016) refers to as categories, I tend to refer to as stubs\n\nCategories are arranged into themes or concepts, which in turn lead to assertions or theories\n\nPre-coding techniques: - Data layout - Separation between lines or paragraphs may hold significant meaning - Putting interviewer words in square brackets or capital letters - Semantic markup - Bold, italics, underline, highlight - Meant to identify “codable moments” worthy of attention (Boyatzis (1998), as referenced in Saldaña (2016): 20) - Relates to Saldaña (2016): 22’s prompt: “what strikes you?” - Preliminary jottings - Tri-column exercise with the text on the left, first impression or preliminary code in the middle, and code on the right, after Liamputtong and Ezzy (2005): 270-273.\nAsking questions back to the interviewer, or participating in an imagined dialogue. I imagine this might be useful in situations where the time to hold an interview is quite limited and I have to work with limited responses that don’t touch on everything I want to cover. The form of questions maintains my tentativity, my unwillingness to commit or assume their responses, and opens the door for their own responses in rebuttal.\nMagnitude coding can be applied to tag positive/negative attitudes, but also other gradients like hard/soft, technical/social skills. May be useful to apply symbols using my little 12-button keypad.\nCan also use colons to identify a magnitude associated with a code’s usage, as per 88-89.\nNote: create a qc issue for sub-documents, for identifying sections of a document that are especially relevant and hiding less relevant sections. 
I don’t necessarily want to delete these lines, but I may want to hide them from view. Maybe this is possible using vscode, outside of qc (see https://stackoverflow.com/a/72954133).\nIn lieu of initial/open coding, I think I will opt to devise sensitizing concepts, which may amalgamate as memos. I could use the prefix “SC:” to denote sensitizing concepts.\nWhat saldana refers to as “concept coding” is what I have previously referred to as “theoretical coding” to a certain extent. It’s a form of lumping, identifying specific instances under the label of cohesive concepts.\nFrom Clarke (2003: 558) on process coding:\n\nIn a traditional grounded theory study, the key or basic social process is typically articulated in gerund form connoting ongoing action at an abstract level. Around this basic process are then constellated the particular and distinctive conditions, strategies, actions, and practices engaged in by human and nonhuman actors in volved with/in the process and their consequences. For example, subprocesses of disciplining the scientific study of reproduction include formalizing a scientific disci pline, gleaning fiscal support for research, producing contraceptives and other techno scientific products, and handling any social controversies the science provokes (such as cloning and stem cell research).\n\n\n\nMemos\nSaldana chapter 2 on “analytic memos”\n\n\nPreliminary analyses\nYin (2014: 135-136 5) identifies various strategies for analyzing case study evidence.\n\nA helpful starting point is to “play” with your data. You are searching for patterns, insights, or concepts that seem promising. 
(Yin 2014: 135)\n\nCiting Miles and Huberman (1994), Yin (2014) lists a few strategies at this playful stage:\n\nJuxtaposing data from different interviews\nPutting information into different arrays\nMaking a matrix of categories and placing the evidence within them\nTabulating the frequency of different events\nPutting information in chronological order or using some other temporal scheme\n\nYin (2014: 135) also emphasizes memo-writing as a core strategy at this stage, citing Corbin and Strauss (2014). These memos should include hints, clues and suggestions that simply put into writing any preliminary interpretation, essentially conceptualizing your data. He uses the specific example of shower thoughts.\n\n\nAnalytical strategies and techniques\nYin (2014: 136-142) then goes on to describe four general strategies:\n\nRelying on theoretical propositions\nWorking your data from the “ground up”\nDeveloping a case description\nExamining plausible rival explanations\n\nYin (2014: 142-168) then goes on to describe five analytical techniques:5\n5 I wonder: would Abbott (2004) call these heuristics?\nPattern matching\nExplanation building\nTime-series analysis\nLogic models\nCross-case synthesis\n\nRyan and Bernard (2000) describe various analysis techniques for analyzing textual elicitations in structured and codified ways.\n\n\nThe constant comparative method\nThe constant comparative method is based on action codes, similar to what Saldaña (2016) refers to as process codes. 
According to Charmaz (2000: 515): > The constant comparative method of grounded theory means (a) comparing different people (such as their views, situations, actions, accounts, and experiences), (b) comparing data from the same individuals with themselves at different points in time, (c) comparing incident with incident, (d) comparing data with category, and (e) comparing categories with other categories.\nMy initial impression is that this is very well suited for Stake’s (2006) multicase study framework, specifically with regards to his notion of the case-quintain dilemma. It also seems very well suited for analysis of situational meaning-making, as per Suchman (1987), Lave and Wenger (1991), Knorr Cetina (2001) and symbolic interactionism at large.\n\n\nSituational analysis\nSituational analysis originates from Strauss’s social worlds/arenas/negotiations framework. From Clarke (2003: 554):\n\nBuilding on and extending Strauss’s work, situational analyses offer three main cartographic approaches:\n\nsituational maps that lay out the major human, nonhuman, discursive, and other elements in the research situation of concern and provoke analyses of relations among them;\nsocial worlds/arenas maps that lay out the collective actors, key nonhuman elements, and the arena(s) of commitment within which they are engaged in ongoing negotiations, or mesolevel interpretations of the situation; and\npositional maps that lay out the major positions taken, and not taken, in the data vis-à-vis particular discursive axes of variation and difference, concern, and controversy surrounding complicated issues in the situation.\n\n\nRefer to highlighted sections in Clarke (2003), bring those over at some point.\nClarke (2003) refers to Shim (2000) as an exemplary case of situational analysis in action.\n\n\nStatistical methods\ncrosstab\n\n\nOn software\nWeitzman (2000) provides an overview of software and qualitative research, including a minihistory up to the year 2000 when the 
chapter was published.\nDescribing the first programs specifically designed for analysis of qualitative data, Weitzman (2000: 804) writes:\n\nEarly programs like QUALOG and the first versions of NUDIST reflected the state of computing at that time. Researchers typically accomplished the coding of texts (tagging chunks of texts with labels — codes — that indicate the conceptual categories the researcher wants to sort them into) by typing in line numbers and code names at a command prompt, and there was little or no facility for memoing or other annotation or markup of text.6 In comparison with marking up text with coloured pencils, this felt awkward to many researchers. And computer support for the analysis of video or audio data was at best a fantasy.\n6 This caught my eye since it’s the same approach as that adopted by qc!\nThis history is followed by a sober account of what software can and can not do in qualitative research, as well as affirmation and dismissal of hopes and fears. Very reminiscent of Huggett (2018)." + "objectID": "notes/potential-cases.html#general-notes", + "href": "notes/potential-cases.html#general-notes", + "title": "Potential cases", + "section": "", + "text": "Isabel Fortier came up with a shortlist based on consultations to help determine which Maelstrom partner projects may serve as potential cases. We then met on 2025-02-04, when, among other topics, we discussed the shortlist.\nSee the case selection protocol for further details on the parameters that guide how cases are to be determined." 
ReACH was led by Isabel as its PI.\nBoth projects are complete, but Isabel thinks that Julie Bergeron will be able to share some significant insights on this past work. My instinct is that this presents an opportunity to explore how/whether harmonization is suited for doctoral training, the role of pilot projects within broader initiatives, and impact that closeness to the method of data harmonization might have.\nLinks:\n\nhttps://www.maelstrom-research.org/network/reach\n\n\n\nCAPACIty\nTitle: Capacity: Building CAPACIty for pediatric diabetes research and quality improvement across Canada\nAcronym: CAnadian PediAtric diabetes ConsortIum (CAPACIty)\nContact: Shazhan Amed \nReason: Across Canada, focus on clinical data. A lot of work already achieved, and harmonization will start soon. Will use a federated approach for statistical analysis.\nNotes: A network of 15 childhood diabetes centers from across Canada. Went through four years of administrative work, and is now just starting harmonization after finally going through all those hurdles, despite being very eager to get into the data work early on. Despite these challenges, Isabel thinks they will be very receptive to participating in the study.\n\n\nSHAIRE\nTitle: SHAIRE: Scientific & Health Data Assets In Respiratory Research\nContact: Sanja Stanojevic \nReason: New project just starting, very interesting and dynamic.\nNotes: Extremely new, just got funding very recently. I don’t know that much, to be honest. Could potentially provide redundant value to my study as Capacity, but need to find out more.\n\n\nRespiratory study\nTitle: High-dose docosahexaenoic acid for bronchopulmonary dysplasia severity in very preterm infants: A collaborative individual participant data meta-analysis\nContact: Isabelle Marc \nReason: Very specific and small project, very clinical.\nNotes: A very small project, harmonizing two datasets. 
I asked if this scale of work is common and Isabel says that it is, so it’s not an outlier.\nLinks:\n\nhttps://maelstrom-research.org/study/n3pi-hi\n\n\n\nMORGAM\nTitle: MOnica Risk, Genetics, Archiving and Monograph\nContact: Kari Kuulasmaa \nReason: European, started several years ago.\nNotes: Older project, ended around 10 years ago, the PI is retired. Might be valuable for looking at broader impact and potential offshoots after the work has been completed.\nLinks:\n\nhttps://www.maelstrom-research.org/network/morgam\n\n\n\nLifecycle\nVery improvized approach to data harmonization, did a lot of “manual” work. According to Isabel, Julie Bergeron will be able to tell me more.\nLinks:\n\nhttps://pmc.ncbi.nlm.nih.gov/articles/PMC7387322" }, { - "objectID": "notes/methodology-notes.html#writing", - "href": "notes/methodology-notes.html#writing", - "title": "Methodology notes", - "section": "Writing", - "text": "Writing\nSee Richardson (2000), who frames writing as a method of inquiry.\nSee Mitchell and Charmaz (1996)\nSee Charmaz (2000: 526-528)" + "objectID": "notes/potential-cases.html#recommended-but-no-longer-considered", + "href": "notes/potential-cases.html#recommended-but-no-longer-considered", + "title": "Potential cases", + "section": "Recommended but no longer considered", + "text": "Recommended but no longer considered\n\nCanPath\nTitle: CanPath, formerly called the Canadian Partnership for Tomorrow’s Health\nContact: Noah Frank \nReason: One of the most important harmonization initiative, infrastructure oriented, long-term started more than 10 years ago.\nNotes: Other contacts include John Mclaughlin and Paula Robson.\nMy instinct is to look at how things change over the course of a very long and institutional initiative, especially across discrete phases marked by leadership transitions. 
But the history here is so vast and I will probably not get much of it through a series of interviews.\n\n\nMindmap\nPromoting mental well-being and healthy ageing in cities. Seems very similar to CanPath in terms of scope and governance, and I would likely face similar challenges." }, { - "objectID": "notes.html", - "href": "notes.html", - "title": "Notes", + "objectID": "notes/maelstrom-readings.html", + "href": "notes/maelstrom-readings.html", + "title": "Maelstrom reading notes", "section": "", - "text": "Modified\n\n\nTitle\n\n\nCategories\n\n\n\n\n\n\nFeb 4, 2025\n\n\nPotential cases\n\n\ncases, brainstorming\n\n\n\n\nFeb 12, 2025\n\n\nMethodology notes\n\n\nreading, general thoughts\n\n\n\n\nJan 28, 2025\n\n\nMaelstrom reading notes\n\n\nreading\n\n\n\n\n\nNo matching items", - "crumbs": [ - "Notes" - ] + "text": "Initial overview of data harmonization procedures, using the Healthy Obesity Project (HOP) as an illustrative case.\nOutlines the technical apparatus, especially for DataShield, but also broadly describes the discursive process of arriving at a DataSchema that is both functional and flexible.\n\nThis description is quite broad and abstracy, seems somewhat ideal and aspirational.\n\nDescribes reliance on international standards, such as the International Labour Organization’s International Standard Classification of Occupations.\n\nIt seems like these are used as black boxes that encapsulate a series of tensions which epidemiologists are unconcerned with; in effect, they simplify the need for stretching the collaborative ties even further than they are already extended, they represent matters out of scope for deeper discursive engagement.\n\nIt is notable that they emphasize that it’s easy to set up and use DataShield and Maelstorm toolkits independently of university IT and that it can be run using RStudio installed on a basic laptop.\n\nMaybe look into the historical context (2013) and the evolving role of university IT in software 
selection.\n\nThe conclusion states that the HOP project was successful in its harmonization efforts, but does not go as far as to state that it produced meaningful findings as a result of harmonization.\n\nI may take some time to find and read studies that used these data to see what’s what.\nThis seems like the main one: https://doi.org/10.1186/1472-6823-14-9, but these other papers may or not not also be relevant:\n\nhttps://doi.org/10.1016/j.smhl.2021.100263\nhttps://doi.org/10.1007/s10654-014-9977-1\nhttps://doi.org/10.1530/EJE-14-0540\nhttps://doi.org/10.1007/S13679-020-00375-0\nhttps://doi.org/10.1093/eurpub/ckac061" }, { - "objectID": "index.html", - "href": "index.html", - "title": "CITF-Postdoc", + "objectID": "notes/maelstrom-readings.html#doiron2013", + "href": "notes/maelstrom-readings.html#doiron2013", + "title": "Maelstrom reading notes", "section": "", - "text": "This website serves as a hub for my postdoctoral research at McGill University’s Covid-19 Immunity Task Force Databank.\nThe project is concerned with articulating social, collaborative and discursive aspects of epidemiological data-sharing initiatives, and how they relate to, overlap with or conflict with technical, institutional and epistemic factors.\nThis website hosts a series of preparatory protocols that structure the project, as well as notes about key concepts and reflections on the progress of work. Please keep in mind that this is a continually evolving site and its contents may change as the project goes on. 
All content is hosted and tracked at github.com/zackbatist/CITF-Postdoc.\nHere’s an overview of what’s on this site:\nContext: My motivations for doing this work and the circumstances that surround the establishment of the project.\nResearch Protocol: Outlines the project’s overall vision and contextualizes it in relation to specific objectives.\nCase Selection: Articulates the parameters that inform how cases are selected.\nEthics Protocol: Specifies ethical considerations, including risks of harm and strategies for mitigating them.\nInterview Protocol: The questions I will be asking research participants, including the rationale for asking them.\nData Management: Procedures that circumscribe collection, management and curation of research data.\nQDA Protocol: The code system, memoing guidelines, and specific QDA procedures.\nGlossary: A series of key terms and their definitions, with reference to the literature and expanded notes about their meanings.\nNotes: Some semi-structured ideas that situate my work in relation to extant literature.\nBlog: Updates and reflections on key events, or general thoughts I wish to share.\nGitHub: A link to the GitHub repository where this website’s files are hosted.\nBib: A biblatex file containing a continually-updated list of sources cited in all documents hosted on this website.\nRSS: RSS feed you can use to subscribe to the blog.", - "crumbs": [ - "Home" - ] + "text": "Initial overview of data harmonization procedures, using the Healthy Obesity Project (HOP) as an illustrative case.\nOutlines the technical apparatus, especially for DataShield, but also broadly describes the discursive process of arriving at a DataSchema that is both functional and flexible.\n\nThis description is quite broad and abstracy, seems somewhat ideal and aspirational.\n\nDescribes reliance on international standards, such as the International Labour Organization’s International Standard Classification of Occupations.\n\nIt seems like these are used 
as black boxes that encapsulate a series of tensions which epidemiologists are unconcerned with; in effect, they simplify the need for stretching the collaborative ties even further than they are already extended, they represent matters out of scope for deeper discursive engagement.\n\nIt is notable that they emphasize that it’s easy to set up and use DataShield and Maelstorm toolkits independently of university IT and that it can be run using RStudio installed on a basic laptop.\n\nMaybe look into the historical context (2013) and the evolving role of university IT in software selection.\n\nThe conclusion states that the HOP project was successful in its harmonization efforts, but does not go as far as to state that it produced meaningful findings as a result of harmonization.\n\nI may take some time to find and read studies that used these data to see what’s what.\nThis seems like the main one: https://doi.org/10.1186/1472-6823-14-9, but these other papers may or not not also be relevant:\n\nhttps://doi.org/10.1016/j.smhl.2021.100263\nhttps://doi.org/10.1007/s10654-014-9977-1\nhttps://doi.org/10.1530/EJE-14-0540\nhttps://doi.org/10.1007/S13679-020-00375-0\nhttps://doi.org/10.1093/eurpub/ckac061" }, { - "objectID": "ethics-protocol.html", - "href": "ethics-protocol.html", - "title": "Ethics Protocol", - "section": "", - "text": "Project Title: Articulating epidemiological data harmonization initiatives as practical and collaborative experiences\nSubmitted Materials: zackbatist.info/CITF-Postdoc/irb-docs.pdf\nPrincipal Investigator: Zachary Batist\nProtocol Number: 25-01-057\nSubmitted: 2025-01-30\nApproved:", - "crumbs": [ - "Ethics Protocol" - ] + "objectID": "notes/maelstrom-readings.html#doiron2017", + "href": "notes/maelstrom-readings.html#doiron2017", + "title": "Maelstrom reading notes", + "section": "Doiron et al. (2017)", + "text": "Doiron et al. 
(2017)\n\nAn overview of the key software that facilitates data harmonization practices under Maelstrom, also briefly touched upon in Doiron et al. (2013).\nPage 1373 refers to graphical and programmatic interfaces and assumes certain roles and tasks associated with each.\nBriefly describes its use by the Canadian Longitudinal Study on Aging (CLSA), the Canadian Partnership for Tomorrow Project (CPTP) and InterConnect, primarily by describing the range and quantity of data that these systems manage in each case.\n\n\nOpal provides a centralized web-based data management system allowing study coordinators and data managers to securely import/export a variety of data types (e.g. text, nu merical, geolocation, images, videos) and formats (e.g. SPSS, CSV) using a point-and-click interface. Opal then converts, stores and displays these data under a standar dized model.\n\n\nMica is used to create websites and metadata portals for individual epidemiological studies or multi-study consor tia, with a specific focus on supporting observational co hort studies. The Mica application helps data custodians and study or network coordinators to efficiently organize and disseminate information about their studies and net works without significant technical effort." }, { - "objectID": "ethics-protocol.html#recruitment-and-consent", - "href": "ethics-protocol.html#recruitment-and-consent", - "title": "Ethics Protocol", - "section": "Recruitment and consent", - "text": "Recruitment and consent\nWill this study involve recruitment of human study participants?\n\nYes\nNo\n\nHow are potential study participants identified and/or recruited to the study? Explain how potential participants are identified or introduced to the study, and who will recruit participants. Will the investigator/s require any special permissions or access to the target population e.g. 
clinic access, patient registries or records, mailing lists, community access?\nThrough consultation with key community stakeholders, the principal investigator will devise a list of prospective projects to serve as cases.1 The principal investigator will then write to the leaders of these projects inviting them to participate in the study. These invitations to project leaders will explain the project’s purpose and scope, and will encourage the recipient to reply with any questions or concerns they may have. If they accept the invitation, the principal investigator will then work with project leaders to devise a list of individuals who may serve as interview candidates based on their roles in the project. The principal investigator will be clear with project leaders that they should not pressure those who work for them to participate in the study, and that individuals’ participation should be treated as separate from their regular duties; if project leaders cannot or will not abide by this condition, their project will be rejected as a prospective case. The principal investigator will then write to the recommended individuals to introduce the study and its objectives and to invite them to participate as research subjects. If these individuals express interest in participating in the study, the principal investigator will schedule a time to sit for an interview. Some interviews may be conducted remotely using internet-based video conferencing software, depending on participants’ availability.\n1 See the case selection protocol for further details.Describe the consent process. If alternate processes for seeking consent are planned (e.g. 
verbal, online, waiver), please provide a rationale and outline the procedure of obtaining and documenting consent and/or assent, where applicable.\nOnce individuals express their interest in participating, participants will provided with an informed consent document that outlines in more detail the goals of the study, the roles of the participant, how they will be recorded, how data pertaining to them will be retained, and the potential risks and benefits pertaining to their involvement. This document will also describe how participants’ personally identifiable information will be managed and used. Participants will be asked to read and sign the document in order to obtain written informed consent. For interviews that will be held remotely using internet-based video conferencing software, participants will asked to send their signed informed consent documents in PDF format to the principal investigator. At the start of each interview the researcher will reiterate participants’ rights and ask them to orally reaffirm their consent before proceeding.\nIs there a relationship between the study participants and the person obtaining consent and/or the principal investigator/s?\n\nYes\nNo\n\nIf yes, please explain the nature of the relationship, and outline the steps that will be taken to avoid the perception of undue influence.\nOne project that serves as a case in this research is the Covid-19 Immunity Task Force (CITF), which the principal investigator currently serve as postdoctoral researcher. Some of the participants will therefore be his colleagues. The interviews will remain structured and limited in scope, and will not touch on matters relating to other aspects of their work. 
Moreover, prior to and throughout their involvement as research participants, frank and open discussion will be encouraged regarding collective expectations and to articulate the boundaries between participants’ relationships with the principal investigator as colleagues and as research subjects.\nThe principal investigator will consult with David Buckeridge, who leads the CITF, as one key community stakeholder to help devise a shortlist of projects that may serve as prospective cases.", - "crumbs": [ - "Ethics Protocol" - ] + "objectID": "notes/maelstrom-readings.html#fortier2010", + "href": "notes/maelstrom-readings.html#fortier2010", + "title": "Maelstrom reading notes", + "section": "Fortier et al. (2010)", + "text": "Fortier et al. (2010)\n\nA very grandiose paper presenting the grand vision for DataSHaPER, which would eventually become Maelstrom.\n\nLots of co-authors!\n\nInvokes the pan-European EPIC project (European Prospective Investigation into Cancer and Nutrition), which faced numerous data synthesis challenges despite its proactive effort to coordinate work across numerous research centres.\n\n\nTwo complementary approaches may be adopted to support effective data synthesis. The first one principally targets ‘what’ is to be synthesized, whereas the other one focuses on ‘how’ to collect the required information. Thus: (i) core sets of information may be identified to serve as the foundation for a flexible approach to harmonization; or (ii) standard collection devices (questionnaires and stand ard operating procedures) may be suggested as a required basis for collection of information.\n\n\nDataSHaPER is an acronym for DataSchema and Harmonization Platform for Epidemiological Research.\n\n\nIn an ideal world, information would be ‘prospectively harmonized’: emerging studies would make use, where possible, of harmonized questionnaires and standard operating procedures. 
This enhances the potential for future pooling but entails significant challenges —- ahead of time -— in developing and agree ing to common assessment protocols. However, at the same time, it is important to increase the utility of existing studies by ‘retrospectively harmonizing’ data that have already been collected, to optimize the subset of information that may legitimately be pooled. Here, the quantity and quality of infor mation that can be pooled is limited by the heterogeneity intrinsic to the pre-existing differences in study design and conduct.\n\nCompares prospective and retrospective harmonizatiom, with the former being presented as ideal, and the latter being a pragmatic reconciliation in acknowledgement that the former is essentially impossible to achieve.\n\nDataSHaPER is strikingly similar to OCHRE:\n\nXML-based data structures\nGenesis of a generic and ultimately optional base-level schema that illustrates the kind of data that the data structure may hold in ways that are immediately recognizable to all practitioners (at OCHRE it was associations between contexts and finds)\nSeparate harmonization platform where users can edit and manipulate records and associations between them\n\n\n\nThe question ‘What would constitute the ultimate proof of success or failure of the DataSHaPER approach’ needs to be addressed. Such proof will necessarily accumulate over time, and will involve two fundamental elements: (i) ratification of the basic DataSHaPER approach; and (ii) confirmation of the quality of each individual DataSHaPER as they are developed and/or extended. An important indication of the former would be provided by the widespread use of our tools. However, the ultimate proof of principle will necessarily be based on the generation of replicable scientific findings by researchers using the approach. But, for such evidence to accumulate it will be essential to assure the quality of each individual DataSHaPER. 
Even if the fundamental approach is sound, its success will depend critically on how individual DataSHaPERs are constructed and used. It seems likely that if consistency and quality are to be assured in the global development of the approach, it will be necessary for new DataSHaPERs to be formally endorsed by a central advisory team." }, { - "objectID": "ethics-protocol.html#risk-benefit-assessment", - "href": "ethics-protocol.html#risk-benefit-assessment", - "title": "Ethics Protocol", - "section": "Risk-benefit assessment", - "text": "Risk-benefit assessment\nDescribe the foreseeable risks to study participants. What risks are attributable to the research, including cumulative risks? Which risks are participants normally exposed to in the course of their clinical care or in their daily activities as they relate to the research questions/objectives?\nParticipation in this study does not involve any physical, psychological or legal risks. However, the principal investigator will be asking participants to share detailed information about their work practices and work relationships, and public association with their responses may potentially disrupt or complicate their professional reputations. To mitigate against this potential harm, the principal investigator will give participants the option to render their responses confidential.\nWhat procedures are in place to monitor and assess participant safety for the duration of the study?\nPrior to each interview, and as part of the procedure for obtaining informed consent, participants will be asked about whether they want to render their responses confidential. Immediately after each interview, participants will be given an additional opportunity to reflect on their responses, and will be prompted to either confirm or alter their decision regarding whether or not to maintain confidentiality. 
Furthermore, for participants who have not requested that their responses be treated as confidential immediately before and after the interview, a follow-up email will be sent one week after the interview to reiterate the option to render their responses confidential.\nDescribe the potential benefits of the study for: (1) the study participants; (2) the population under investigation, and (3) the field of research.\nThis study contributes to the development of better epidemiological data-sharing infrastructures by articulating social, collaborative and discursive aspects of data harmonization, and how these factors relate to, overlap with or conflict with technical, institutional and epistemic factors. By explicitly framing data harmonization as a social and collaborative activity, we may devise more effective data-sharing infrastructures that better support the contextualization of data and enhance their value in contexts of data reuse. This work therefore poses new ways to document how epidemiologists mobilize distributed records in the constitution of synthetic knowledge and helps develop practical solutions that enable greater reflexivity. Additionally, this study may directly benefit participants by framing the experiences they address during interviews in ways that they might not have otherwise considered, thereby encouraging greater reflexivity in their own work.", - "crumbs": [ - "Ethics Protocol" - ] + "objectID": "notes/maelstrom-readings.html#fortier2011", + "href": "notes/maelstrom-readings.html#fortier2011", + "title": "Maelstrom reading notes", + "section": "Fortier et al. (2011)", + "text": "Fortier et al. (2011)\nThis paper responds to Hamilton et al. (2011), which presents an effort to devise a standardized nomenclature. The response is basically to advocate for a more flexible approach, rather than a stringent one promoted by Hamilton et al. (2011). It draws extensively from concepts published in the foundational paper by Fortier et al. 
(2010).\n\nTwo complementary approaches to harmonization may be adopted to support effective data synthesis or comparison across studies. The first approach makes use of identical data collection tools and procedures as a basis for harmoni zation and synthesis. Here we refer to this as the ‘‘stringent’’ approach to harmonization. The second approach is con sidered ‘‘flexible’’ harmonization. Critically, the second ap proach does not demand the use of identical data collection tools and procedures for harmonization and synthesis. Rather, it has to be based on sound methodology to ensure inferential equivalence of the information to be harmonized. Here, standardization is considered equivalent to stringent harmonization. It should, however, be noted that the term standard is occasionally employed to refer to common con cepts or comparable classification schemes but does not necessarily involve the use of identical data collection tools and procedures (12, 13).\n\nThis directly parallels the distinction made in Fortier et al. (2010) between “ideal” prospective and more pragmatic retrospective approaches to data harmonization.\n\nSynthesis of data using a flexible harmonization approach may be either prospective or retrospective. To achieve flexible prospective harmonization, investigators from several studies will agree on a core set of variables (or measures), compatible sets of data collection tools, and standard operating procedures but will allow a certain level of flexibilit in the specific tools and procedures used in each study (16, 17). Retrospective harmonization targets synthesis of information already collected by existing legacy studies (15, 18, 19). As an illustrative example, using retrospective harmonization, researchers will define a core set of variables (e.g., body mass index, global level of physical activity) and, making use of formal pairing rules, assess the potential for each participating study to create each variable (15). 
The ability to retrospectively harmonize data from existing studies facilitates the rapid generation of new scientifi knowledge.\n\nI wonder why there is no example provided for prospective data harmonization. Is it because it is ideal and not realistic? I’d argue that it is simply what occurs within individual projects." }, { - "objectID": "ethics-protocol.html#privacy-and-confidentiality", - "href": "ethics-protocol.html#privacy-and-confidentiality", - "title": "Ethics Protocol", - "section": "Privacy and confidentiality", - "text": "Privacy and confidentiality\nPlease describe the measures in place for meeting confidentiality obligations. How is information and data safeguarded for the full cycle of the study: i.e. during its collection, use, dissemination, retention, and/or disposal?\nThe specific circumstances that frame each case are significant factors that will shape the findings, and the study will benefit from participants’ consent to associate their identities with their interview responses. However, they may choose to render their interview responses confidential while maintaining their role a research participant. Participants may change their decision regarding whether or not to associate their identities with their interview responses up to one week after the interview, at which point the principal investigator will begin transcribing and analyzing the records pertaining to the interview. Participants will be reminded about this option immediately after the interview and one week following the interview via email.\nThe study engages with a relatively small community, and there is minimal social risk that others may be able to determine the identities of those whose research practices and professional relationships are being documented, even if their responses are rendered confidential. 
To address this issue, if any single participant from a case decides to render their responses confidential, the responses of all participants pertaining to that case will be rendered confidential as well, and the identify of the project that serves as the case will be obfuscated too.\nIn situations whereby a participant decides to render their responses confidential, or has their responses rendered confidential due to another member of their case deciding to do so, only the principal investigator will have access to records containing un-obfuscated information that may identify them. These un-obfuscated records, which may include audio and video records of interview sessions, as well as unedited transcripts and textual notes containing information that may reveal the participants’ identities, will be kept in secure and encrypted media, and destroyed within five years of concluding the study, which provides sufficient time to revisit the data and produce additional research outputs. However, edited transcripts scrubbed of all information that may identify research participants may be kept, published and archived. If participants consent to maintaining association between their responses and their identities, un-obfuscated records and transcripts may be kept, published and archived.\nThe study is committed to adhering to fundamental data security practices, including those specified in McGill University’s Cloud Directive which regulates the curation of sensitive research data. Physical records will be kept in a locked drawer in secure workspaces, either at McGill University’s School of Public and Global Health or at the principal researcher’s home office. 
Digital records will be stored on encrypted and password-protected drives and on secure servers approved or managed by McGill University under the Cloud Directive.2\n2 Refer to the data management plan for further details on how information pertaining to this project will be collected, curated and shared.Recordings of remote interviews conducted using internet-based video conferencing software will be made using the software’s built-in recording tools. Only video conferencing software approved by the Cloud Directive will be used. Participants will be instructed to disable their microphones or video cameras prior to initiating recording if they have opted to not be recorded through these media. The researcher will record all media locally and refrain from using any cloud services to store or modify the records which the video conference software may provide.\nIf a contracted cloud/storage service provider or online survey tool is used, provide information on the service provider’s security and privacy policy, location of its servers, data ownership, and what happens to the stored data after the contract is terminated. For more information, please consult the University’s directive.\nThe study uses file-sharing software hosted by the Covid-19 Immunity Task Force at McGill University’s School of Public and Global Health to backup all files maintained for this study. These backups will include files containing information that might reveal participants’ identities. The software used to manage these backups is managed by McGill University and has been approved for storing sensitive research data by the Cloud Directive.\nThe study may use the secure GitLab instance hosted by the surveillance lab within the Clinical and Health Informatics Research Group at McGill University to store and track changes to sensitive research data. 
This software is managed by McGill University and has been approved for storing sensitive research data by the Cloud Directive.\nThe study maintains a website where the principal investigator shares documentation that supports the study and reflects on the work as it progresses. This is hosted using GitHub Pages and is backed up using Dropbox. No sensitive research data will pass through these services.\nRecordings of remote interviews conducted using internet-based video conferencing software will be made using the software’s built-in recording tools. Only video confering software approved by the Cloud Directive will be used. Participants will be instructed to disable their microphones or video cameras prior to initiating recording if they have opted to not be recorded through these media. The researcher will record all media locally and refrain from using any cloud services to store or modify the records which the video conference software may provide.\nPlease explain any reasonable and foreseeable disclosure requirements (e.g. disclosure to third parties such as government agencies or departments, community partners in research, personnel from an agency that monitors research, research sponsor, the REB/IRB, or regulatory agencies).\nNo disclosure requirements are foreseen.\nIf there are plans for retaining participant and/or study data for future use, please describe the context for its use, requirements for potentially re-contacting study participants and consent, and how the data will be stored and maintained for the long term.\nResearch data will be published in compliance with ethical standards for sharing open social science research data. 
Records that contain personally-identifying information pertaining to participants who have requested that their responses be rendered confidential and to those who have had their responses rendered confidential due to another member of their case deciding to do so will not be published.\nThe database containing codings, memos and trends deriving from qualitative data analysis will be published only after being scrubbed of all personally-identifying information pertaining to participants who have requested that their responses be rendered confidential and to those who have had their responses rendered confidential due to another member of their case deciding to do so.\nThe principal investigator may follow up with the leaders of the data-sharing initiatives that serve as cases for this project to share the results with them and to present them with constructive feedback deriving from the study’s findings. The principal investigator may also invite select participants to collaborate on a position paper advocating for reforms based on the project’s findings.\nSecondary use of data studies: if the study involves data linkage, please describe the data that will be linked and the likelihood that identifiable information will be created through the linkage.\nThis project does not rely on data deriving from other studies. The data may be reused in related work being undertaken under the same grant and by those who access the openly accessible data after they are published.", - "crumbs": [ - "Ethics Protocol" - ] + "objectID": "notes/maelstrom-readings.html#fortier2017", + "href": "notes/maelstrom-readings.html#fortier2017", + "title": "Maelstrom reading notes", + "section": "Fortier et al. (2017)", + "text": "Fortier et al. 
(2017)\nExplicit statement regarding the rationale and presumed benefits of harmonization right in the first paragraph:\n\nThe rationales underpinning such an approach include ensuring: sufficient statistical power; more refined subgroup analysis; increased exposure hetero geneity; enhanced generalizability and a capacity to under take comparison, cross validation or replication across datasets. Integrative agendas also help maximizing the use of available data resources and increase cost-efficiency of research programmes.\n\nSummarized in bullet points:\n\nensuring sufficient statistical power\nmore refined subgroup analysis\nincreased exposure heterogeneity\nenhanced generalizability\na capacity to undertake comparison, cross validation or replication across datasets.\nmaximizing the use of available data resources\nincrease cost-efficiency of research programmes\n\nClearly defines harmonization and its benefits:\n\nEssentially, data harmonization achieves or improves comparability (inferential equivalence) of similar measures collected by separate studies.\n\nAdds an additional argument for retrospective harmonization on top of prior discussion of retrospective/prospective approaches (cf. Fortier et al. (2010); Fortier et al. (2011)):\n\nRepeating identical protocols is not necessarily viewed as providing evidence as strong as that obtained by exploring the same topic but using different designs and measures.\n\nAlso relates retrospective harmonization from systematic meta reviews. In fact, the paper basically responds to calls for more structured guidelines for data harmonization, similar to those that had been produced to support structured metareviews in the years prior to this publication. The authors identify several papers that have done similar guidelines or reports on harmonization practices, which they claim are too broad. Those papers include:\n\nRolland et al. (2015)\n\nhttps://doi.org/10.1093/aje/kwv133\n\nSchaap et al. 
(2011)\n\nhttps://doi.org/10.1186/1471-2474-12-272\n\nBennett et al. (2011)\n\nhttps://doi.org/10.1002/gepi.20564\n\nHohmann et al. (2012)\n\nThis paper reports the findings of a systematic inquiry made to data harmonization initiatives, whose data comprise responses to a questionnaire. The findings indicate that procedures were more attentively follows during earlier stages, such as when matching and aligning available data with the project’s designated scope. However, procedures were less sound with regards to documenting procedures, validating the results of data processing, and dissemination strategy. There is a notable division between work that occurs before and after people actually begin handling the data, which indicates a tension between aspirational plans and tangible experiences.\n\nRespondents were asked to delineate the specific procedures or steps undertaken to generate the harmonized data requested. Sound procedures were generally described; however, the terminologies, sequence and technical and methodological approaches to these procedures varied considerably. Most of the procedures mentioned were related to defining the research questions, identifying and selecting the participating studies (generally not through a systematic approach), identifying the targeted variables to be generated and processing data into the harmonized variables. These procedures were reported by at least 75% of the respondents. 
On the other hand, few reported steps related to validation of the harmonized data (N=4; 11.8%), documentation of the harmonization process (N=5; 14.7%) and dissemination of the harmonized data outputs (N=2; 5.9%).\n\nThe paper summarizes some specific “potential pitfalls” reported by respondents to their survey:\n\nensuring timely access to data;\nhandling dissimilar restrictions and procedures related to individual participant data access;\nmanaging diversity across the rules for authorship and recognition of input from study-specific investigators;\nmobilizing sufficient time and resources to conduct the harmonization project;\ngathering information and guidance on harmonization approaches, resources and techniques;\nobtaining comprehensive and coherent information on study-specific designs, standard operating procedures, data collection devices, data format and data content;\nunderstanding content and quality of study-specific data;\ndefining the realistic, but scientifically acceptable, level of heterogeneity (or content equivalence) to be obtained;\ngenerating effective study-specific and harmonized datasets, infrastructures and computing capacities;\nprocessing data under a harmonized format taking into account diversity of: study designs and content, study population, synchronicity of measures (events measured at different point in time or at different intervals when repeated) etc;\nensuring proper documentation of the process and decisions undertaken throughout harmonization to ensure transparency and reproducibility of the harmonized datasets;\nmaintaining long-term capacities supporting dissemination of the harmonized datasets to users.\n\nIt’s not made clear how these responses were distributed among respondents.\nThe authors then identify several absolute essential requirements needed to achieve success:\n\nCollaborative framework: a collaborative environment needs to be implemented to ensure the success of any harmonization project. 
Investigators involved should be open to sharing information and knowledge, and investing time and resources to ensure the successful implementation of a data-sharing infrastructure and achievement of the harmonization process.\nExpert input: adequate input and oversight by experts should be ensured. Expertise is often necessary in: the scientific domain of interest (to ensure harmonized variables permit addressing the scientific question with minimal bias); data harmonization methods (to support achievement of the harmonization procedures); and ethics and law (to address data access and integration issues).\nValid data input: study-specific data should only be harmonized and integrated if the original data items collected by each study are of acceptable quality.\nValid data output: transparency and rigour should be maintained throughout the harmonization process to ensure validity and reproducibility of the harmonization results and to guarantee quality of data output. The common variables generated necessarily need to be of acceptable quality.\nRigorous documentation: publication of results generated making use of harmonized data must provide the information required to estimate the quality of the process and presence of potential bias. This includes a description of the: criteria used to select studies; process achieved to select and define variables to be harmonized; procedures used to process data; and characteristics of the study-specific and harmonized dataset(s) (e.g. attribute of the populations).\nRespect for stakeholders: all study-specific as well as network-specific ethical and legal components need to be respected. 
This includes respect of the rights, intellectual property interests and integrity of study participants, investigators and stakeholders.\n\nThe authors describe how they arrived at guidelines following the results of this study:\n\nA consensus approach was used to assemble information about pitfalls faced during the harmonization process, establish guiding principles and develop the guidelines. The iterative process (informed by workshops and case studies) permitted to refine and formalize the guide lines. The only substantive structural change to the initial version proposed was the addition of specific steps relating to the validation, and dissemination and archiving of harmonized outputs. These steps were felt essential to em phasize the critical nature of these particular issues.\n\nThe paper outlines a checklist of stages that data harmonization initiatives need to go through to produce ideal outcomes. For each task, they describe a scenario in which the task can be said to be complete, whhich resembles an ideal outcome. This is described in the paper, summarized in a table, and more comprehensively documented in the supplementary materials.\nAlso worth noting, this paper includes a list of harmonization initiatives that I may consult when selecting cases. I’m not quite sure how useful it will be since the findings don’t really break down the distribution of responses in any detail, but maybe the authors have done this analysis and not published it." }, { - "objectID": "ethics-protocol.html#managing-conflicts-of-interest", - "href": "ethics-protocol.html#managing-conflicts-of-interest", - "title": "Ethics Protocol", - "section": "Managing conflicts of interest", - "text": "Managing conflicts of interest\nConflicts of interest do not imply wrong-doing. It is the responsibility of the investigator to determine if any conflicts apply to any person/s involved in the design and/or conduct of the research study or any member of their immediate family. 
Disclose all contracts and any conflicts of interest (real, perceived, or potential) relating to this research project. Conflict of interest may also arise with regard to the disclosure of personal health information.\n\nNot applicable. There are no conflicts of interest to disclose.\nYes, there are conflicts of interest to disclose.\n\nIf yes, please describe the conflicts of interest (real, potential, and perceived), and the procedures for managing declared conflicts. Not applicable.", - "crumbs": [ - "Ethics Protocol" - ] + "objectID": "notes/maelstrom-readings.html#bergeron2018", + "href": "notes/maelstrom-readings.html#bergeron2018", + "title": "Maelstrom reading notes", + "section": "Bergeron et al. (2018)", + "text": "Bergeron et al. (2018)\nThe authors reference the drive for efficiency as a motivating factor that drives open data:\n\nHowever, many cohort databases remain under-exploited. To address this issue and speed up discovery, it is essential to offer timely access to cohort data and samples.\n\nHowever the paper is actually about the need for better and more publicly accessible documentation about data.\nThe authors state that catalogues exist to promote discoverability of data and samples and to answer the data documentation needs of individual studies.\nThey draw attention to the importance of catalogues in research networks (analyzing data across studies), which establish portals that document “summary statistics on study subjects, such as the number of participants presenting specific characteristics (e.g. diseases or exposures)”.\nThe authors outline several challenges that inhibit or limit the potential value of catalogues:\n\nThe quality of a catalogue directly depends on the quality and comprehensiveness of the study-specific information documented. 
But, maintaining and providing access to understandable and comprehensive documentation to external users can be challenging for cohort investigators, and require resources not always available, particularly for the very small or long-established studies. In addition, the technical work required to build and maintain a catalogue is particularly demanding. For example, gathering comprehensive and comparable information on study designs necessitates the implementation of rigorous procedures and working in close collaboration with study investigators. Manual classification of variables is also a long and a tedious process prone to human error. Moreover, the information collected needs to be regularly revised to update metadata with new data collections. These challenges, among others, can lead to the creation of catalogues with partial or disparate information across studies, documenting limited subsets of variables (e.g. only information collected at baseline) or including only studies with data dictionaries available in a specific language or format.\n\nBullet point summary:\n\nA catalogue’s quality depends on the quality and comprehensiveness of documentation provided by individual studies\nCohort investigators, i.e. leaders of individual studies, are under-equipped to provide such comprehensive documentation\n\nDo they just need material support? 
Or also guidance on how to do it, what factors to account for, etc?\n\nTechnical work for building and maintaining a catalogue is demanding\n\nI’m not sure if they example they provide to illustrate these challenges aligns with what I would call “technical work”; they refer to precise and detailed documentation in direct consultation with individual study maintainers, and I suppose the discussion about and documentation of methodological details is technical in that it corresponds with the work that was actually done on the ground, using data collection and processing instruments\n\nClassification of variables is a long and tedious process\n\nWhat makes it long and tedious? This isn’t really specified\nThey recite that this is prone to human error, but I wonder what successful or error-ful (?) outcomes would look like and how they would differ\n\nThe information needs to be regularly revised and updated\n\nThe authors recommendations to resolve these concerns:\n\nHowever, to truly optimize usage of available data and leverage scientific discovery, implementation of high quality metadata catalogues is essential. It is thus important to establish rigorous standard operating procedures when developing a catalogue, obtain sufficient financial support to implement and maintain it over time, and where possible, ensure compatibility with other existing catalogues.\n\nBullet point summary:\n\nestablish rigorous standard operating procedures when developing a catalogue\nobtain sufficient financial support to implement and maintain it over time\nwhere possible, ensure compatibility with other existing catalogues" }, { - "objectID": "context.html", - "href": "context.html", - "title": "Context", - "section": "", - "text": "I am Zack Batist — a postdoctoral researcher at McGill University, in the School of Global and Public Health’s Department of Epidemiology, Biostatistics and Occupuational Health. 
I’m working with David Buckeridge, who leads the Covid-19 Immunity Task Force (CITF) Databank, to investigate data sharing in epidemiological research — with an emphasis on the practical and situated experiences involved in data sharing.\nThe CITF is a “data harmonization” initiative, which entails coordinating a systematic effort to align the information contained in datasets collected by distributed teams of epidemiologists. These efforts to integrate the records collected during various discrete studies are motivated by a desire to establish larger integrated datasets bearing greater statistical power and that facilitate comparison across cohorts. However, epidemiologists must reckon with the diversity of minor variations in data collection procedures, as well as ethico-legal concerns relating to the sharing of individual health records pertaining to human research subjects across numerous institutional and regional jurisdictions.\nAs a scholar of scientific practice, with a primary interest in data-sharing and the formation of information commons, data harmonization represents a fascinating mechanism through which scientists derive technical, administrative, social and epistemic frameworks to enhance the value of their collective endeavours in response to disciplinary needs, warrants, desires and expectations. 
This study therefore articulates the motivations for doing data harmonization, identifies how value is ascertained, and describes the strategies employed to achieve the desired goals — including perceived and actual challenges, setbacks, opportunities, realizations, and lessons learned.\nThis relates to my previous work that (a) explores tensions that arise when attempting to establish information commons in archaeology, specifically relating to inability to cope with a superficial perception of data’s stability and an intuitive understanding of their situated nature; and that (b) investigates how the open science movement attempts (and fails) to reshape practices relating to data sharing, integration and reuse. I continue in my approach that frames data-sharing — whether it occurs in relatively “closed” curcumstances between close colleagues, or as mediated by open data platforms among strangers — as comprising a series of collaborative commitments that govern who may contribute to and obtain value from the information commons, and in what ways.", - "crumbs": [ - "Context" - ] + "objectID": "notes/maelstrom-readings.html#bergeron2021", + "href": "notes/maelstrom-readings.html#bergeron2021", + "title": "Maelstrom reading notes", + "section": "Bergeron et al. (2021)", + "text": "Bergeron et al. (2021)\nIdentifies several registries of relevant cohorts, but notes that they face challenges getting the data together. 
Namely, issues concerning institutional policies concerning data-sharing, lack of open access to cohort data and to documentation about the data, the data’s complexity which makes it difficult to harmonize across studies, and lack of access to funding, secure data environments, and specialized expertise and resources.\nThe Research Advancement through Cohort Cataloguing and Harmonization (ReACH) initiative was establihed in collaboration with Maelstrom to overcome some of these barriers in the context of Developmental Origins of Health and Disease (DOHaD) research.\nThe authors briefly summarize some projects that rely on ReACH data, and provide a more comprehensive table of ongoing and unpublished work.\nIn the supplementary materials, the authors also include an illustrative example specific tasks, decisisions and actions that one might got through when using ReACH data. It is a broad-level but fairly sober account of how one would navigate the catalogue and engage with collaborators." }, { - "objectID": "case-selection.html", - "href": "case-selection.html", - "title": "Case Selection", + "objectID": "notes/maelstrom-readings.html#wey2024", + "href": "notes/maelstrom-readings.html#wey2024", + "title": "Maelstrom reading notes", + "section": "Wey and Fortier (2024)", + "text": "Wey and Fortier (2024)\nAn overview of the harmonization procedires applied in CanPath and MINDMAP. Authored by two members of the Maelstrom team, but no one from these initiatives.\nAt first, comes across as another broad-level overview of processes. But, as is elicited in the conslusion, the paper highlights some subtle divergent approaches, some of which are relevant to my project and I picked up here.\nInteresting bit about use of centralized systems or systems that are more conducive to end-users’ individual workflows.\n\nIn both illustrative projects, information about participating studies and data collected was gathered and made available on the central data server. 
In the CanPath BL-HRFQ, harmonization documentation and processing scripts were held centrally and only updated by Maelstrom Research. In MINDMAP, because multiple groups simultaneously worked on generating harmonization processing scripts for different areas of information, working versions of the DataSchema and processing scripts were held on a GitHub, allowing for better version control and dynamic updating of scripts by multiple remote data harmonizers.\n\nMore about the balance being struck at MINDMAP between institutional and popular tech and documentation platforms:\n\nIn the MINDMAP project, R markdown documents with applied harmonization scripts (Figure 13.1b) and any comments were preserved in the GitHub repository, which is also publicly accessible. Furthermore, summary statistics for MINDMAP variables are only available to approved data users through the secure server, as harmonized datasets are only intended for use within the MINDMAP network. Overviews of the harmonization process and outputs have also been published as open-access, peer-reviewed articles for both projects (Fortier et al. 2019; Wey et al. 2021).\n\nBit about existing collaborative ties making things much easier:\n\nthe prospective coordination and unu sually high ability for the harmonization team (Maelstrom Research) to work directly with study data managers on recently collected and documented study data resulted in high standardization and ability to resolve questions.\n\nData curation as an explicitly creative task, involving impactful decisions. Interesting that this was unexpected enough to warrant inclusion as a key challenge:\n\nIn MINDMAP, these differences were clearly documented in comments about harmonization for each source dataset to allow researchers to decide how to use the harmonized variable. 
In-depth exploration of the best statistical methods to harmonize these types of measures to maintain integrity of content while minimizing loss of information and methodological bias are important and active areas of research (Griffith et al. 2016; Van den Heuvel and Griffith 2016). The approach taken in this case was to apply simpler harmonization meth ods (i.e. rescaling) and document the transformation, leaving investigators the flexibility to further explore and analyze the harmonized datasets as appropriate for their research questions.\n\nAs with the above excerpt, documentation was considered as a viable mode of resolving or accounting for significant discrepancies:\n\nThe papers describing the harmonization projects attempt to highlight these considerations, for example, providing some comparison of demographics of the harmonized populations against the general populations from which they were drawn (Dummer et al. 2018; Fortier et al. 2019; Wey et al. 2021), listing sources of study-specific heterogeneity in the harmonized datasets to consider (Fortier et al. 2019), and pointing users to individual study documentation where more information on weights to use for analysis should be con sidered (e.g. Wey et al. 2021).\n\nThe bit about use of github was rationalized as a way of facilitating collaboration across insitutional boundaries. 
They refer to R and RMarkdown being open standards as the main reason, but I wonder if a part of it is that GitHub, being a non-institutional platform, was easier to use from an oboarding perspective:\n\nIn MINDMAP, due to the need for multiple international groups to work simultaneously and flexibly on the harmonization processing and frequently evolving versions of study-specific harmonization scripts, scripts for harmonization processing were written and applied entirely through R markdown in an RStudio interface, and the DataSchema and R markdown versions were maintained and frequently updated in a GitHub repository.\n\nPerhaps ironically, the private company and platform may have been used due to the strength of pan-european collaborative ties that institutions may not be able to keep up with. Whereas in Canada, with centralized project-oriented work, it may be much easier to enforce adoption of centralized tooling. This is just speculation." + }, + { + "objectID": "notes/maelstrom-readings.html#doiron2013a", + "href": "notes/maelstrom-readings.html#doiron2013a", + "title": "Maelstrom reading notes", + "section": "Doiron, Raina, and Fortier (2013)", + "text": "Doiron, Raina, and Fortier (2013)\nThis paper summarizes what was discussed at a workshop bringing together stakeholders who would contribute to two large data harmonization initiatives: the Canadian Longitudinal Study on Aging (CLSA) and the Canadian Partnership for Tomorrow Project (CPTP). 
It is therefore representative of plans and challenges that were held at an early stage when collaborations were being established.\nThe authors identify series of reasons for linking data, which I summarize here:\n\nMaximizing potential of disparate information resources\n\n\nenriching study datasets with additional data not being collected directly from study par ticipants\noffer vital information on health outcomes of participants\nvalidate self-reported information\n\n\nDrawing maximum value from data produced from public expenditure\n\n\noffers a cost-effective means to maximize the use of existing publicly funded data collections\n\n\nDevelop interdisciplinary collaborative networks\n\n\nby combining a wide range of risk factors, disease endpoints, and relevant socio-economic and biological measurements at a population level, linkage lays the groundwork for multidisciplinary health-research initiatives, which allow the exploration of new hypotheses not foreseeable using independent datasets\n\n\nEstablish long-lasting infrastructure and instill a collaborative culture\n\n\nLast, a coordinated pan-Canadian cohort-to-administrative linked database would establish legacy research infrastructures that will better equip the next generation of researchers across the country\n\nThe authors use the term “data linkage”:\n\nData linkage is “the bringing together from two or more different sources, data that relates to the same individual, family, place or event”. When linking data at the individual level, a common identifier (or a combination of identifiers) such as a personal health number, date of birth, place of residence, or sex, is used to combine data related to the same person but found in separate databases. 
Data linkage has been used in a number of research fields but is an especially valuable tool for health research given the large amount of relevant information collected by institutions such as governments, hospitals, clinics, health authorities, and research groups that can then be matched to data collected directly from consenting individuals participating in health research.\n\nThis is distinct from harmonization in that it is not meant to combine data with similar scope and schematic structure, but rather to relate information collected under various domains so that they could be more easily queried in tandem. I imagine this as reminiscient of establishing links between tables in a relational database.\nThe authors identify the open-endedness of the linked data as a unique challenge, without elaborating on this point:\n\nCLSA/CPTP-to-AHD linkage also poses unique challenges in that, in contrast to more traditional requests to link data to answer one-off research questions, it aims to establish a rich data repository that will allow investigators to answer a multitude of research questions over time.\n\nThe workshop participants established a 5-point plan:\n\nbuild strong collaborative relationships between stakeholders involved in data sharing (e.g., researchers, data custodians, and privacy commissioners);\nidentify an entity which could provide overall leadership as well as individual “champions” within each province;\nfind adequate and long-term resources and funding;\nclarify data linkage and data-sharing models and develop a common framework within which the data linkage process takes place; and\ndevelop a pilot project making use of a limited number of linked variables from participating provinces\n\nThe second point, about identifying “champions”, is kind of interesting, and I’d like to know more about what qualities these people were expcted to have, their domains of expertise, their collaborative/soft or technical skills, and how this plays into access 
to funds and general governance structures\nNeed to look at Roos, Menec, and Currie (2004), which they cite in the conclusion, specifically with reference to the aspiration to develop “information rich environments”. Seems like it is a primary source for the background on linked data in Manitoba and Australia." + }, + { + "objectID": "notes/maelstrom-readings.html#fortier2023", + "href": "notes/maelstrom-readings.html#fortier2023", + "title": "Maelstrom reading notes", + "section": "Fortier et al. (2023)", + "text": "Fortier et al. (2023)\nRelates harmonization to the FAIR principles, which has not really been featured much in arguments for harmonization in prior Maelstrom papers. Specifically, this paper frames harmonization as a necessary additional condition that enables FAIR data to be made useful; FAIR is deemed not enough.\n\nIn the following paper, we aim to provide an overview of the logistics and key ele ments to be considered from the inception to the end of collabo rative epidemiologic projects requiring harmonizing existing data.\n\nInteresting acronym/framework for defining research questions:\n\nThe research questions addressed and research plan proposed by harmonization initiatives need to be Feasible, Interesting, Novel, Ethical, and Relevant (FINER). (Cummings, Browner, and Hulley 2013)\n\nTable 1 lists examples of questions that could be addressed to help delineate analytical approach, practical requirements, and operations of a harmonization initiative. These are all questions that look toward or project specific goals, which then inform the strategies through which they may be achieved.\nThe supplementary materials include very detailed information about specific practices and objectives pertaining to the REACH initiative. However, it’s unclear how well this reflects any specific challenges experienced. 
In other words, I was hoping for something more candid.\nMoreover, this paper is also based on survey responses from 20 harmonization initiatives, but neither the findings resulting from the analysis, nor the data, are referenced or included. Is this the same as the one that informed Fortier et al. (2017)?\nLooming in the background of this paper is the DCC lifecycle model. However they do not cite DCC, or the field of digital curation in general. The DCC lifecycle model has always been presented as a guideline, sort of divorced from practical experience, or at least that’s how I’ve always understood it. Basically, and literally, a model for expected behaviours and interstitial outcomes. I think it would be interesting to explore (a) why the authors perceived need to present a model and (b) how they arrived at the phases and principles that are included in it. Is this tacit or common-sense thinking? Or was this directly informed by my concrete thinking from the field of digital curation?\nI should brush up on more recent work regarding the DCC, and specifically, critiques of it. Through a quick search, a few papers seem directly relevant:\n\nCox and Tam (2018)\nChoudhury, Huang, and Palmer (2020)\nRhee (2024)\n\nI think Cox and Tam (2018) may be especially relevant as a critique of the lifecycle metephor in a general sense. From the abstract, it seems like they identify various downsides pertaining to lifecycle models, specifically that they “mask various aspects of the complexity of research, constructing it as highly purposive, serial, uni-directional and occurring in a somewhat closed system.” I’m not sure how explicit the connection is, but I sense this ties into the general paradigm shift toward greater recognition of curation “in the wild”, as per Dallas (2016)." + }, + { + "objectID": "notes/maelstrom-readings.html#gaye2014", + "href": "notes/maelstrom-readings.html#gaye2014", + "title": "Maelstrom reading notes", + "section": "Gaye et al. 
(2014)", + "text": "Gaye et al. (2014)\nIntroduces DataShield.\nFrames DataShield as a technical fix to administrative problems:\n\nMany technical and policy measures can be enacted to render data sharing more secure from a governance per spective and less likely to result in loss of intellectual prop erty. For example, data owners might restrict data release to aggregate statistics alone, or may limit the number of variables that individual researchers might access for speci fied purposes. Alternatively, secure analysis centres, such as the ESRC Secure Data Service and SAIL represent major informatics infrastructures that can provide a safe haven for remote or local analysis/linkage of data from selected sources while preventing researchers from down loading the original data themselves. However, to comple ment pre-existing solutions to the important challenges now faced, the DataSHIELD consortium has developed a flexible new way to comprehensively analyse individual level data collected across several studies or sources while keeping the original data strictly secure. As a technology, DataSHIELD uses distributed computing and parallelized analysis to enable full joint analysis of individual-level data from several sources, e.g. research projects or health or administrative data—without the need for those data to move, or even be seen, outside the study where they usually reside. 
Crucially, because it does not require underpin ning by a major informatics infrastructure and because it is based on non-commercial open source software, it is both locally implementable and very cost effective.\n\nAdds a social/collaborative element to earlier arguments about the challenges inherent of prospective harmonization, highlighting a need for engagement with individual studies (either through direct or peripheral participation) to conduct research that was not initially planned for:\n\nUnfortunately, both [study-level metadata] SLMA and [individual-level metadata] ILMA present significant problems Because SLMA com bines analytical results (e.g. means, odds ratios, regression coefficients) produced ahead of time by the contributing studies, it can be very inflexible: only the pre-planned analyses undertaken by all the studies can be converted into joint results across all studies combined. Any additional analyses must be requested post hoc. This hinders exploratory analysis for example the investigation of sub-groups, or interactions between key variables.\n\nProvides a detailed overview of how DataShield was implemented for HOP (Healthy Obesity Project), including the code used to generate specific figures and analyses. Hoever it does not really describe or reflect upon the processes through which the code was developed.\nThe authors highlight the fact that certain analytical approaches are not possible using DataShield, especially analysis that visualize individual data points. It’s unclear how they enforce this, or whether it’s an implicit limitation based on the data that DataShield participants provide.\n\nBecause in DataSHIELD potentially disclosive com mands are not allowed, some analyses that are possible in standard R are not enabled. In essence, there are two classes of limitation on potential DataSHIELD functional ity: (i) absolute limitations which require an analysis that can only be undertaken by enabling one of the functional ities (e.g. 
visualizing individual data points) that is explicitly blocked as a fundamental element of the DataSHIELD philosophy. For example, this would be the case for a standard scatter plot. Such limitations can never be circumvented and so alternatives (e.g. contour and heat map plots) are enabled which convey similar information but without disclosing individual data points; (ii) current limitations which are functions or models that we believe are implementable but we have not, as yet, under taken or completed the development work required. As examples, these latter include generalized linear mixed model (including multi-level modelling) and Cox regression.\n\nThe authors list numerous other limitations and challenges. Some have to do with what kinds of data DataShield can handle (something about horizontal and vertical that I do not yet fully understand). Other challenges include the need for data to be harmonized, and having to deal with governance concerns.\nNotably, the first challenge mentioned seems to contradict the statement earlier on (and made by Doiron et al. (2013)) that this is relatively easy to set up. The authors acknowledge the fact that coding for analysis using DataShield has a steep learning curve and requires some pre-planning to enable results from satellite computers to be properly combined. Their mitigation is to black-box these concerns by implementing simpler client-side functions that mask the more complex behaviours (and presumably translate error messages in ways that users can understand and act to resolve!).\n\nDespite its potential utility, implementation of DataSHIELD involves significant challenges. First, although set-up is fundamentally straightforward, application involves a relatively steep learning curve because the command structure is complex: it demands specification of the analysis to be undertaken, the studies to use and how to combine the results. 
In mitigation, most complex serverside functions are now called using simpler client-side functions and we are working on a menu-driven implementation.\n\nAlso interesting that they note how there may be unanticipated problems, either accidental or malicious, and their way of mitigating against this is to log all commands:\n\nFifth, despite the care taken to set up DataSHIELD so that it works properly and is non-disclosive, it is possible that unanticipated prob lems (accidental or malicious) may arise. In order to iden tify, describe and rectify any errors or loopholes that emerge and in order to identify deliberate miscreants, all commands issued on the client server and enacted on each data server are permanently logged.\n\nThis is even more interesting in light of their continuous reference to “data.care”, which they do not address in depth, but which seems to have been a scandal involving unauthorized release of personal health data used in research.\nThe authors add an additional caveat concerning the need to ensure that the data are cleaned in advance.\n\nBut, to be pragmatic, many of the routinely collected healthcare and administra tive databases will have to undergo substantial evolution before their quality and consistency are such that they can directly be used in high-quality research without exten sive preparatory work. By its very nature, such preparation—which typically includes data cleaning and data harmonization—cannot usually be undertaken in DataSHIELD, because it involves investigating discrepan cies and/or extreme results in individual data subjects: the precise functionality that DataSHIELD is designed to block. Such work must therefore be undertaken ahead of time by the data generators themselves—and this is de manding of time, resources and expertise that — at present — many administrative data providers may well be unwilling and/or unable to provide. 
That said, if the widespread us ability of such data is viewed as being of high priority, the required resources could be forthcoming.\n\nThis corresponds with another limitation identified earlier, namely with regards to identifying duplicate individual records across jurisdictional boundaries (which involves assumptions regarding nationality and identify – one of those weird myths that programmers can’t seem to let go!):\n\nSo far DataSHIELD has been applied in settings where individual participants in different studies are from different countries or from different regions so it is unlikely that any one person will appear in more than one source. However, going forward, that cannot al ways be assumed. We have therefore been consider ing approaches to identify and correct this problem based on probabilistic record linkage. In the genetic setting 48 the BioPIN provides an alternative solution. Ongoing work is required.\n\nNote the last line of the prior block quote regarding data cleaning:\n\nThat said, if the widespread us ability of such data is viewed as being of high priority, the required resources could be forthcoming.\n\nThis seems like a thread worth tugging at!" + }, + { + "objectID": "notes/maelstrom-readings.html#wolfson2010", + "href": "notes/maelstrom-readings.html#wolfson2010", + "title": "Maelstrom reading notes", + "section": "Wolfson et al. (2010)", + "text": "Wolfson et al. (2010)\nx" + }, + { + "objectID": "interview-protocol.html", + "href": "interview-protocol.html", + "title": "Interview Protocol", "section": "", - "text": "Note\n\n\n\nThis document is still a work in progress.", + "text": "Interviews may be held either in-person or through online video conference. 
All interviews will be held in quiet and comfortable environments, such as office spaces or conference rooms.\nI will record all in-person interviews using a SONY ICD-UX560 audio recorder to capture audio in the lossless 16 bit 44.1 kHz Linear PCM wav format, with additional audio filters to enhance playback during transcription, if necessary.\nI will also record in-person interview sessions using a GoPro Hero 4 Silver action camera, depending on participants willingness to be video recorded. Based on prior interviews with scientists about their research experiences, I found that interviewees like to show me, rather than merely tell me, about what they are working on and the means through which they engage with information systems. The camera may be leveraged to record spontaneous video records of these demonstrations and provide me with an additional rich data source for further analysis. Moreover, the camera provides an additional backup audio recording in case of data loss on the primary recording device.\nRemote interviews will be recorded using the video conferencing software’s built-in recording tools. Participants will be instructed to disable their microphones or video cameras prior to initiating recording if they have opted to not be recorded through these media. 
The researcher will record all media locally and refrain from using any cloud services to store or modify the records which the video conference software may provide.\nI will also record handwritten notes comprising descriptive accounts of activities and interactions when recording devices are switched off, as well as preliminary interpretations of observed behaviours and notes on things I plan to follow up on at a later time.", "crumbs": [ - "Case Selection" + "Interview Protocol" ] }, { - "objectID": "case-selection.html#key-factors", - "href": "case-selection.html#key-factors", - "title": "Case Selection", - "section": "Key Factors", - "text": "Key Factors\nTo reiterate, this project investigates the social and collaborative apparatus that scaffold data-sharing initiatives in epidemiology. Through analysis of data obtained through interviews with various relevant stakeholders attached to data-sharing initiatives, the project will ascertain the actions taken and challenges experienced to mediate the varied motivations, needs and values of those involved. In effect, the project aims to articulate the collaborative commitments that govern the constitution and maintenance of epidemiological information commons, and to relate these to technological, administrative and epistemic factors.\nIn other words, I aim to make certain under-appreciated social and collaborative commitments that underlie data-sharing initiatives more visible and to draw greater attention to certain sensibilities, attitudes, and apprehensions that are relevant to contemporary discourse on the nature of epidemiological data and ongoing development of information infrastructures designed to support data integration and re-use.\nHere I outline some key factors that will guide the selection of cases so as to ensure that the project meaningfully addressses its goals.\n\n1. 
Longevity\nInitiatives that have existed for different durations of time will have different capacity to reflect on their practices. Younger projects will not have had as much of a chance to produce any research outcomes, but may be valuable sources for insight on expectations. More established projects will be able to reflect on unexpected challenges they may have experienced.\nIt will be good to have at least one younger project representing an initiative still “in flux”, one or two “legacy” projects (no longer active), and one or two at intermediate stages (extracting data for meaningful analysis, expanding the initiative’s scope, etc).\n\n\n2. Community composition\nThe size and composition of the community, degree of familiarity among its members, and the mechanisms through which connections are managed constitute additional important factors to consider. Communication and decision-making may take different forms when teams are either smaller and locally-concentrated or larger and dispersed. Decision-making may also be significantly impacted by diffferent governance models and degrees of community participation. It would be interesting to identify how leaders are differentiated from other participants, norms and expectations for getting involved in leadership positions, and considerations that are made when making decisions that impact the community.\n\n\n3. Support structures\nData-sharing may be supported by diverse funding models or tech stacks to support the work, which may significantly impact how the work progresses. Comparing sources of support for data-sharing will help me to explore how data-sharing is either integrated into or supplemented as a distinct outgrowth of “normal” science.\nSpecifically, it will be interesting to compare the extent to which projects are left to cobble together their own data-sharing infrastucture, and how this impacts attitudes and norms regarding the curation and nature of research data. 
I wonder whether lack of government support fosters creative, entrepeneurial, experimental or community-led models, how funding is provided to supporting the development of collaborative research networks, and how these feed back into norms and attitudes regarding the independence of individual research projects and the formation of collectively-maintained information commons.1\n1 There is some precedent for this in the social sciences and humanities, which are fields that open science policies and infrastructures are not really designed to handle. This marginizaliation had contributed to experimentation with community-based governance models (as per the Radical Open Access Collective) and broader community involvement in policy decisions concerning how the rich diversity of social science and humanities data should be curated.I expect a tendency for cases to be supported by limited-term, federally-funded grants, though it might be worth exploring how supplementary funding provided by non-government agencies, including private firms (through MITACS, for instance) and philanthropic organizations (such as the Gates Foundation) impact the work. I would therefore like to included cases funded through these kinds of initiatives in this project.\n\n\n4. Disciplinary trends\nData-sharing is undoubtably impacted by attitudes concerning the nature of data and their roles in scientific knowledge production, and it is therefore necessary to account for different perspectives. Although I am still somewhat unfamiliar with the diversity of thought on such matters in epidemiology, I intuit that much of the open science movement is driven by rather positivist attitude. I would like to include cases that take on alternative approaches to science.\n\n\n5. Historical or contextual factors\nScience is beholden to political trends, which impact ability to obtain funding and collaborate accross borders (e.g. 
Brexit’s impact on trans-European funding, including initiatives to attract and retain talent). Moreover, certain events, such as the Covid-19 pandemic, trigger responses in the scientific community. Even if these events are not the focus of the research, they must still be accounted for due to their presumed impacts.\n\n\n6. Kinds of data\nThe nature of the data will surely impact how they are shared. In epidemiology specifically, there are ethical limitations on sharing precise patient records. This may be especially salient in studies focusing in health in Indiginous populations, which may involve additional consideration in contexts of data-sharing.2 Moreover, controls on data collection procedures, including limited or controlled scope or decisions to account for specific factors (such as race, which is prevalent in American datasets but largely ignored elsewhere) may significantly impact what can be done with them when integrated at scale.\n2 The Data Governance and Management Toolkit for Self-Governing Indigenous Governments https://indigenousdatatoolkit.ca may be helpful for exploring these concerns, but I am still looking for epidemiologically-oriented resources on such matters.", + "objectID": "interview-protocol.html#interview-records", + "href": "interview-protocol.html#interview-records", + "title": "Interview Protocol", + "section": "", + "text": "Interviews may be held either in-person or through online video conference. All interviews will be held in quiet and comfortable environments, such as office spaces or conference rooms.\nI will record all in-person interviews using a SONY ICD-UX560 audio recorder to capture audio in the lossless 16 bit 44.1 kHz Linear PCM wav format, with additional audio filters to enhance playback during transcription, if necessary.\nI will also record in-person interview sessions using a GoPro Hero 4 Silver action camera, depending on participants willingness to be video recorded. 
Based on prior interviews with scientists about their research experiences, I found that interviewees like to show me, rather than merely tell me, about what they are working on and the means through which they engage with information systems. The camera may be leveraged to record spontaneous video records of these demonstrations and provide me with an additional rich data source for further analysis. Moreover, the camera provides an additional backup audio recording in case of data loss on the primary recording device.\nRemote interviews will be recorded using the video conferencing software’s built-in recording tools. Participants will be instructed to disable their microphones or video cameras prior to initiating recording if they have opted to not be recorded through these media. The researcher will record all media locally and refrain from using any cloud services to store or modify the records which the video conference software may provide.\nI will also record handwritten notes comprising descriptive accounts of activities and interactions when recording devices are switched off, as well as preliminary interpretations of observed behaviours and notes on things I plan to follow up on at a later time.", "crumbs": [ - "Case Selection" + "Interview Protocol" ] }, { - "objectID": "case-selection.html#selecting-cases", - "href": "case-selection.html#selecting-cases", - "title": "Case Selection", - "section": "Selecting Cases", - "text": "Selecting Cases\nSince a significant aspect of this work is to compare different approaches to data-sharing that have not yet been systematically articulated, it will be necessary to loosely define the parameters through which each case will be initially characterized. I will rely on structured consultations with the research community to make sense of the data-sharing landscape and select cases accordingly. 
By consulting with key stakeholders, I will arrive at a consensus about which cases are worth approaching while documenting the rationale behind these selections.\nThe consultation process is meant to ensure that case selection adheres to community will and reasoning, while also ensuring that cases are logistically feasible. I will therefore ask for input from leading members of epidemioligical data-sharing initiatives who are familiar with the goals of the this project, and who are involved with the Maelstrom Project which establishes logistical boundaries around the scope of the project.\n\nFixed cases\nMaelstrom will serve as a “fixed point” that limits the scope of the cases’ breadth, while also ensuring that participants (and myself) have a common frame of reference. Moreover, the practices and values that support Maelstrom’s operations have already been documented to a certain extent by its leaders (cf. Doiron et al. 2017; Fortier et al. 2017; Fortier et al. 2023; Bergeron et al. 2018), by its partners (cf. Doiron et al. 2013; Wey et al. 2021; Bergeron et al. 2021) and by scholars of scientific practice (cf. M. J. Murtagh et al. 2012; Demir and Murtagh 2013; Madeleine J. Murtagh et al. 2016; Tacconelli et al. 2022; Gedeborg et al. 2023). This prior work will serve as valuable resources supporting this project.\nAdditionally, the fact that all cases interact with Maelstrom for their technical infrastructure will greatly simplify the interviews by reducing the “overhead” of having to learn or be told about the technical systems, which may distract from the primary themes I seek to address during interviews.\nCITF will also serve as a fixed case. This is partly for logistical reqasons, since the grant is meant to support the CITF Databank, and this project will align with concurrent research on user experiences pertaining to CITF specifically. 
At the same time, CITF is relevant to the project’s objectives in its own right, and will contribute meaningful insight in comparison with other cases.", + "objectID": "interview-protocol.html#interview-guide", + "href": "interview-protocol.html#interview-guide", + "title": "Interview Protocol", + "section": "Interview guide", + "text": "Interview guide\nInterviews are oriented by my goal to document values and attitudes concerning data harmonization efforts, as elicited by research participants in their responses. Participants will be asked to reflect on:\n\nthe motivations for their initiatives;\nthe challenges they experience;\nhow they envision success and failure;\ntheir perceptions of their own roles and the roles of other team members and stakeholders;\nthe values that inform their decisions;\nhow the technological apparatus they set up enables them to realize their goals and values; and\nways in which they believe data-sharing could be improved.\n\nTo this end, each interview will proceed following a strategic order:\n\n1. Participants’ goals and perspectives\nFollow a life-history method to better understand participants’ professional backgrounds and their roles within their projects. The goal is to obtain information about their paths, not the rehearsed origin story.\n\nTo start, can you please tell me a little bit about your background?\nWhat is the project, and what is your role?\nHow did you find yourself in this role?\nHow has your previous experience prepared you for your role?\n\n\n\n2. Projects’ missions, purposes, motivations\nThis section is about the project in general, including its purpose, scope and value. 
Information about practices and procedures will be sought in a subsequent phase of the interview.\n\nWhat are the project’s goals?\nWhat makes the project unique?\nWhat is the project doing that no other similar project is doing?\nDo you consider this project as similar to any other initiatives?\nWhat are they, and in what ways are they simiar or different?\n\n\n\nWhat are the expected outcomes?\nHave you achieved these goals and outcomes?\nIf not Are you on track to achieving them?\nWhat are some challenges that the project experienced, and how have you worked to overcome them?\n\n\n\n3. Practices, procedures, relationships\nThis section asks about specific actions and interactions that the participant engages in.\n\nRoles and relationships\n\nWhat does your role entail?\nCan you provide a couple examples of things that you recently did in this capacity?\n\n\n\nWho else do you frequently rely on, and what are their roles?\nCan you describe what they do, and perhaps give a few examples drawn from their recent work?\n\nThe interview might proceed in different ways depending on their initial responses. Here are some questions I might ask, corresponding with the participants’ role and area of expertise.\n\n\nMaintaining the project community\n\nPlease briefly describe the process through which you obtain new partners or users.\nCan you please recall a recent example?\n\n\n\nHow well do you know each partner?\nDid you know them before their involvement?\n\n\n\nWould you describe the project as a tight knit community, or more open-ended?\n\n\n\nHow do you communicate with partners and contributors?\nWhat kinds of media or platforms do you use, and are they targeted for specific purposes? i.e. 
email, newsletters, social media, skype, personal communication at conferences\n\n\n\nAre there particular people in each project who you communicate with more frequently than others?\nWho are they, and why are these the people who you connect with?\n\n\n\nWhat do you consider your role or responsibility vis-a-vis the development/growth of this community?\nHow do you foster the community’s development and growth?\nDo you consider these efforts to be effective?\n\n\n\nDoes your role as someone who leads a data harmonization initiative differentiate you from other epidemiologists?\nHow has your relationship with other epidemiologists changed after initiating this project and taking on this role?\n\n\n\nReflections on data’s value\n\nHow has the data been used or analyzed?\nDo you track how the data is used?\nIs this tracking formal or informal?\n\n\n\nWhat patterns or trends are apparent based on this tracking?\nIn your view, has the data been used in productive ways?\nIn what ways are people either maximizing or not fully utilizing the data’s full potential?\n\n\n\nCan you tell me about any unexpected or alternative uses of the data?\nWhat made them significant to you?\n\n\n\nWhich skills and competencies do you think researchers need to possess in order to be able to make proper use of the data in their work?\n\n\n\nBased on your experience, what are the main obstacles for the effective and widespread adoption of these skills?\nWhat are some positive factors, or drivers, that can make that prospect more tangible?\n\n\n\nData ownership\n\nWho has rights (in the legal sense or informally) over the information contained in the system, or in related documents and datasets?\nCan you tell me about any conflicts or tensions that emerged relating to expressions of propriety or ownership over data?\n\n\n\nCollecting data\n\nDo projects collect data with future harmonization in mind?\nIf so, how does this affect data collection procedures, and does this play a role in 
subsequent decision-making?\n\n\n\nCurating data\n\nPlease describe the overall process of getting data into the system and then working with the data.\n\n\n\nPlease tell me about any unexpected or problematic cases that made working with data particularly challenging.\nWhat made these cases unique or challenging?\nHow did you resolve them or work towards a solution or viable outcome?\n\n\n\nAccessing data\n\nDo you consider the system easy to access?\nCan you identify some challenges that pose as barriers to access?\n\n\n\nWho has access to data?\nHow are decisions regarding access rights made?\nCan you tell me about any unnaceptable practices regarding accessing and sharing data?\n\n\n\nUsing data\n\nIf you engage with the data with specific questions in mind, how do these questions emerge?\nWhat role does the data play in shaping the questions and analytical approach?\n\n\n\nIs the system amenable to exploratory or serendipitous modes of discovery?\nPlease tell me about specific examples where you engaged with the data in this way.\n\n\n\nWhat features does the system have to view or export data?\nHow easy is it to view, export or visualize data the data?\nDo you use the tools that are designed to export of visualize data, or do you prefer to use your own tooling?\nWhat are the reasons behind this preference?\n\n\n\nDocumentation\n\nHow is the system documented?\nWho is responsible for creating documentation?\nCan you please tell me about a great example of documentation in your project?\n\n\n\nOverall, do you consider your project’s documentation to be helpful?\nWhy or why not?\n\n\n\nIn your opinion, does the documentation accurately reflect the true nature of the documented data or work practices?\nAre specific things more accurately documented than others?\nPlease tell me why you think some things are more accurately or less accurately documented.\n\n\n\nCan you recall any instances when documentation was updated?\nWhat prompted these updates?\n\n\n\nDo 
people ever get in touch to ask questions about specific aspects of the data or data curation procedures?\nWhat kinds of questions do they ask?\nWhat kinds of responses are given?\n\n\n\nRelationships with Maelstrom\n\nCan you please concisely describe the role of Maelstrom as part of your project’s overall initiative?\n\n\n\nWhat are the origins of your project’s relationship with Maelstrom?\nHow has this relationship changed over time?\n\n\n\nDoes your project present any unique challenges or require any special attention?\nIf so, please tell me about some unique cases or examples that demonstrate this unique relationship.\n\n\n\nDo you believe that Maelstrom is meeting your project’s needs and enabling it to achieve its goals?\nIn what ways is Maelstrom either satisfying or failing to meet your project’s expectations or needs?\nHow would you change the current system to better suit your project’s needs more effectively?\n\n\n\nDo you engage with Maelstrom’s other partners?\nIf so, what is the nature of these relationships?", "crumbs": [ - "Case Selection" + "Interview Protocol" ] }, { - "objectID": "case-selection.html#logistical-constraints-and-sources-of-bias", - "href": "case-selection.html#logistical-constraints-and-sources-of-bias", - "title": "Case Selection", - "section": "Logistical Constraints and Sources of Bias", - "text": "Logistical Constraints and Sources of Bias\nAfter identifying potential cases, I will reach out to project leaders to invite them to participate. I will prepare a document outlining this project’s objectives and the roles that cases will play in the work. 
I will also set up a meeting prior to them deciding whether they would like to participate so I can ascertain whether they understand the project and to help determine who may serve as people who can sit for interviews (I expect to hold 12-15 interviews ranging between 60-90 minutes in duration).\nI may prioritize local connections, which provide favourable conditions for holding interviews (i.e., people are more willing to show things that can not be conveyed through a screen, and the pre- and post-interview phases provide meaningful insight). This may introduce bias in that I may obtain more in-depth and nuanced information from local initiatives than those occurring abroad. This can be mitigated by travelling to conduct interviews in person, however the costs of travel may introduce their own biases favouring cases that are easier to reach.", + "objectID": "glossary.html", + "href": "glossary.html", + "title": "Glossary", + "section": "", + "text": "data-sharing\nXXXX.\n\ndata-harmonization\nXXXX.\n\ndata-integration\nXXXX.\n\ncollaboration\nXXXX.\n\ndata-sharing initiative\nXXXX.\nCorresponds with the term “harmonization initiative” in Fortier et al. (2017).\ncatalogue\nXXXX.\nBergeron et al. (2018), Bergeron et al. (2021)\n\n\n\n\nReferences\n\nBergeron, Julie, Dany Doiron, Yannick Marcon, Vincent Ferretti, and Isabel Fortier. 2018. “Fostering Population-Based Cohort Data Discovery: The Maelstrom Research Cataloguing Toolkit.” PLOS ONE 13 (7): e0200926. https://doi.org/10.1371/journal.pone.0200926.\n\n\nBergeron, Julie, Rachel Massicotte, Stephanie Atkinson, Alan Bocking, William Fraser, Isabel Fortier, and the ReACH member cohorts’ principal investigators. 2021. “Cohort Profile: Research Advancement Through Cohort Cataloguing and Harmonization (ReACH).” International Journal of Epidemiology 50 (2): 396–97. 
https://doi.org/10.1093/ije/dyaa207.\n\n\nFortier, Isabel, Parminder Raina, Edwin R Van den Heuvel, Lauren E Griffith, Camille Craig, Matilda Saliba, Dany Doiron, et al. 2017. “Maelstrom Research Guidelines for Rigorous Retrospective Data Harmonization.” International Journal of Epidemiology 46 (1): 103–5. https://doi.org/10.1093/ije/dyw075.", "crumbs": [ - "Case Selection" + "Glossary" ] }, { @@ -323,212 +345,197 @@ ] }, { - "objectID": "glossary.html", - "href": "glossary.html", - "title": "Glossary", + "objectID": "case-selection.html", + "href": "case-selection.html", + "title": "Case Selection", "section": "", - "text": "data-sharing\nXXXX.\n\ndata-harmonization\nXXXX.\n\ndata-integration\nXXXX.\n\ncollaboration\nXXXX.\n\ndata-sharing initiative\nXXXX.\nCorresponds with the term “harmonization initiative” in Fortier et al. (2017).\ncatalogue\nXXXX.\nBergeron et al. (2018), Bergeron et al. (2021)\n\n\n\n\nReferences\n\nBergeron, Julie, Dany Doiron, Yannick Marcon, Vincent Ferretti, and Isabel Fortier. 2018. “Fostering Population-Based Cohort Data Discovery: The Maelstrom Research Cataloguing Toolkit.” PLOS ONE 13 (7): e0200926. https://doi.org/10.1371/journal.pone.0200926.\n\n\nBergeron, Julie, Rachel Massicotte, Stephanie Atkinson, Alan Bocking, William Fraser, Isabel Fortier, and the ReACH member cohorts’ principal investigators. 2021. “Cohort Profile: Research Advancement Through Cohort Cataloguing and Harmonization (ReACH).” International Journal of Epidemiology 50 (2): 396–97. https://doi.org/10.1093/ije/dyaa207.\n\n\nFortier, Isabel, Parminder Raina, Edwin R Van den Heuvel, Lauren E Griffith, Camille Craig, Matilda Saliba, Dany Doiron, et al. 2017. “Maelstrom Research Guidelines for Rigorous Retrospective Data Harmonization.” International Journal of Epidemiology 46 (1): 103–5. 
https://doi.org/10.1093/ije/dyw075.", + "text": "Note\n\n\n\nThis document is still a work in progress.", "crumbs": [ - "Glossary" + "Case Selection" ] }, { - "objectID": "interview-protocol.html", - "href": "interview-protocol.html", - "title": "Interview Protocol", - "section": "", - "text": "Interviews may be held either in-person or through online video conference. All interviews will be held in quiet and comfortable environments, such as office spaces or conference rooms.\nI will record all in-person interviews using a SONY ICD-UX560 audio recorder to capture audio in the lossless 16 bit 44.1 kHz Linear PCM wav format, with additional audio filters to enhance playback during transcription, if necessary.\nI will also record in-person interview sessions using a GoPro Hero 4 Silver action camera, depending on participants willingness to be video recorded. Based on prior interviews with scientists about their research experiences, I found that interviewees like to show me, rather than merely tell me, about what they are working on and the means through which they engage with information systems. The camera may be leveraged to record spontaneous video records of these demonstrations and provide me with an additional rich data source for further analysis. Moreover, the camera provides an additional backup audio recording in case of data loss on the primary recording device.\nRemote interviews will be recorded using the video conferencing software’s built-in recording tools. Participants will be instructed to disable their microphones or video cameras prior to initiating recording if they have opted to not be recorded through these media. 
The researcher will record all media locally and refrain from using any cloud services to store or modify the records which the video conference software may provide.\nI will also record handwritten notes comprising descriptive accounts of activities and interactions when recording devices are switched off, as well as preliminary interpretations of observed behaviours and notes on things I plan to follow up on at a later time.", + "objectID": "case-selection.html#key-factors", + "href": "case-selection.html#key-factors", + "title": "Case Selection", + "section": "Key Factors", + "text": "Key Factors\nTo reiterate, this project investigates the social and collaborative apparatus that scaffold data-sharing initiatives in epidemiology. Through analysis of data obtained through interviews with various relevant stakeholders attached to data-sharing initiatives, the project will ascertain the actions taken and challenges experienced to mediate the varied motivations, needs and values of those involved. In effect, the project aims to articulate the collaborative commitments that govern the constitution and maintenance of epidemiological information commons, and to relate these to technological, administrative and epistemic factors.\nIn other words, I aim to make certain under-appreciated social and collaborative commitments that underlie data-sharing initiatives more visible and to draw greater attention to certain sensibilities, attitudes, and apprehensions that are relevant to contemporary discourse on the nature of epidemiological data and ongoing development of information infrastructures designed to support data integration and re-use.\nHere I outline some key factors that will guide the selection of cases so as to ensure that the project meaningfully addressses its goals.\n\n1. Longevity\nInitiatives that have existed for different durations of time will have different capacity to reflect on their practices. 
Younger projects will not have had as much of a chance to produce any research outcomes, but may be valuable sources for insight on expectations. More established projects will be able to reflect on unexpected challenges they may have experienced.\nIt will be good to have at least one younger project representing an initiative still “in flux”, one or two “legacy” projects (no longer active), and one or two at intermediate stages (extracting data for meaningful analysis, expanding the initiative’s scope, etc).\n\n\n2. Community composition\nThe size and composition of the community, degree of familiarity among its members, and the mechanisms through which connections are managed constitute additional important factors to consider. Communication and decision-making may take different forms when teams are either smaller and locally-concentrated or larger and dispersed. Decision-making may also be significantly impacted by diffferent governance models and degrees of community participation. It would be interesting to identify how leaders are differentiated from other participants, norms and expectations for getting involved in leadership positions, and considerations that are made when making decisions that impact the community.\n\n\n3. Support structures\nData-sharing may be supported by diverse funding models or tech stacks to support the work, which may significantly impact how the work progresses. Comparing sources of support for data-sharing will help me to explore how data-sharing is either integrated into or supplemented as a distinct outgrowth of “normal” science.\nSpecifically, it will be interesting to compare the extent to which projects are left to cobble together their own data-sharing infrastucture, and how this impacts attitudes and norms regarding the curation and nature of research data. 
I wonder whether lack of government support fosters creative, entrepeneurial, experimental or community-led models, how funding is provided to supporting the development of collaborative research networks, and how these feed back into norms and attitudes regarding the independence of individual research projects and the formation of collectively-maintained information commons.1\n1 There is some precedent for this in the social sciences and humanities, which are fields that open science policies and infrastructures are not really designed to handle. This marginizaliation had contributed to experimentation with community-based governance models (as per the Radical Open Access Collective) and broader community involvement in policy decisions concerning how the rich diversity of social science and humanities data should be curated.I expect a tendency for cases to be supported by limited-term, federally-funded grants, though it might be worth exploring how supplementary funding provided by non-government agencies, including private firms (through MITACS, for instance) and philanthropic organizations (such as the Gates Foundation) impact the work. I would therefore like to included cases funded through these kinds of initiatives in this project.\n\n\n4. Disciplinary trends\nData-sharing is undoubtably impacted by attitudes concerning the nature of data and their roles in scientific knowledge production, and it is therefore necessary to account for different perspectives. Although I am still somewhat unfamiliar with the diversity of thought on such matters in epidemiology, I intuit that much of the open science movement is driven by rather positivist attitude. I would like to include cases that take on alternative approaches to science.\n\n\n5. Historical or contextual factors\nScience is beholden to political trends, which impact ability to obtain funding and collaborate accross borders (e.g. 
Brexit’s impact on trans-European funding, including initiatives to attract and retain talent). Moreover, certain events, such as the Covid-19 pandemic, trigger responses in the scientific community. Even if these events are not the focus of the research, they must still be accounted for due to their presumed impacts.\n\n\n6. Kinds of data\nThe nature of the data will surely impact how they are shared. In epidemiology specifically, there are ethical limitations on sharing precise patient records. This may be especially salient in studies focusing in health in Indiginous populations, which may involve additional consideration in contexts of data-sharing.2 Moreover, controls on data collection procedures, including limited or controlled scope or decisions to account for specific factors (such as race, which is prevalent in American datasets but largely ignored elsewhere) may significantly impact what can be done with them when integrated at scale.\n2 The Data Governance and Management Toolkit for Self-Governing Indigenous Governments https://indigenousdatatoolkit.ca may be helpful for exploring these concerns, but I am still looking for epidemiologically-oriented resources on such matters.", "crumbs": [ - "Interview Protocol" + "Case Selection" ] }, { - "objectID": "interview-protocol.html#interview-records", - "href": "interview-protocol.html#interview-records", - "title": "Interview Protocol", - "section": "", - "text": "Interviews may be held either in-person or through online video conference. 
All interviews will be held in quiet and comfortable environments, such as office spaces or conference rooms.\nI will record all in-person interviews using a SONY ICD-UX560 audio recorder to capture audio in the lossless 16 bit 44.1 kHz Linear PCM wav format, with additional audio filters to enhance playback during transcription, if necessary.\nI will also record in-person interview sessions using a GoPro Hero 4 Silver action camera, depending on participants willingness to be video recorded. Based on prior interviews with scientists about their research experiences, I found that interviewees like to show me, rather than merely tell me, about what they are working on and the means through which they engage with information systems. The camera may be leveraged to record spontaneous video records of these demonstrations and provide me with an additional rich data source for further analysis. Moreover, the camera provides an additional backup audio recording in case of data loss on the primary recording device.\nRemote interviews will be recorded using the video conferencing software’s built-in recording tools. Participants will be instructed to disable their microphones or video cameras prior to initiating recording if they have opted to not be recorded through these media. 
The researcher will record all media locally and refrain from using any cloud services to store or modify the records which the video conference software may provide.\nI will also record handwritten notes comprising descriptive accounts of activities and interactions when recording devices are switched off, as well as preliminary interpretations of observed behaviours and notes on things I plan to follow up on at a later time.", + "objectID": "case-selection.html#selecting-cases", + "href": "case-selection.html#selecting-cases", + "title": "Case Selection", + "section": "Selecting Cases", + "text": "Selecting Cases\nSince a significant aspect of this work is to compare different approaches to data-sharing that have not yet been systematically articulated, it will be necessary to loosely define the parameters through which each case will be initially characterized. I will rely on structured consultations with the research community to make sense of the data-sharing landscape and select cases accordingly. By consulting with key stakeholders, I will arrive at a consensus about which cases are worth approaching while documenting the rationale behind these selections.\nThe consultation process is meant to ensure that case selection adheres to community will and reasoning, while also ensuring that cases are logistically feasible. I will therefore ask for input from leading members of epidemioligical data-sharing initiatives who are familiar with the goals of the this project, and who are involved with the Maelstrom Project which establishes logistical boundaries around the scope of the project.\n\nFixed cases\nMaelstrom will serve as a “fixed point” that limits the scope of the cases’ breadth, while also ensuring that participants (and myself) have a common frame of reference. Moreover, the practices and values that support Maelstrom’s operations have already been documented to a certain extent by its leaders (cf. Doiron et al. 2017; Fortier et al. 2017; Fortier et al. 
2023; Bergeron et al. 2018), by its partners (cf. Doiron et al. 2013; Wey et al. 2021; Bergeron et al. 2021) and by scholars of scientific practice (cf. M. J. Murtagh et al. 2012; Demir and Murtagh 2013; Madeleine J. Murtagh et al. 2016; Tacconelli et al. 2022; Gedeborg et al. 2023). This prior work will serve as valuable resources supporting this project.\nAdditionally, the fact that all cases interact with Maelstrom for their technical infrastructure will greatly simplify the interviews by reducing the “overhead” of having to learn or be told about the technical systems, which may distract from the primary themes I seek to address during interviews.\nCITF will also serve as a fixed case. This is partly for logistical reqasons, since the grant is meant to support the CITF Databank, and this project will align with concurrent research on user experiences pertaining to CITF specifically. At the same time, CITF is relevant to the project’s objectives in its own right, and will contribute meaningful insight in comparison with other cases.", "crumbs": [ - "Interview Protocol" + "Case Selection" ] }, { - "objectID": "interview-protocol.html#interview-guide", - "href": "interview-protocol.html#interview-guide", - "title": "Interview Protocol", - "section": "Interview guide", - "text": "Interview guide\nInterviews are oriented by my goal to document values and attitudes concerning data harmonization efforts, as elicited by research participants in their responses. 
Participants will be asked to reflect on:\n\nthe motivations for their initiatives;\nthe challenges they experience;\nhow they envision success and failure;\ntheir perceptions of their own roles and the roles of other team members and stakeholders;\nthe values that inform their decisions;\nhow the technological apparatus they set up enables them to realize their goals and values; and\nways in which they believe data-sharing could be improved.\n\nTo this end, each interview will proceed following a strategic order:\n\n1. Participants’ goals and perspectives\nFollow a life-history method to better understand participants’ professional backgrounds and their roles within their projects. The goal is to obtain information about their paths, not the rehearsed origin story.\n\nTo start, can you please tell me a little bit about your background?\nWhat is the project, and what is your role?\nHow did you find yourself in this role?\nHow has your previous experience prepared you for your role?\n\n\n\n2. Projects’ missions, purposes, motivations\nThis section is about the project in general, including its purpose, scope and value. Information about practices and procedures will be sought in a subsequent phase of the interview.\n\nWhat are the project’s goals?\nWhat makes the project unique?\nWhat is the project doing that no other similar project is doing?\nDo you consider this project as similar to any other initiatives?\nWhat are they, and in what ways are they simiar or different?\n\n\n\nWhat are the expected outcomes?\nHave you achieved these goals and outcomes?\nIf not Are you on track to achieving them?\nWhat are some challenges that the project experienced, and how have you worked to overcome them?\n\n\n\n3. 
Practices, procedures, relationships\nThis section asks about specific actions and interactions that the participant engages in.\n\nRoles and relationships\n\nWhat does your role entail?\nCan you provide a couple examples of things that you recently did in this capacity?\n\n\n\nWho else do you frequently rely on, and what are their roles?\nCan you describe what they do, and perhaps give a few examples drawn from their recent work?\n\nThe interview might proceed in different ways depending on their initial responses. Here are some questions I might ask, corresponding with the participants’ role and area of expertise.\n\n\nMaintaining the project community\n\nPlease briefly describe the process through which you obtain new partners or users.\nCan you please recall a recent example?\n\n\n\nHow well do you know each partner?\nDid you know them before their involvement?\n\n\n\nWould you describe the project as a tight knit community, or more open-ended?\n\n\n\nHow do you communicate with partners and contributors?\nWhat kinds of media or platforms do you use, and are they targeted for specific purposes? i.e. 
email, newsletters, social media, skype, personal communication at conferences\n\n\n\nAre there particular people in each project who you communicate with more frequently than others?\nWho are they, and why are these the people who you connect with?\n\n\n\nWhat do you consider your role or responsibility vis-a-vis the development/growth of this community?\nHow do you foster the community’s development and growth?\nDo you consider these efforts to be effective?\n\n\n\nDoes your role as someone who leads a data harmonization initiative differentiate you from other epidemiologists?\nHow has your relationship with other epidemiologists changed after initiating this project and taking on this role?\n\n\n\nReflections on data’s value\n\nHow has the data been used or analyzed?\nDo you track how the data is used?\nIs this tracking formal or informal?\n\n\n\nWhat patterns or trends are apparent based on this tracking?\nIn your view, has the data been used in productive ways?\nIn what ways are people either maximizing or not fully utilizing the data’s full potential?\n\n\n\nCan you tell me about any unexpected or alternative uses of the data?\nWhat made them significant to you?\n\n\n\nWhich skills and competencies do you think researchers need to possess in order to be able to make proper use of the data in their work?\n\n\n\nBased on your experience, what are the main obstacles for the effective and widespread adoption of these skills?\nWhat are some positive factors, or drivers, that can make that prospect more tangible?\n\n\n\nData ownership\n\nWho has rights (in the legal sense or informally) over the information contained in the system, or in related documents and datasets?\nCan you tell me about any conflicts or tensions that emerged relating to expressions of propriety or ownership over data?\n\n\n\nCollecting data\n\nDo projects collect data with future harmonization in mind?\nIf so, how does this affect data collection procedures, and does this play a role in 
subsequent decision-making?\n\n\n\nCurating data\n\nPlease describe the overall process of getting data into the system and then working with the data.\n\n\n\nPlease tell me about any unexpected or problematic cases that made working with data particularly challenging.\nWhat made these cases unique or challenging?\nHow did you resolve them or work towards a solution or viable outcome?\n\n\n\nAccessing data\n\nDo you consider the system easy to access?\nCan you identify some challenges that pose as barriers to access?\n\n\n\nWho has access to data?\nHow are decisions regarding access rights made?\nCan you tell me about any unnaceptable practices regarding accessing and sharing data?\n\n\n\nUsing data\n\nIf you engage with the data with specific questions in mind, how do these questions emerge?\nWhat role does the data play in shaping the questions and analytical approach?\n\n\n\nIs the system amenable to exploratory or serendipitous modes of discovery?\nPlease tell me about specific examples where you engaged with the data in this way.\n\n\n\nWhat features does the system have to view or export data?\nHow easy is it to view, export or visualize data the data?\nDo you use the tools that are designed to export of visualize data, or do you prefer to use your own tooling?\nWhat are the reasons behind this preference?\n\n\n\nDocumentation\n\nHow is the system documented?\nWho is responsible for creating documentation?\nCan you please tell me about a great example of documentation in your project?\n\n\n\nOverall, do you consider your project’s documentation to be helpful?\nWhy or why not?\n\n\n\nIn your opinion, does the documentation accurately reflect the true nature of the documented data or work practices?\nAre specific things more accurately documented than others?\nPlease tell me why you think some things are more accurately or less accurately documented.\n\n\n\nCan you recall any instances when documentation was updated?\nWhat prompted these updates?\n\n\n\nDo 
people ever get in touch to ask questions about specific aspects of the data or data curation procedures?\nWhat kinds of questions do they ask?\nWhat kinds of responses are given?\n\n\n\nRelationships with Maelstrom\n\nCan you please concisely describe the role of Maelstrom as part of your project’s overall initiative?\n\n\n\nWhat are the origins of your project’s relationship with Maelstrom?\nHow has this relationship changed over time?\n\n\n\nDoes your project present any unique challenges or require any special attention?\nIf so, please tell me about some unique cases or examples that demonstrate this unique relationship.\n\n\n\nDo you believe that Maelstrom is meeting your project’s needs and enabling it to achieve its goals?\nIn what ways is Maelstrom either satisfying or failing to meet your project’s expectations or needs?\nHow would you change the current system to better suit your project’s needs more effectively?\n\n\n\nDo you engage with Maelstrom’s other partners?\nIf so, what is the nature of these relationships?", + "objectID": "case-selection.html#logistical-constraints-and-sources-of-bias", + "href": "case-selection.html#logistical-constraints-and-sources-of-bias", + "title": "Case Selection", + "section": "Logistical Constraints and Sources of Bias", + "text": "Logistical Constraints and Sources of Bias\nAfter identifying potential cases, I will reach out to project leaders to invite them to participate. I will prepare a document outlining this project’s objectives and the roles that cases will play in the work. 
I will also set up a meeting prior to them deciding whether they would like to participate so I can ascertain whether they understand the project and to help determine who may serve as people who can sit for interviews (I expect to hold 12-15 interviews ranging between 60-90 minutes in duration).\nI may prioritize local connections, which provide favourable conditions for holding interviews (i.e., people are more willing to show things that can not be conveyed through a screen, and the pre- and post-interview phases provide meaningful insight). This may introduce bias in that I may obtain more in-depth and nuanced information from local initiatives than those occurring abroad. This can be mitigated by travelling to conduct interviews in person, however the costs of travel may introduce their own biases favouring cases that are easier to reach.", "crumbs": [ - "Interview Protocol" + "Case Selection" ] }, { - "objectID": "notes/maelstrom-readings.html", - "href": "notes/maelstrom-readings.html", - "title": "Maelstrom reading notes", + "objectID": "context.html", + "href": "context.html", + "title": "Context", "section": "", - "text": "Initial overview of data harmonization procedures, using the Healthy Obesity Project (HOP) as an illustrative case.\nOutlines the technical apparatus, especially for DataShield, but also broadly describes the discursive process of arriving at a DataSchema that is both functional and flexible.\n\nThis description is quite broad and abstracy, seems somewhat ideal and aspirational.\n\nDescribes reliance on international standards, such as the International Labour Organization’s International Standard Classification of Occupations.\n\nIt seems like these are used as black boxes that encapsulate a series of tensions which epidemiologists are unconcerned with; in effect, they simplify the need for stretching the collaborative ties even further than they are already extended, they represent matters out of scope for deeper discursive 
engagement.\n\nIt is notable that they emphasize that it’s easy to set up and use DataShield and Maelstorm toolkits independently of university IT and that it can be run using RStudio installed on a basic laptop.\n\nMaybe look into the historical context (2013) and the evolving role of university IT in software selection.\n\nThe conclusion states that the HOP project was successful in its harmonization efforts, but does not go as far as to state that it produced meaningful findings as a result of harmonization.\n\nI may take some time to find and read studies that used these data to see what’s what.\nThis seems like the main one: https://doi.org/10.1186/1472-6823-14-9, but these other papers may or not not also be relevant:\n\nhttps://doi.org/10.1016/j.smhl.2021.100263\nhttps://doi.org/10.1007/s10654-014-9977-1\nhttps://doi.org/10.1530/EJE-14-0540\nhttps://doi.org/10.1007/S13679-020-00375-0\nhttps://doi.org/10.1093/eurpub/ckac061" + "text": "I am Zack Batist — a postdoctoral researcher at McGill University, in the School of Global and Public Health’s Department of Epidemiology, Biostatistics and Occupuational Health. I’m working with David Buckeridge, who leads the Covid-19 Immunity Task Force (CITF) Databank, to investigate data sharing in epidemiological research — with an emphasis on the practical and situated experiences involved in data sharing.\nThe CITF is a “data harmonization” initiative, which entails coordinating a systematic effort to align the information contained in datasets collected by distributed teams of epidemiologists. These efforts to integrate the records collected during various discrete studies are motivated by a desire to establish larger integrated datasets bearing greater statistical power and that facilitate comparison across cohorts. 
However, epidemiologists must reckon with the diversity of minor variations in data collection procedures, as well as ethico-legal concerns relating to the sharing of individual health records pertaining to human research subjects across numerous institutional and regional jurisdictions.\nAs a scholar of scientific practice, with a primary interest in data-sharing and the formation of information commons, data harmonization represents a fascinating mechanism through which scientists derive technical, administrative, social and epistemic frameworks to enhance the value of their collective endeavours in response to disciplinary needs, warrants, desires and expectations. This study therefore articulates the motivations for doing data harmonization, identifies how value is ascertained, and describes the strategies employed to achieve the desired goals — including perceived and actual challenges, setbacks, opportunities, realizations, and lessons learned.\nThis relates to my previous work that (a) explores tensions that arise when attempting to establish information commons in archaeology, specifically relating to inability to cope with a superficial perception of data’s stability and an intuitive understanding of their situated nature; and that (b) investigates how the open science movement attempts (and fails) to reshape practices relating to data sharing, integration and reuse. 
I continue in my approach that frames data-sharing — whether it occurs in relatively “closed” curcumstances between close colleagues, or as mediated by open data platforms among strangers — as comprising a series of collaborative commitments that govern who may contribute to and obtain value from the information commons, and in what ways.", + "crumbs": [ + "Context" + ] }, { - "objectID": "notes/maelstrom-readings.html#doiron2013", - "href": "notes/maelstrom-readings.html#doiron2013", - "title": "Maelstrom reading notes", + "objectID": "ethics-protocol.html", + "href": "ethics-protocol.html", + "title": "Ethics Protocol", "section": "", - "text": "Initial overview of data harmonization procedures, using the Healthy Obesity Project (HOP) as an illustrative case.\nOutlines the technical apparatus, especially for DataShield, but also broadly describes the discursive process of arriving at a DataSchema that is both functional and flexible.\n\nThis description is quite broad and abstracy, seems somewhat ideal and aspirational.\n\nDescribes reliance on international standards, such as the International Labour Organization’s International Standard Classification of Occupations.\n\nIt seems like these are used as black boxes that encapsulate a series of tensions which epidemiologists are unconcerned with; in effect, they simplify the need for stretching the collaborative ties even further than they are already extended, they represent matters out of scope for deeper discursive engagement.\n\nIt is notable that they emphasize that it’s easy to set up and use DataShield and Maelstorm toolkits independently of university IT and that it can be run using RStudio installed on a basic laptop.\n\nMaybe look into the historical context (2013) and the evolving role of university IT in software selection.\n\nThe conclusion states that the HOP project was successful in its harmonization efforts, but does not go as far as to state that it produced meaningful findings as a result of 
harmonization.\n\nI may take some time to find and read studies that used these data to see what’s what.\nThis seems like the main one: https://doi.org/10.1186/1472-6823-14-9, but these other papers may or not not also be relevant:\n\nhttps://doi.org/10.1016/j.smhl.2021.100263\nhttps://doi.org/10.1007/s10654-014-9977-1\nhttps://doi.org/10.1530/EJE-14-0540\nhttps://doi.org/10.1007/S13679-020-00375-0\nhttps://doi.org/10.1093/eurpub/ckac061" - }, - { - "objectID": "notes/maelstrom-readings.html#doiron2017", - "href": "notes/maelstrom-readings.html#doiron2017", - "title": "Maelstrom reading notes", - "section": "Doiron et al. (2017)", - "text": "Doiron et al. (2017)\n\nAn overview of the key software that facilitates data harmonization practices under Maelstrom, also briefly touched upon in Doiron et al. (2013).\nPage 1373 refers to graphical and programmatic interfaces and assumes certain roles and tasks associated with each.\nBriefly describes its use by the Canadian Longitudinal Study on Aging (CLSA), the Canadian Partnership for Tomorrow Project (CPTP) and InterConnect, primarily by describing the range and quantity of data that these systems manage in each case.\n\n\nOpal provides a centralized web-based data management system allowing study coordinators and data managers to securely import/export a variety of data types (e.g. text, nu merical, geolocation, images, videos) and formats (e.g. SPSS, CSV) using a point-and-click interface. Opal then converts, stores and displays these data under a standar dized model.\n\n\nMica is used to create websites and metadata portals for individual epidemiological studies or multi-study consor tia, with a specific focus on supporting observational co hort studies. The Mica application helps data custodians and study or network coordinators to efficiently organize and disseminate information about their studies and net works without significant technical effort." 
+ "text": "Project Title: Articulating epidemiological data harmonization initiatives as practical and collaborative experiences\nSubmitted Materials: zackbatist.info/CITF-Postdoc/irb-docs.pdf\nPrincipal Investigator: Zachary Batist\nProtocol Number: 25-01-057\nSubmitted: 2025-01-30\nApproved:", + "crumbs": [ + "Ethics Protocol" + ] }, { - "objectID": "notes/maelstrom-readings.html#fortier2010", - "href": "notes/maelstrom-readings.html#fortier2010", - "title": "Maelstrom reading notes", - "section": "Fortier et al. (2010)", - "text": "Fortier et al. (2010)\n\nA very grandiose paper presenting the grand vision for DataSHaPER, which would eventually become Maelstrom.\n\nLots of co-authors!\n\nInvokes the pan-European EPIC project (European Prospective Investigation into Cancer and Nutrition), which faced numerous data synthesis challenges despite its proactive effort to coordinate work across numerous research centres.\n\n\nTwo complementary approaches may be adopted to support effective data synthesis. The first one principally targets ‘what’ is to be synthesized, whereas the other one focuses on ‘how’ to collect the required information. Thus: (i) core sets of information may be identified to serve as the foundation for a flexible approach to harmonization; or (ii) standard collection devices (questionnaires and stand ard operating procedures) may be suggested as a required basis for collection of information.\n\n\nDataSHaPER is an acronym for DataSchema and Harmonization Platform for Epidemiological Research.\n\n\nIn an ideal world, information would be ‘prospectively harmonized’: emerging studies would make use, where possible, of harmonized questionnaires and standard operating procedures. This enhances the potential for future pooling but entails significant challenges —- ahead of time -— in developing and agree ing to common assessment protocols. 
However, at the same time, it is important to increase the utility of existing studies by ‘retrospectively harmonizing’ data that have already been collected, to optimize the subset of information that may legitimately be pooled. Here, the quantity and quality of infor mation that can be pooled is limited by the heterogeneity intrinsic to the pre-existing differences in study design and conduct.\n\nCompares prospective and retrospective harmonizatiom, with the former being presented as ideal, and the latter being a pragmatic reconciliation in acknowledgement that the former is essentially impossible to achieve.\n\nDataSHaPER is strikingly similar to OCHRE:\n\nXML-based data structures\nGenesis of a generic and ultimately optional base-level schema that illustrates the kind of data that the data structure may hold in ways that are immediately recognizable to all practitioners (at OCHRE it was associations between contexts and finds)\nSeparate harmonization platform where users can edit and manipulate records and associations between them\n\n\n\nThe question ‘What would constitute the ultimate proof of success or failure of the DataSHaPER approach’ needs to be addressed. Such proof will necessarily accumulate over time, and will involve two fundamental elements: (i) ratification of the basic DataSHaPER approach; and (ii) confirmation of the quality of each individual DataSHaPER as they are developed and/or extended. An important indication of the former would be provided by the widespread use of our tools. However, the ultimate proof of principle will necessarily be based on the generation of replicable scientific findings by researchers using the approach. But, for such evidence to accumulate it will be essential to assure the quality of each individual DataSHaPER. Even if the fundamental approach is sound, its success will depend critically on how individual DataSHaPERs are constructed and used. 
It seems likely that if consistency and quality are to be assured in the global development of the approach, it will be necessary for new DataSHaPERs to be formally endorsed by a central advisory team." + "objectID": "ethics-protocol.html#recruitment-and-consent", + "href": "ethics-protocol.html#recruitment-and-consent", + "title": "Ethics Protocol", + "section": "Recruitment and consent", + "text": "Recruitment and consent\nWill this study involve recruitment of human study participants?\n\nYes\nNo\n\nHow are potential study participants identified and/or recruited to the study? Explain how potential participants are identified or introduced to the study, and who will recruit participants. Will the investigator/s require any special permissions or access to the target population e.g. clinic access, patient registries or records, mailing lists, community access?\nThrough consultation with key community stakeholders, the principal investigator will devise a list of prospective projects to serve as cases.1 The principal investigator will then write to the leaders of these projects inviting them to participate in the study. These invitations to project leaders will explain the project’s purpose and scope, and will encourage the recipient to reply with any questions or concerns they may have. If they accept the invitation, the principal investigator will then work with project leaders to devise a list of individuals who may serve as interview candidates based on their roles in the project. The principal investigator will be clear with project leaders that they should not pressure those who work for them to participate in the study, and that individuals’ participation should be treated as separate from their regular duties; if project leaders cannot or will not abide by this condition, their project will be rejected as a prospective case. 
The principal investigator will then write to the recommended individuals to introduce the study and its objectives and to invite them to participate as research subjects. If these individuals express interest in participating in the study, the principal investigator will schedule a time to sit for an interview. Some interviews may be conducted remotely using internet-based video conferencing software, depending on participants’ availability.\n1 See the case selection protocol for further details.Describe the consent process. If alternate processes for seeking consent are planned (e.g. verbal, online, waiver), please provide a rationale and outline the procedure of obtaining and documenting consent and/or assent, where applicable.\nOnce individuals express their interest in participating, participants will be provided with an informed consent document that outlines in more detail the goals of the study, the roles of the participant, how they will be recorded, how data pertaining to them will be retained, and the potential risks and benefits pertaining to their involvement. This document will also describe how participants’ personally identifiable information will be managed and used. Participants will be asked to read and sign the document in order to obtain written informed consent. For interviews that will be held remotely using internet-based video conferencing software, participants will be asked to send their signed informed consent documents in PDF format to the principal investigator. 
At the start of each interview the researcher will reiterate participants’ rights and ask them to orally reaffirm their consent before proceeding.\nIs there a relationship between the study participants and the person obtaining consent and/or the principal investigator/s?\n\nYes\nNo\n\nIf yes, please explain the nature of the relationship, and outline the steps that will be taken to avoid the perception of undue influence.\nOne project that serves as a case in this research is the Covid-19 Immunity Task Force (CITF), which the principal investigator currently serve as postdoctoral researcher. Some of the participants will therefore be his colleagues. The interviews will remain structured and limited in scope, and will not touch on matters relating to other aspects of their work. Moreover, prior to and throughout their involvement as research participants, frank and open discussion will be encouraged regarding collective expectations and to articulate the boundaries between participants’ relationships with the principal investigator as colleagues and as research subjects.\nThe principal investigator will consult with David Buckeridge, who leads the CITF, as one key community stakeholder to help devise a shortlist of projects that may serve as prospective cases.", + "crumbs": [ + "Ethics Protocol" + ] }, { - "objectID": "notes/maelstrom-readings.html#fortier2011", - "href": "notes/maelstrom-readings.html#fortier2011", - "title": "Maelstrom reading notes", - "section": "Fortier et al. (2011)", - "text": "Fortier et al. (2011)\nThis paper responds to Hamilton et al. (2011), which presents an effort to devise a standardized nomenclature. The response is basically to advocate for a more flexible approach, rather than a stringent one promoted by Hamilton et al. (2011). It draws extensively from concepts published in the foundational paper by Fortier et al. 
(2010).\n\nTwo complementary approaches to harmonization may be adopted to support effective data synthesis or comparison across studies. The first approach makes use of identical data collection tools and procedures as a basis for harmoni zation and synthesis. Here we refer to this as the ‘‘stringent’’ approach to harmonization. The second approach is con sidered ‘‘flexible’’ harmonization. Critically, the second ap proach does not demand the use of identical data collection tools and procedures for harmonization and synthesis. Rather, it has to be based on sound methodology to ensure inferential equivalence of the information to be harmonized. Here, standardization is considered equivalent to stringent harmonization. It should, however, be noted that the term standard is occasionally employed to refer to common con cepts or comparable classification schemes but does not necessarily involve the use of identical data collection tools and procedures (12, 13).\n\nThis directly parallels the distinction made in Fortier et al. (2010) between “ideal” prospective and more pragmatic retrospective approaches to data harmonization.\n\nSynthesis of data using a flexible harmonization approach may be either prospective or retrospective. To achieve flexible prospective harmonization, investigators from several studies will agree on a core set of variables (or measures), compatible sets of data collection tools, and standard operating procedures but will allow a certain level of flexibilit in the specific tools and procedures used in each study (16, 17). Retrospective harmonization targets synthesis of information already collected by existing legacy studies (15, 18, 19). As an illustrative example, using retrospective harmonization, researchers will define a core set of variables (e.g., body mass index, global level of physical activity) and, making use of formal pairing rules, assess the potential for each participating study to create each variable (15). 
The ability to retrospectively harmonize data from existing studies facilitates the rapid generation of new scientifi knowledge.\n\nI wonder why there is no example provided for prospective data harmonization. Is it because it is ideal and not realistic? I’d argue that it is simply what occurs within individual projects." + "objectID": "ethics-protocol.html#risk-benefit-assessment", + "href": "ethics-protocol.html#risk-benefit-assessment", + "title": "Ethics Protocol", + "section": "Risk-benefit assessment", + "text": "Risk-benefit assessment\nDescribe the foreseeable risks to study participants. What risks are attributable to the research, including cumulative risks? Which risks are participants normally exposed to in the course of their clinical care or in their daily activities as they relate to the research questions/objectives?\nParticipation in this study does not involve any physical, psychological or legal risks. However, the principal investigator will be asking participants to share detailed information about their work practices and work relationships, and public association with their responses may potentially disrupt or complicate their professional reputations. To mitigate against this potential harm, the principal investigator will give participants the option to render their responses confidential.\nWhat procedures are in place to monitor and assess participant safety for the duration of the study?\nPrior to each interview, and as part of the procedure for obtaining informed consent, participants will be asked about whether they want to render their responses confidential. Immediately after each interview, participants will be given an additional opportunity to reflect on their responses, and will be prompted to either confirm or alter their decision regarding whether or not to maintain confidentiality. 
Furthermore, for participants who have not requested that their responses be treated as confidential immediately before and after the interview, a follow-up email will be sent one week after the interview to reiterate the option to render their responses confidential.\nDescribe the potential benefits of the study for: (1) the study participants; (2) the population under investigation, and (3) the field of research.\nThis study contributes to the development of better epidemiological data-sharing infrastructures by articulating social, collaborative and discursive aspects of data harmonization, and how these factors relate to, overlap with or conflict with technical, institutional and epistemic factors. By explicitly framing data harmonization as a social and collaborative activity, we may devise more effective data-sharing infrastructures that better support the contextualization of data and enhance their value in contexts of data reuse. This work therefore poses new ways to document how epidemiologists mobilize distributed records in the constitution of synthetic knowledge and helps develop practical solutions that enable greater reflexivity. Additionally, this study may directly benefit participants by framing the experiences they address during interviews in ways that they might not have otherwise considered, thereby encouraging greater reflexivity in their own work.", + "crumbs": [ + "Ethics Protocol" + ] }, { - "objectID": "notes/maelstrom-readings.html#fortier2017", - "href": "notes/maelstrom-readings.html#fortier2017", - "title": "Maelstrom reading notes", - "section": "Fortier et al. (2017)", - "text": "Fortier et al. 
(2017)\nExplicit statement regarding the rationale and presumed benefits of harmonization right in the first paragraph:\n\nThe rationales underpinning such an approach include ensuring: sufficient statistical power; more refined subgroup analysis; increased exposure hetero geneity; enhanced generalizability and a capacity to under take comparison, cross validation or replication across datasets. Integrative agendas also help maximizing the use of available data resources and increase cost-efficiency of research programmes.\n\nSummarized in bullet points:\n\nensuring sufficient statistical power\nmore refined subgroup analysis\nincreased exposure heterogeneity\nenhanced generalizability\na capacity to undertake comparison, cross validation or replication across datasets.\nmaximizing the use of available data resources\nincrease cost-efficiency of research programmes\n\nClearly defines harmonization and its benefits:\n\nEssentially, data harmonization achieves or improves comparability (inferential equivalence) of similar measures collected by separate studies.\n\nAdds an additional argument for retrospective harmonization on top of prior discussion of retrospective/prospective approaches (cf. Fortier et al. (2010); Fortier et al. (2011)):\n\nRepeating identical protocols is not necessarily viewed as providing evidence as strong as that obtained by exploring the same topic but using different designs and measures.\n\nAlso relates retrospective harmonization from systematic meta reviews. In fact, the paper basically responds to calls for more structured guidelines for data harmonization, similar to those that had been produced to support structured metareviews in the years prior to this publication. The authors identify several papers that have done similar guidelines or reports on harmonization practices, which they claim are too broad. Those papers include:\n\nRolland et al. (2015)\n\nhttps://doi.org/10.1093/aje/kwv133\n\nSchaap et al. 
(2011)\n\nhttps://doi.org/10.1186/1471-2474-12-272\n\nBennett et al. (2011)\n\nhttps://doi.org/10.1002/gepi.20564\n\nHohmann et al. (2012)\n\nThis paper reports the findings of a systematic inquiry made to data harmonization initiatives, whose data comprise responses to a questionnaire. The findings indicate that procedures were more attentively followed during earlier stages, such as when matching and aligning available data with the project’s designated scope. However, procedures were less sound with regards to documenting procedures, validating the results of data processing, and dissemination strategy. There is a notable division between work that occurs before and after people actually begin handling the data, which indicates a tension between aspirational plans and tangible experiences.\n\nRespondents were asked to delineate the specific procedures or steps undertaken to generate the harmonized data requested. Sound procedures were generally described; however, the terminologies, sequence and technical and methodological approaches to these procedures varied considerably. Most of the procedures mentioned were related to defining the research questions, identifying and selecting the participating studies (generally not through a systematic approach), identifying the targeted variables to be generated and processing data into the harmonized variables. These procedures were reported by at least 75% of the respondents. 
On the other hand, few reported steps related to validation of the harmonized data (N=4; 11.8%), documentation of the harmonization process (N=5; 14.7%) and dissemination of the harmonized data outputs (N=2; 5.9%).\n\nThe paper summarizes some specific “potential pitfalls” reported by respondents to their survey:\n\nensuring timely access to data;\nhandling dissimilar restrictions and procedures related to individual participant data access;\nmanaging diversity across the rules for authorship and recognition of input from study-specific investigators;\nmobilizing sufficient time and resources to conduct the harmonization project;\ngathering information and guidance on harmonization approaches, resources and techniques;\nobtaining comprehensive and coherent information on study-specific designs, standard operating procedures, data collection devices, data format and data content;\nunderstanding content and quality of study-specific data;\ndefining the realistic, but scientifically acceptable, level of heterogeneity (or content equivalence) to be obtained;\ngenerating effective study-specific and harmonized datasets, infrastructures and computing capacities;\nprocessing data under a harmonized format taking into account diversity of: study designs and content, study population, synchronicity of measures (events measured at different point in time or at different intervals when repeated) etc;\nensuring proper documentation of the process and decisions undertaken throughout harmonization to ensure transparency and reproducibility of the harmonized datasets;\nmaintaining long-term capacities supporting dissemination of the harmonized datasets to users.\n\nIt’s not made clear how these responses were distributed among respondents.\nThe authors then identify several absolute essential requirements needed to achieve success:\n\nCollaborative framework: a collaborative environment needs to be implemented to ensure the success of any harmonization project. 
Investigators involved should be open to sharing information and knowledge, and investing time and resources to ensure the successful implementation of a data-sharing infrastructure and achievement of the harmonization process.\nExpert input: adequate input and oversight by experts should be ensured. Expertise is often necessary in: the scientific domain of interest (to ensure harmonized variables permit addressing the scientific question with minimal bias); data harmonization methods (to support achievement of the harmonization procedures); and ethics and law (to address data access and integration issues).\nValid data input: study-specific data should only be harmonized and integrated if the original data items collected by each study are of acceptable quality.\nValid data output: transparency and rigour should be maintained throughout the harmonization process to ensure validity and reproducibility of the harmonization results and to guarantee quality of data output. The common variables generated necessarily need to be of acceptable quality.\nRigorous documentation: publication of results generated making use of harmonized data must provide the information required to estimate the quality of the process and presence of potential bias. This includes a description of the: criteria used to select studies; process achieved to select and define variables to be harmonized; procedures used to process data; and characteristics of the study-specific and harmonized dataset(s) (e.g. attribute of the populations).\nRespect for stakeholders: all study-specific as well as network-specific ethical and legal components need to be respected. 
This includes respect of the rights, intellectual property interests and integrity of study participants, investigators and stakeholders.\n\nThe authors describe how they arrived at guidelines following the results of this study:\n\nA consensus approach was used to assemble information about pitfalls faced during the harmonization process, establish guiding principles and develop the guidelines. The iterative process (informed by workshops and case studies) permitted to refine and formalize the guide lines. The only substantive structural change to the initial version proposed was the addition of specific steps relating to the validation, and dissemination and archiving of harmonized outputs. These steps were felt essential to em phasize the critical nature of these particular issues.\n\nThe paper outlines a checklist of stages that data harmonization initiatives need to go through to produce ideal outcomes. For each task, they describe a scenario in which the task can be said to be complete, whhich resembles an ideal outcome. This is described in the paper, summarized in a table, and more comprehensively documented in the supplementary materials.\nAlso worth noting, this paper includes a list of harmonization initiatives that I may consult when selecting cases. I’m not quite sure how useful it will be since the findings don’t really break down the distribution of responses in any detail, but maybe the authors have done this analysis and not published it." + "objectID": "ethics-protocol.html#privacy-and-confidentiality", + "href": "ethics-protocol.html#privacy-and-confidentiality", + "title": "Ethics Protocol", + "section": "Privacy and confidentiality", + "text": "Privacy and confidentiality\nPlease describe the measures in place for meeting confidentiality obligations. How is information and data safeguarded for the full cycle of the study: i.e. 
during its collection, use, dissemination, retention, and/or disposal?\nThe specific circumstances that frame each case are significant factors that will shape the findings, and the study will benefit from participants’ consent to associate their identities with their interview responses. However, they may choose to render their interview responses confidential while maintaining their role as a research participant. Participants may change their decision regarding whether or not to associate their identities with their interview responses up to one week after the interview, at which point the principal investigator will begin transcribing and analyzing the records pertaining to the interview. Participants will be reminded about this option immediately after the interview and one week following the interview via email.\nThe study engages with a relatively small community, and there is minimal social risk that others may be able to determine the identities of those whose research practices and professional relationships are being documented, even if their responses are rendered confidential. To address this issue, if any single participant from a case decides to render their responses confidential, the responses of all participants pertaining to that case will be rendered confidential as well, and the identity of the project that serves as the case will be obfuscated too.\nIn situations whereby a participant decides to render their responses confidential, or has their responses rendered confidential due to another member of their case deciding to do so, only the principal investigator will have access to records containing un-obfuscated information that may identify them. 
These un-obfuscated records, which may include audio and video records of interview sessions, as well as unedited transcripts and textual notes containing information that may reveal the participants’ identities, will be kept in secure and encrypted media, and destroyed within five years of concluding the study, which provides sufficient time to revisit the data and produce additional research outputs. However, edited transcripts scrubbed of all information that may identify research participants may be kept, published and archived. If participants consent to maintaining association between their responses and their identities, un-obfuscated records and transcripts may be kept, published and archived.\nThe study is committed to adhering to fundamental data security practices, including those specified in McGill University’s Cloud Directive which regulates the curation of sensitive research data. Physical records will be kept in a locked drawer in secure workspaces, either at McGill University’s School of Public and Global Health or at the principal researcher’s home office. Digital records will be stored on encrypted and password-protected drives and on secure servers approved or managed by McGill University under the Cloud Directive.2\n2 Refer to the data management plan for further details on how information pertaining to this project will be collected, curated and shared.Recordings of remote interviews conducted using internet-based video conferencing software will be made using the software’s built-in recording tools. Only video conferencing software approved by the Cloud Directive will be used. Participants will be instructed to disable their microphones or video cameras prior to initiating recording if they have opted to not be recorded through these media. 
The researcher will record all media locally and refrain from using any cloud services to store or modify the records which the video conference software may provide.\nIf a contracted cloud/storage service provider or online survey tool is used, provide information on the service provider’s security and privacy policy, location of its servers, data ownership, and what happens to the stored data after the contract is terminated. For more information, please consult the University’s directive.\nThe study uses file-sharing software hosted by the Covid-19 Immunity Task Force at McGill University’s School of Public and Global Health to backup all files maintained for this study. These backups will include files containing information that might reveal participants’ identities. The software used to manage these backups is managed by McGill University and has been approved for storing sensitive research data by the Cloud Directive.\nThe study may use the secure GitLab instance hosted by the surveillance lab within the Clinical and Health Informatics Research Group at McGill University to store and track changes to sensitive research data. This software is managed by McGill University and has been approved for storing sensitive research data by the Cloud Directive.\nThe study maintains a website where the principal investigator shares documentation that supports the study and reflects on the work as it progresses. This is hosted using GitHub Pages and is backed up using Dropbox. No sensitive research data will pass through these services.\nRecordings of remote interviews conducted using internet-based video conferencing software will be made using the software’s built-in recording tools. Only video conferencing software approved by the Cloud Directive will be used. Participants will be instructed to disable their microphones or video cameras prior to initiating recording if they have opted to not be recorded through these media. 
The researcher will record all media locally and refrain from using any cloud services to store or modify the records which the video conference software may provide.\nPlease explain any reasonable and foreseeable disclosure requirements (e.g. disclosure to third parties such as government agencies or departments, community partners in research, personnel from an agency that monitors research, research sponsor, the REB/IRB, or regulatory agencies).\nNo disclosure requirements are foreseen.\nIf there are plans for retaining participant and/or study data for future use, please describe the context for its use, requirements for potentially re-contacting study participants and consent, and how the data will be stored and maintained for the long term.\nResearch data will be published in compliance with ethical standards for sharing open social science research data. Records that contain personally-identifying information pertaining to participants who have requested that their responses be rendered confidential and to those who have had their responses rendered confidential due to another member of their case deciding to do so will not be published.\nThe database containing codings, memos and trends deriving from qualitative data analysis will be published only after being scrubbed of all personally-identifying information pertaining to participants who have requested that their responses be rendered confidential and to those who have had their responses rendered confidential due to another member of their case deciding to do so.\nThe principal investigator may follow up with the leaders of the data-sharing initiatives that serve as cases for this project to share the results with them and to present them with constructive feedback deriving from the study’s findings. 
The principal investigator may also invite select participants to collaborate on a position paper advocating for reforms based on the project’s findings.\nSecondary use of data studies: if the study involves data linkage, please describe the data that will be linked and the likelihood that identifiable information will be created through the linkage.\nThis project does not rely on data deriving from other studies. The data may be reused in related work being undertaken under the same grant and by those who access the openly accessible data after they are published.", + "crumbs": [ + "Ethics Protocol" + ] }, { - "objectID": "notes/maelstrom-readings.html#bergeron2018", - "href": "notes/maelstrom-readings.html#bergeron2018", - "title": "Maelstrom reading notes", - "section": "Bergeron et al. (2018)", - "text": "Bergeron et al. (2018)\nThe authors reference the drive for efficiency as a motivating factor that drives open data:\n\nHowever, many cohort databases remain under-exploited. To address this issue and speed up discovery, it is essential to offer timely access to cohort data and samples.\n\nHowever the paper is actually about the need for better and more publicly accessible documentation about data.\nThe authors state that catalogues exist to promote discoverability of data and samples and to answer the data documentation needs of individual studies.\nThey draw attention to the importance of catalogues in research networks (analyzing data across studies), which establish portals that document “summary statistics on study subjects, such as the number of participants presenting specific characteristics (e.g. diseases or exposures)”.\nThe authors outline several challenges that inhibit or limit the potential value of catalogues:\n\nThe quality of a catalogue directly depends on the quality and comprehensiveness of the study-specific information documented. 
But, maintaining and providing access to understandable and comprehensive documentation to external users can be challenging for cohort investigators, and require resources not always available, particularly for the very small or long-established studies. In addition, the technical work required to build and maintain a catalogue is particularly demanding. For example, gathering comprehensive and comparable information on study designs necessitates the implementation of rigorous procedures and working in close collaboration with study investigators. Manual classification of variables is also a long and a tedious process prone to human error. Moreover, the information collected needs to be regularly revised to update metadata with new data collections. These challenges, among others, can lead to the creation of catalogues with partial or disparate information across studies, documenting limited subsets of variables (e.g. only information collected at baseline) or including only studies with data dictionaries available in a specific language or format.\n\nBullet point summary:\n\nA catalogue’s quality depends on the quality and comprehensiveness of documentation provided by individual studies\nCohort investigators, i.e. leaders of individual studies, are under-equipped to provide such comprehensive documentation\n\nDo they just need material support? 
Or also guidance on how to do it, what factors to account for, etc?\n\nTechnical work for building and maintaining a catalogue is demanding\n\nI’m not sure if they example they provide to illustrate these challenges aligns with what I would call “technical work”; they refer to precise and detailed documentation in direct consultation with individual study maintainers, and I suppose the discussion about and documentation of methodological details is technical in that it corresponds with the work that was actually done on the ground, using data collection and processing instruments\n\nClassification of variables is a long and tedious process\n\nWhat makes it long and tedious? This isn’t really specified\nThey recite that this is prone to human error, but I wonder what successful or error-ful (?) outcomes would look like and how they would differ\n\nThe information needs to be regularly revised and updated\n\nThe authors recommendations to resolve these concerns:\n\nHowever, to truly optimize usage of available data and leverage scientific discovery, implementation of high quality metadata catalogues is essential. It is thus important to establish rigorous standard operating procedures when developing a catalogue, obtain sufficient financial support to implement and maintain it over time, and where possible, ensure compatibility with other existing catalogues.\n\nBullet point summary:\n\nestablish rigorous standard operating procedures when developing a catalogue\nobtain sufficient financial support to implement and maintain it over time\nwhere possible, ensure compatibility with other existing catalogues" + "objectID": "ethics-protocol.html#managing-conflicts-of-interest", + "href": "ethics-protocol.html#managing-conflicts-of-interest", + "title": "Ethics Protocol", + "section": "Managing conflicts of interest", + "text": "Managing conflicts of interest\nConflicts of interest do not imply wrong-doing. 
It is the responsibility of the investigator to determine if any conflicts apply to any person/s involved in the design and/or conduct of the research study or any member of their immediate family. Disclose all contracts and any conflicts of interest (real, perceived, or potential) relating to this research project. Conflict of interest may also arise with regard to the disclosure of personal health information.\n\nNot applicable. There are no conflicts of interest to disclose.\nYes, there are conflicts of interest to disclose.\n\nIf yes, please describe the conflicts of interest (real, potential, and perceived), and the procedures for managing declared conflicts. Not applicable.", + "crumbs": [ + "Ethics Protocol" + ] }, { - "objectID": "notes/maelstrom-readings.html#bergeron2021", - "href": "notes/maelstrom-readings.html#bergeron2021", - "title": "Maelstrom reading notes", - "section": "Bergeron et al. (2021)", - "text": "Bergeron et al. (2021)\nIdentifies several registries of relevant cohorts, but notes that they face challenges getting the data together. Namely, issues concerning institutional policies concerning data-sharing, lack of open access to cohort data and to documentation about the data, the data’s complexity which makes it difficult to harmonize across studies, and lack of access to funding, secure data environments, and specialized expertise and resources.\nThe Research Advancement through Cohort Cataloguing and Harmonization (ReACH) initiative was establihed in collaboration with Maelstrom to overcome some of these barriers in the context of Developmental Origins of Health and Disease (DOHaD) research.\nThe authors briefly summarize some projects that rely on ReACH data, and provide a more comprehensive table of ongoing and unpublished work.\nIn the supplementary materials, the authors also include an illustrative example specific tasks, decisisions and actions that one might got through when using ReACH data. 
It is a broad-level but fairly sober account of how one would navigate the catalogue and engage with collaborators." + "objectID": "index.html", + "href": "index.html", + "title": "CITF-Postdoc", + "section": "", + "text": "This website serves as a hub for my postdoctoral research at McGill University’s Covid-19 Immunity Task Force Databank.\nThe project is concerned with articulating social, collaborative and discursive aspects of epidemiological data-sharing initiatives, and how they relate to, overlap with or conflict with technical, institutional and epistemic factors.\nThis website hosts a series of preparatory protocols that structure the project, as well as notes about key concepts and reflections on the progress of work. Please keep in mind that this is a continually evolving site and its contents may change as the project goes on. All content is hosted and tracked at github.com/zackbatist/CITF-Postdoc.\nHere’s an overview of what’s on this site:\nContext: My motivations for doing this work and the circumstances that surround the establishment of the project.\nResearch Protocol: Outlines the project’s overall vision and contextualizes it in relation to specific objectives.\nCase Selection: Articulates the parameters that inform how cases are selected.\nEthics Protocol: Specifies ethical considerations, including risks of harm and strategies for mitigating them.\nInterview Protocol: The questions I will be asking research participants, including the rationale for asking them.\nData Management: Procedures that circumscribe collection, management and curation of research data.\nQDA Protocol: The code system, memoing guidelines, and specific QDA procedures.\nGlossary: A series of key terms and their definitions, with reference to the literature and expanded notes about their meanings.\nNotes: Some semi-structured ideas that situate my work in relation to extant literature.\nBlog: Updates and reflections on key events, or general thoughts I wish to 
share.\nGitHub: A link to the GitHub repository where this website’s files are hosted.\nBib: A biblatex file containing a continually-updated list of sources cited in all documents hosted on this website.\nRSS: RSS feed you can use to subscribe to the blog.", + "crumbs": [ + "Home" + ] }, { - "objectID": "notes/maelstrom-readings.html#wey2024", - "href": "notes/maelstrom-readings.html#wey2024", - "title": "Maelstrom reading notes", - "section": "Wey and Fortier (2024)", - "text": "Wey and Fortier (2024)\nAn overview of the harmonization procedires applied in CanPath and MINDMAP. Authored by two members of the Maelstrom team, but no one from these initiatives.\nAt first, comes across as another broad-level overview of processes. But, as is elicited in the conslusion, the paper highlights some subtle divergent approaches, some of which are relevant to my project and I picked up here.\nInteresting bit about use of centralized systems or systems that are more conducive to end-users’ individual workflows.\n\nIn both illustrative projects, information about participating studies and data collected was gathered and made available on the central data server. In the CanPath BL-HRFQ, harmonization documentation and processing scripts were held centrally and only updated by Maelstrom Research. In MINDMAP, because multiple groups simultaneously worked on generating harmonization processing scripts for different areas of information, working versions of the DataSchema and processing scripts were held on a GitHub, allowing for better version control and dynamic updating of scripts by multiple remote data harmonizers.\n\nMore about the balance being struck at MINDMAP between institutional and popular tech and documentation platforms:\n\nIn the MINDMAP project, R markdown documents with applied harmonization scripts (Figure 13.1b) and any comments were preserved in the GitHub repository, which is also publicly accessible. 
Furthermore, summary statistics for MINDMAP variables are only available to approved data users through the secure server, as harmonized datasets are only intended for use within the MINDMAP network. Overviews of the harmonization process and outputs have also been published as open-access, peer-reviewed articles for both projects (Fortier et al. 2019; Wey et al. 2021).\n\nBit about existing collaborative ties making things much easier:\n\nthe prospective coordination and unu sually high ability for the harmonization team (Maelstrom Research) to work directly with study data managers on recently collected and documented study data resulted in high standardization and ability to resolve questions.\n\nData curation as an explicitly creative task, involving impactful decisions. Interesting that this was unexpected enough to warrant inclusion as a key challenge:\n\nIn MINDMAP, these differences were clearly documented in comments about harmonization for each source dataset to allow researchers to decide how to use the harmonized variable. In-depth exploration of the best statistical methods to harmonize these types of measures to maintain integrity of content while minimizing loss of information and methodological bias are important and active areas of research (Griffith et al. 2016; Van den Heuvel and Griffith 2016). The approach taken in this case was to apply simpler harmonization meth ods (i.e. rescaling) and document the transformation, leaving investigators the flexibility to further explore and analyze the harmonized datasets as appropriate for their research questions.\n\nAs with the above excerpt, documentation was considered as a viable mode of resolving or accounting for significant discrepancies:\n\nThe papers describing the harmonization projects attempt to highlight these considerations, for example, providing some comparison of demographics of the harmonized populations against the general populations from which they were drawn (Dummer et al. 
2018; Fortier et al. 2019; Wey et al. 2021), listing sources of study-specific heterogeneity in the harmonized datasets to consider (Fortier et al. 2019), and pointing users to individual study documentation where more information on weights to use for analysis should be con sidered (e.g. Wey et al. 2021).\n\nThe bit about use of github was rationalized as a way of facilitating collaboration across insitutional boundaries. They refer to R and RMarkdown being open standards as the main reason, but I wonder if a part of it is that GitHub, being a non-institutional platform, was easier to use from an oboarding perspective:\n\nIn MINDMAP, due to the need for multiple international groups to work simultaneously and flexibly on the harmonization processing and frequently evolving versions of study-specific harmonization scripts, scripts for harmonization processing were written and applied entirely through R markdown in an RStudio interface, and the DataSchema and R markdown versions were maintained and frequently updated in a GitHub repository.\n\nPerhaps ironically, the private company and platform may have been used due to the strength of pan-european collaborative ties that institutions may not be able to keep up with. Whereas in Canada, with centralized project-oriented work, it may be much easier to enforce adoption of centralized tooling. This is just speculation." 
+ "objectID": "notes.html", + "href": "notes.html", + "title": "Notes", + "section": "", + "text": "Modified\n\n\nTitle\n\n\nCategories\n\n\n\n\n\n\nFeb 4, 2025\n\n\nPotential cases\n\n\ncases, brainstorming\n\n\n\n\nFeb 14, 2025\n\n\nMethodology notes\n\n\nreading, general thoughts\n\n\n\n\nJan 28, 2025\n\n\nMaelstrom reading notes\n\n\nreading\n\n\n\n\n\nNo matching items", + "crumbs": [ + "Notes" + ] }, { - "objectID": "notes/maelstrom-readings.html#doiron2013a", - "href": "notes/maelstrom-readings.html#doiron2013a", - "title": "Maelstrom reading notes", - "section": "Doiron, Raina, and Fortier (2013)", - "text": "Doiron, Raina, and Fortier (2013)\nThis paper summarizes what was discussed at a workshop bringing together stakeholders who would contribute to two large data harmonization initiatives: the Canadian Longitudinal Study on Aging (CLSA) and the Canadian Partnership for Tomorrow Project (CPTP). It is therefore representative of plans and challenges that were held at an early stage when collaborations were being established.\nThe authors identify series of reasons for linking data, which I summarize here:\n\nMaximizing potential of disparate information resources\n\n\nenriching study datasets with additional data not being collected directly from study par ticipants\noffer vital information on health outcomes of participants\nvalidate self-reported information\n\n\nDrawing maximum value from data produced from public expenditure\n\n\noffers a cost-effective means to maximize the use of existing publicly funded data collections\n\n\nDevelop interdisciplinary collaborative networks\n\n\nby combining a wide range of risk factors, disease endpoints, and relevant socio-economic and biological measurements at a population level, linkage lays the groundwork for multidisciplinary health-research initiatives, which allow the exploration of new hypotheses not foreseeable using independent datasets\n\n\nEstablish long-lasting infrastructure and instill a 
collaborative culture\n\n\nLast, a coordinated pan-Canadian cohort-to-administrative linked database would establish legacy research infrastructures that will better equip the next generation of researchers across the country\n\nThe authors use the term “data linkage”:\n\nData linkage is “the bringing together from two or more different sources, data that relates to the same individual, family, place or event”. When linking data at the individual level, a common identifier (or a combination of identifiers) such as a personal health number, date of birth, place of residence, or sex, is used to combine data related to the same person but found in separate databases. Data linkage has been used in a number of research fields but is an especially valuable tool for health research given the large amount of relevant information collected by institutions such as governments, hospitals, clinics, health authorities, and research groups that can then be matched to data collected directly from consenting individuals participating in health research.\n\nThis is distinct from harmonization in that it is not meant to combine data with similar scope and schematic structure, but rather to relate information collected under various domains so that they could be more easily queried in tandem. 
I imagine this as reminiscient of establishing links between tables in a relational database.\nThe authors identify the open-endedness of the linked data as a unique challenge, without elaborating on this point:\n\nCLSA/CPTP-to-AHD linkage also poses unique challenges in that, in contrast to more traditional requests to link data to answer one-off research questions, it aims to establish a rich data repository that will allow investigators to answer a multitude of research questions over time.\n\nThe workshop participants established a 5-point plan:\n\nbuild strong collaborative relationships between stakeholders involved in data sharing (e.g., researchers, data custodians, and privacy commissioners);\nidentify an entity which could provide overall leadership as well as individual “champions” within each province;\nfind adequate and long-term resources and funding;\nclarify data linkage and data-sharing models and develop a common framework within which the data linkage process takes place; and\ndevelop a pilot project making use of a limited number of linked variables from participating provinces\n\nThe second point, about identifying “champions”, is kind of interesting, and I’d like to know more about what qualities these people were expcted to have, their domains of expertise, their collaborative/soft or technical skills, and how this plays into access to funds and general governance structures\nNeed to look at Roos, Menec, and Currie (2004), which they cite in the conclusion, specifically with reference to the aspiration to develop “information rich environments”. Seems like it is a primary source for the background on linked data in Manitoba and Australia." + "objectID": "notes/methodology-notes.html", + "href": "notes/methodology-notes.html", + "title": "Methodology notes", + "section": "", + "text": "This document is an overview of methodological topics and concerns. 
It is a place where I think through and justify my methodological decisions, and identify the methods and procedures through which I implement them." }, { - "objectID": "notes/maelstrom-readings.html#fortier2023", - "href": "notes/maelstrom-readings.html#fortier2023", - "title": "Maelstrom reading notes", - "section": "Fortier et al. (2023)", - "text": "Fortier et al. (2023)\nRelates harmonization to the FAIR principles, which has not really been featured much in arguments for harmonization in prior Maelstrom papers. Specifically, this paper frames harmonization as a necessary additional condition that enables FAIR data to be made useful; FAIR is deemed not enough.\n\nIn the following paper, we aim to provide an overview of the logistics and key ele ments to be considered from the inception to the end of collabo rative epidemiologic projects requiring harmonizing existing data.\n\nInteresting acronym/framework for defining research questions:\n\nThe research questions addressed and research plan proposed by harmonization initiatives need to be Feasible, Interesting, Novel, Ethical, and Relevant (FINER). (Cummings, Browner, and Hulley 2013)\n\nTable 1 lists examples of questions that could be addressed to help delineate analytical approach, practical requirements, and operations of a harmonization initiative. These are all questions that look toward or project specific goals, which then inform the strategies through which they may be achieved.\nThe supplementary materials include very detailed information about specific practices and objectives pertaining to the REACH initiative. However, it’s unclear how well this reflects any specific challenges experienced. In other words, I was hoping for something more candid.\nMoreover, this paper is also based on survey responses from 20 harmonization initiatives, but neither the findings resulting from the analysis, nor the data, are referenced or included. Is this the same as the one that informed Fortier et al. 
(2017)?\nLooming in the background of this paper is the DCC lifecycle model. However they do not cite DCC, or the field of digital curation in general. The DCC lifecycle model has always been presented as a guideline, sort of divorced from practical experience, or at least that’s how I’ve always understood it. Basically, and literally, a model for expected behaviours and interstitial outcomes. I think it would be interesting to explore (a) why the authors perceived need to present a model and (b) how they arrived at the phases and principles that are included in it. Is this tacit or common-sense thinking? Or was this directly informed by my concrete thinking from the field of digital curation?\nI should brush up on more recent work regarding the DCC, and specifically, critiques of it. Through a quick search, a few papers seem directly relevant:\n\nCox and Tam (2018)\nChoudhury, Huang, and Palmer (2020)\nRhee (2024)\n\nI think Cox and Tam (2018) may be especially relevant as a critique of the lifecycle metephor in a general sense. From the abstract, it seems like they identify various downsides pertaining to lifecycle models, specifically that they “mask various aspects of the complexity of research, constructing it as highly purposive, serial, uni-directional and occurring in a somewhat closed system.” I’m not sure how explicit the connection is, but I sense this ties into the general paradigm shift toward greater recognition of curation “in the wild”, as per Dallas (2016)." 
+ "objectID": "notes/methodology-notes.html#significant-concepts-and-frameworks", + "href": "notes/methodology-notes.html#significant-concepts-and-frameworks", + "title": "Methodology notes", + "section": "Significant Concepts and Frameworks", + "text": "Significant Concepts and Frameworks\n\nMulticase Studies\nThese notes describe the features, affordances and limitations of case study research, and articules factors correspoding with variable kinds of case studies.\nI do notice a distinction between two schools of thought, which seem to be spearheaded by Stake and Yin. I generally favour Stake’s flexible approach, and it seems well aligned with other methodological works I’ve been reading (e.g. Abbott 2004; Charles C. Ragin and Becker 1992).\n\nStake’s Approach\nIn case-study research, cases represent discrete instances of a phenomenon that inform the researcher about it. The cases are not the subjects of inquiry, and instead represent unique sets of circumstances that frame or contextualize the phenomenon of interest (Stake 2006: 4-7).\nCases usually share common reference to the overall research themes, but exhibit variations that enable a researcher to capture different outlooks or perspectives on matters of common concern. Drawing from multiple cases thus enables comprehensive coverage of a broad topic that no single case may cover on its own (Stake 2006: 23). In other words, cases are contexts that ascribe particular local flavours to the activities I trace, and which I must consider to account fully for the range of motivations, circumstances and affordances that back decisions to perform activities and to implement them in specific ways.\nMoreover, the power of case study research derives from identifying consistencies that relate cases to each other, while simultaneously highlighting how their unique and distinguishing facets contribute to their representativeness of the underlying phenomon. 
Case study research therefore plays on the tensions that challenge relationships among cases and the phenomenon that they are being called upon to represent (C. C. Ragin 1999: 1139-1140).\nStake (2006: 4-6) uses the term quintain1 to describe the group, category or phenomenon that bind together a collection of cases. A quintain is an object, phenomenon or condition to be studied – “a target, not a bull’s eye” (Stake 2006: 6). “The quintain is the arena or holding company or umbrella for the cases we will study” (Stake 2006: 6). The quintain is the starting point for multi-case research.\n1 The term refers to a medieval jousting target: see https://en.wikipedia.org/wiki/Quintain_(jousting)According to Stake (2006: 6):\n\nMulticase research starts with the quintain. To understand it better, we study some of its single cases — its sites or manifestations. But it is the quintain we seek to understand. We study what is similar and different about the cases in order to understand the quintain better.\n\nStake (2006: 8) then goes on:\n\nWhen the purpose of a case is to go beyond the case, we call it an “instrumental” case study When the main and enduring interest is in the case itself, we call it “intrinsic” case study (Stake 1988). With multicase study and its strong interest in the quintain, the interest in the cases will be primarily instrumental.\n\nAbbott’s (2004: 22) characaterization of Small-N comparison is very reminiscient of Stake’s (2006) account of the case-quintain dialectic:\n\nSmall-N comparison attempts to combine the advantages of single-case analysis with those of multicase analysis, at the same time trying to avoid the disadantages of each. On the one hand, it retains much information about each case. On the other, it compares the different cases to test arguments in ways that are impossible with a single case. 
By making these detailed comparisons, it tries to avoid the standard critcism of single-case analysis — that one can’t generalize from a single case — as well as the standard criticism of multicase analysis — that it oversimplifies and changes the meaning of variables by removing them from their context.\n\nIt should be noted that case study research limits my ability to define causal relationships or to derive findings that may be generalized across the whole field of epidemiology. This being said, case study research allows me to articulate the series of inter-woven factors that impact how epidedemiological researchers coordinate and participate in data-sharing initiatives, while explicitly accounting for and drawing from the unique and situational contexts that frame each case.\nStake (2006: 23) recommends selecting between 4-10 cases and identifies three main criteria for selecting cases:\n\nIs the case relevant to the quintain?\nDo the cases provide diversity across contexts?\nDo the cases provide good opportunities to learn about complexity and contexts?\n\n\nFor qualitative fieldwork, we will usually draw a purposive sample of cases, a sample tailored to our study; this will build in variety and create opportunities for intensive study (Stake 2006: 24).2\n2 Though Yin (2014: 40-444) is dismissive of such use of the term “sample” since he sees case study research as only generalizable to similar situations, and not to a general population from which a sample is typically said to be drawn. 
I agree with this focus on concrete situations over Stake’s prioritization of theory-building as an end unto itself.\nStake’s (2010: 122) prioritizes doing research to understand something or to improve something, and I generally agree with his rationalization; research helps reframe problems and establish different decision options.\n\n\nYin’s Approach\nAccording to Yin (2014: 16), “a case study is an empirical inquiry that investigates a contemporary phenomenon (the”case”) in depth and within its real-world context, especially when the boundaries between phenomenon and context may not be clearly evident.”\nHe goes on to document some features of a case study: “A case study inquiry copes with the technically distinctive situation in which there will be many more variables of interest than data points, and as one result relies on multiple sources of evidence, with data needing to converge in a triangulating fashion, and as another result benefits from the prior development of theoretical propositions to guide data collection and analysis.” (Yin 2014: 17)\nYin (2014) is more oriented toward what he refers to as a realist perspective, which he pits against relativist and interpretivist perspectives (used interchangably, it seems), and which I might refer to as constructivist. He characterizes relativist perspectives as “acknowledging multiple realities having multiple meanings, with findings that are observer dependent”. His prioriting of a realist approach corresponds with the analysis by Yazan (2015), who compared Yin with Stake and Merriam. 
According to Yazan (2015: 137), Yin evades making statements about his epistemic commitments, and is characterized as post-positivist.\nYin (2014) is very concerned with research design in case study research He posits that, in a colloquial sense, “a research design is a logical plan for getting from here to there, where here may be defined as the initial set of questions to be answered, and there is some set of conclusions (answers) about these questions.” (Yin 2014: 28)\nYin distinguishes between a research design and a work plan. A research design deals with a logical problem, whereas a work plan deals with a logistical problem. Seems reminiscient of Brian Cantwell Smith’s distinction between skeletons and outlines.\nYin lists five components of a research design:\n\nA case study’s questions;\nits propositions, if any;\nits unit(s) of analysis;\nthe logic linking the data to the propositions; and\nthe criteria for interpreting the findings.\n\nInterestingly, I have been instinctively following these steps, and am currently hovering somewhere between components 3 and 4, while dipping back to 2 once in a while too.\nThe problem of defining the unit of analysis is salient to me right now. According to Yin (2014: 32), the unit of analysis may change as the project progresses, depending on initial misconceptions (he uses the example of a unit of analysis changing from neighbourhoods to small groups, as contextualized by the socio-geographical entity of the neighbourhood, which is laden with issues of class, race, etc). In my own situation, the unit of analysis may hover between the harmonization initiative, the people, activities or infrastructures that make it work.\nIn the section on criteria for interpreting the findings, Yin emphasizes the role of rival theories, which is akin to a concern with falsifiability as a means of validating truth claims, and which betrays his positivist leanings. 
This may be compared with Stake’s emphasis on triangulation, which is more concerned with internal cohesiveness. Similarly, Yin cites Corbin and Strauss regarding the role of theory or theoretical propositions in research design, which similarly reveals a concern with rigorous upfront planning and strict adherence to research design as a key aspect of deriving valid findings.\nRegarding generalizability, Yin (2014: 40-41) states that “Rather than thinking about your case as a sample, you should think of it as the opportunity to shed empirical light about some theoretical concepts or principles, not unlike the motive of a laboratory investigator in conceiving of and then conducting a new experiment.” He goes on to state that case studies tend to strive for analytic generalizations that go beyond the specific case that has been studied, and which apply to other concrete situations rather than just abstract theory building.\n\n\nLogistics of case study design\n\nPreparing to select case study data\nYin (2014: 72-23) identifies five desired attributes for collecting case studt data:\n\nAsk good questions — and interpret answers fairly.\n\n\n“As you collect case study evidence, you must quickly review the evidence and continually ask yourself why events or perceptions appear as they do.” (73)\nA good indicator of having asked good questions is mental and emotional exhaustion at the end of each fieldwork day, due to the depletion of “analytic energy” associated with being attention on your toes. (73-74)\n\n\nBe a good “listener” not trapped by existing ideologies or preconceptions.\n\n\nSensing through multiple modalities, not just spoken words.\nAlso subtext, as elicited through choices of terms used, mood and affective components. (74)\n\n\nStay adaptive, so that newly encountered situations can be seen as opportunities, not threats.\n\n\nRemember the original purpose but willing to adapt to unanticipated circumnstances. 
(74)\nEmphasize balancing adaptability with rigour, but not with rigidity. (75)\n\n\nHave a firm grasp of what is being studied, even when in an exploratory mode.\n\n\nNeed to do more than merely record data, but interpret information as they are being collected and to know immedately whether there are contradictions or complementary statements to follow-up on. (75-76)\n\n\nAvoid biases of being sensitive to contrary evidence, also knowing how to conduct research ethically.\n\n\nMaintain strong professional competence, including keeping up with related research, ensuring accuracy, striving for credibility, and knowledging and mitigating against bias.\n\nYin advocates for adoption of case study protocols. He provides an example of a table of contents for case study protocols, which generally comprise four sections:\n\nOverview of the case study\nData collection procedures\nData collection questions\nGuide for the case study report\n\n\n\nTriangulation\nTriangulation is a process of gaining assurance. Also sometimes called crystallization.\n“Each important finding needs to have at least three (often more) confirmations and assurances that key meanings are not being overlooked.” (Stake 2006: 33) Triangulation is a process of repetitous data gathering and critical review of what is being said. (Stake 2006: 34)\nWhat needs triangulation? 
(Stake 2006: 35-36)\n\nIf the description is trivial or beyond question, there is no need to triangulate.\nIf the description is relevant and debatable, there is much need to triangulate.\nIf the data are critical to a main assertion, there is much need to triangulate.\nIf the data are evidence for a controversial finding, there is much need to triangulate.\nIf a statement is clearly a speaker’s interpretation, there is little need to triangulate the quotation but not its content.\n\nStake (2006: 37) cites Denzin (1989) who highlighted several kinds of triangulation, leading to a few advisories:\n\nFind ways to use multiple rather than single observers of the same thing.\nUse second and third perspectives, i.e. the views of teachers, student and parents.\nUse more than one research method on the same thing, i.e. document review and interview.\nCheck carefully to decide how much the total description warrants generalization.\n\nDo your conclusions generalize across other times or places?\nDo your conclusions about the aggregate generalize to individuals?\nDo findings of the interaction among individuals in one group pertain to other groups?\nDo findings of the aggregate of these people generalized to a population?\n\n\n\n\nCross-Case Analysis Procedure\nStake (2006: Chapter 3) lays out a procedure for deriving synthetic findings from data collected across cases. He frames this in terms of a dialectic between cases and quintains. He identifies three tracks (Stake 2006: 46):\n\nTrack 1: Maintains the case findings and the situationality.\nTrack 2: Merges similar findings, maintaining a little of the situationality.\nTrack 3: The most quanitative track, shifts the focus from findings to factors.\n\nAccording to Stake, case reports should be created independently and then brought together by a single individual when working in a collaborative project. 
In keeping with the case-quintain dialectic, this integration must involve strategically putting the cases aside and bringing them back in to identify convergences and divergences, similarities and differences, normalitities and discrepancies among them.\nThere is some detailed discussion about different kinds of statements, i.e. themes, findings, factors and assertions, but I find this a bit too much detail for me to get at at this point in mymethodological planning. In general though, Stake documents a process whereby an analyst navigates back and forth between the general and the situational, presenting tentativr statements that are shored up, modified or discarded through testing compatability of the evidence across cases.\n\n\nSingle cases\nStake (2000) is concerned with identifying what can be learned from a single case. He (2000: 437) identifies three kinds of cases:\n\nIntrinsic case studies as being driven by a desire to understand the particular case.\nInstrumental case studies are examined “mainly to provide insight into an issue or to redraw a generalization.”\nCollective case studies “investigate a phenomenon, population or general condition”.\n\nStake (2000) frames case research around a tension between the particular and the general, which echoes the case-quintain dilemma he described in (Stake 2006: 4-6).\n\n\nSome scattered practical guidance\nStake (2006: 18-22) provides a detailed and realistic overview of common challenges involved in collaborative qualitative research. This could be handy in future work when planning a multicase project involving multiple researchers.\nStake (2006: 29-33) provides guidance on how to plan and conduct interviews in multicase research, including a series of helpful prompts and questions to ask yourself while designing the interview. One thing that stands out is his recommendation that an interview should be more about the interviewee than about the case. 
It’s necessary to find out about the interviewee to understand their interpretations, but what they reveal about the quintain is more important.\nOn page 34, Stake (2006) also provides some practical tips for documenting and storing data, after Huberman and Miles (1994).\nStake (2006: Chapter 4) includes a chapter on procedures for reporting the findings, and I may return to this later on once I need to initiative this phase of work. It addresses concerns about how to articulate comparisons, concerns about generalization, and how to handle advocacy based on findings.\nSee Stake (2006) Chapter 5 for a step-by-step overview of a multicase study analysis. The rest of the volume after that includes three very detailed examples from his own work.\n\n\n\n\nGrounded theory\nThese notes are largely drawn from Charmaz (2000), which I understand to be a fairly balanced and comprehensive overview of the Glaser / Strauss and Corbin debate, and of the situation of specific methods and techniques in relation to these different stances. 
I also value Charmaz’s position as someone who subscribes to her constructivist approach.\nAccording to Charmaz(2000: 509):\n\nEssentially, grounded theory methods consist of systematic inductive guidelines for collecting and analyzing data to build middle-range theoretical frameworks that explain the collected data.\n\nCharmaz(2000: 511) goes on to situate grounded theory in relation to what was the norm prior to its invention:\n\nGlaser and Strauss’s (1967) work was revolutionary because it challenged (a) arbitrary divisions between theory and research, (b) views of qualitative research as primarily a precursor to more “rigorous” quantitative methods, (c) claims that the quest for rigor made qualitative research illegitimate, (d) beliefs that qualitative methods are impressionistic and unsystematic, (e) separation of data collection and analysis, and (f) assumptions that qualitative research could produce only descriptive case studies rather than theory development (Charmaz 1995).\n\nPrior to Glaser and Strauss (1967), qualitative analysis was taught rather informally — they led the way in providing written guidelines for systematic qualitative data analysis with explicit procedures for data analysis (Charmaz 2000: 512)\nGlaser brought his very positivist assumptions from his work at Columbia, and Strauss’ work in Chicago with Herbert Blumer and Robert Park infused a pragmatic philosophical approach to the study of process, action and meaning that reflects symbolic interactionism.\n\nGlaser\nGlaser’s position comes close to traditional positivism, with assumptions of an objective, external reality and a neutral observer who discovers data. and a reductionist form of inquiry of manageable research problems. According to Charmaz (2000: 511), regarding Glaser’s approach:\n\nTheoretical categories must be developed from analysis of the collected data and must fit them; these categories must explain the data they subsume. 
This grounded theorists cannot shop their disciplinary stores for preconceived concepts and dress their data in them. Any existing concept must earn its way into the analysis. … The relevance of a grounded theory derives from its offering analytic explanations of actual problems and basic processes in the research setting. A grounded theory is durable because researchers can modify their emerging or established analyses as conditions change or further data are collected.\n\n\n\nCorbin and Strauss\nStrauss and Corbin assume an objective reality, aim toward unbiased data collection, propose a series of technical procedures, and espouses verification. However, they are postpositivism because they propose giving voice to their respondents,3 representing them as accurately as possible, discovering and reckoning with how their respodents’ views on reality differ from their own, and reflecting on the research process as one way of knowing.\n3 Charmaz uses the term “giving voice” in this specific context. I’m not sure if this is meant to represent Strauss and Corbin’s attitude, and whether this is an accurate representation on their views, but in my mind this should be framed as elevating, amplifying or re-articulating respondents’ voices (and this is a tenet of constructivist grounded theory in general, which derives from Charmaz). My take diverges from the position that we “give voice” to respondents in that it acknowledges (1) that the voices are already there, (2) that respondents are in fact giving us their voices, and (3) that the researcher plays an active editorial role, transforming the respondents’ elicitations into a format that is more amenable to analysis.Corbin and Strauss (1990) “gained readers but lost the sense of emergence and open-ended character of Strauss’s earlier volume and much of his empirical work. 
The improved and more accessible second edition of Basics (Strauss and Corbin 1998) reads as less prescriptive and aims to lead readers to a new way of thinking about their research and about the world.” (Charmaz 2000: 512)\nStrauss apparently became more insistent that grounded theory should be more verificational in nature in personal communications.\nGlaser (1992) responded to Strauss and Corbin (1990), repudiating what he perceived as forcing preconceived questions and frameworks on the data. Glaser considered it better to allow theory to “emerge” from the data, i.e. to let the data speak for themselves.\nCharmaz identifies these two approaches as having a lot in common: hey both advocate for mitigating factors that would hinder objectivity and minimize intrusion of the researcher’s subjectivity, and they are both embedded in positivist attitudes, with a researcher sitting outside the observed reality; Glaser exemplifies these through discovering and coding data, and using systematic comparative methods, whereas Strauss and Corbin maintain a similar distance through their analytical questions, hypotheses and methodological applications. They both engage in “silent authorship” and usually write about their data as distant experts (Charmaz and Mitchell 1996).\n\n\nConstuctivist Grounded Theory\n\nConstructivist grounded celebrates firsthand knowledge of empirical worlds, takes a middle ground between postmodernsm and positivism, and offers accessible methods for taking qualitative research into the 21st century. (510)\n\n\nThe power of grounded theory lies in its tools for understanding empirical worlds. We can reclaim these tools from their positivist underpinnings to form a revised, more open-ended practice of grounded theory that stresses its emergent, constructivist elements. We can use grounded theory methods as flexible, heuristic strategies rather than as formulaic procedures. 
(510)\n\nThree aspects to Charmaz’s argument (510):4\n4 Very much in line with the pragmatist turn of the late ’90s and early ’00s, as also documented by Lucas (2019: 54-57) in the context of archaeological theory, vis-a-vis positivism, postmodernism, and settling on a middle ground between them.\nGrounded theory strategies need not be rigid or prescriptive;\na focus on meaning while using grounded theory furthers, rather than limits, interpretive understanding; and\nwe can adopt grounded theory strategies without embracing the positivist leanings of earlier proponents of grounded theory.\n\nRepudiation of the notion that data speak for themselves, that data do not lie. Recognition that data are constructs of the rsearch process, are framed by the questions we ask informants and the methodological tools of our collection procedures.\nCharmaz (2000: 515) advocates for what seems to be a dialogical approach to coding, between researcher and the data:\n\nWe should interact with our data and pose questions to them while coding. Coding helps us to gain a new perspective on our material and to focus further data collection, and may lead us in unforeseen directions. Unline quantitative research that requires data to fit into preconceived standardized codes, the researcher’s interpretations of data shape his or her emergent codes in grounded theory.\n\nDistinguishes articulates open/initial coding as proceeding line by line to get a general sense of what the data contains. It is meant to keep the researcher close to the data, to remain attuned to the subjects’ views of their realities.\n\nLine-by-line coding sharpens our use of sensitizing concepts — that is, those background ideas that inform the overall research problem. Sensitizing concepts offer eays of seeing, organizing, and understanding experience; they are embedded in our disciplinary emphases and perspectival proclivities. 
Although sensitizing conceots may deepen perception, they provide starting points for building analysis, not ending points for evading it. We may use sensitizing concepts only as points of departure from which to study the data.\n\nMuch of the rest of the Charmaz (2000) paper is an overview of coding and memoing methods, as well as theoretical sampling. The emphasis is on situating these techniques in the Glaser / Strauss and Corbin debate, and it will be better to refer to Charmaz (2014) for in-depth notes on these techniques.\nCharmaz (2000: 521-522) provides an apt account of a significant critique of grounded theory, and poses her constructivist approach as a potential means of resolving it. Specifically, she refers to the notion that grounded theory (as traditionally conceived by both Glaser and Strauss and Corbin) “fractures” the data, making them easier to digest in an analytical sense, but also making it more difficult to engage with in a holistic manner. This is precisely the point of the original approach, to present qualitative data as data — as conceived and valued by quantitative researchers, i.e. as discrete, corpuscular, disembodied, re-arrangable and distant entities. The text of these two large paragraphs is copied here:\n\nConrad (1990) and Riessman (1990) suggest that “fracturing the data” in grounded theory research might limit understanding because grounded theorists aim for analysis rather than the portrayal of subjects’ experience in its fullness. From a grounded theory perspective, fracturing the data means creating codes and categories as the researcher defines themes within the data. 
Glaser and Strauss (1967) propose this strategy for several reasons: (a) to help the researcher avoid remaining immersed in anecdotes and stories, and subsequently unconsciously adopting subjects’ perspectives; (b) to prevent the researcher’s becoming immobilized and overwhelmed by voluminous data; and (c) to create a way for the researcher to organize and interpret data. However, criticisms of fracturing the data imply that grounded theory methods lead to separating the experience from the experiencing subject, the meaning from the story, and the viewer from the viewed. In short, the criticisms assume that the grounded theory method (a) limits entry into subjects’ worlds, and thus reduces understanding of their experience; (b) curtails representation of both the social world and subjective experience; (c) relies upon the viewer’s authority as expert observer; and (d) posits a set of objectivist procedures on which the analysis rests.\nResearchers can use grounded theory methods to further their knowledge of subjective experience and to expand its representation while neither remaining external from it nor accepting objectivist assumptions and procedures. A constructivist grounded theory assumes that people create and maintain meaningful worlds through dialectical processes of conferring meaning on their realities and acting within them (Bury 1986; Mishler 1981). Thus social reality does not exist independent of human action. Certainly, my approach contrasts with a number of grounded theory studies, methodological statements, and research texts (see, e.g., Chenitz and Swanson 1986; Glaser 1992; Martin and Turner 1986; Strauss and Corbin 1990; Turner 1981). By adopting a constructivist grounded theory approach, the researcher can move grounded theory methods further into the realm of interpretive social science consistent with a Blumerian (1969) emphasis on meaning, without assuming the existence of a unidimensional external reality. 
A constructivist grounded theory recognizes the interactive nature of both data collection and analysis, resolves recent criticisms of the method, and reconciles positivist assumptions and postmodernist critiques. Moreover, a constructivist grounded theory fosters the development of qualitative traditions through the study of experience from the standpoint of those who live it.\n\nCharmaz’s (2000: 523) proposal for a re-visioned grounded theory poses research as a materializing process:\n\nA re-visioned grounded theory must take epistemological questions into account. Grounded theory can provide a path for researchers who want to continue to develop qualitative traditions without adopting the positivistic trappings of objectivism and universality. Hence the further development of a constructivist grounded theory can bridge past positivism and a revised future form of interpretive inquiry. A revised grounded theory preserves realism through gritty, empirical inquiry and sheds positivistic proclivities by becoming increasingly interpretive.\n\nCharmaz (2000: 523) addresses realism and truth in constructivist grounded theory, and explicitly relates it to Blumerian situated interactionism:\n\nA constructivist grounded theory distinguishes between the real and the true. The constructivist approach does not seek truth — single, universal, and lasting. Still, it remains realist because it addresses human realities and assumes the existence of real worlds. However, neither human realities nor real worlds are unidimensional. We act within and upon our realities and worlds and thus develop dialectical relations among what we do, think, and feel. The constructivist approach assumes that what we take as real, as objective knowledge and truth, is based upon our perspective (Schwandt 1994). The pragmatist underpinnings in symbolic interactionism emerge here. 
Thomas and Thomas (1928: 572) proclaim, “If human beings define their situations as real, they are real in their consequences”. Following their theorem, we must try to find what research participants define as real and where their definitions of reality take them. The constructivist approach also fosters our self-consciousness about what we attribute to our subjects and how, when, and why researchers portray these definitions as real. Thus the research products do not constitute the reality of the respondents’ reality. Rather, each is a rendering, one interpretation among multiple interpretations, of a shared or individual reality. That interpretation is objectivist only to the extent that it seeks to construct analyses that show how respondents and the social scientists who study them construct those realities — without viewing those realities as unidimensional, universal, and immutable. Researchers’ attention to detail in the constructivist approach sensitizes them to multiple realities and the multiple viewpoints within them; it does not represent a quest to capture a single reality.\nThus we can recast the obdurate character of social life that Blumer (1969) talks about. In doing so, we change our conception of it from a real world to be discovered, tracked, and categorized to a world made real in the minds and through the words and actions of its members. Thus the grounded theorist constructs an image of a reality, not the reality — that is, objective, true, and external.\n\nOn the other hand, Charmaz (2000: 524) frames objectivist grounded theory as believing in some kind of truth:\n\nObjectivist grounded theory accepts the positivistic assumption of an external world that can be described, analyzed, explained, and predicted: truth, but with a small t. That is, objectivist grounded theory is modifiable as conditions change. 
It assumes that different observers will discover this world and describe it in similar ways That’s correct — to the extent that subjects have comparable experiences (e.g., people with different chronic illnesses may experience uncertainty, intrusive regimens, medical dominance) and viewers bring similar que-tions, perspectives, methods, and, subsequently, concepts to analyze those experiences. Objectivist grounded theorists often share assumptions with their research participants — particularly the professional participants. Perhaps more likely, they assume that respondents share their meanings. For example, Strauss and Corbin’s (1990) discussion of independence and dependence assumes that these terms hold the same meanings for patients as for researchers.\n\nCharmaz (2000: 525) further embeds construvist grounded theory as a way to fulfill Blumer’s symbolic interactionism:\n\nWhat helps researchers develop a constructivist grounded theory? How might they shape the data collection and analysis phases? Gaining depth and understanding in their work means that they can fulfill Blumer’s (1969) call for “intimate familiarity” with respondents and their worlds (see also Lofland and Lofland 1984, 1995). In short, constructing constructivism means seeking meanings — both respondents’ meanings and researchers’ meanings.\n\nCharmaz (2000: 524) on the concretization of procedures from what were orginally meant to be guidelines:\n\nGuidelines such as those offered by Strauss and Corbin (1990) structure objectivist grounded theorists’ work. These guidelines are didactic and prescriptive rather than emergent and interactive. Sanders (1995: 92) refers to grounded theory procedures as “more rigorous than thou instructions about how information should be pressed into a mold”. Strauss and Corbin categorize steps in the process with scientific terms such as axial coding and conditional matrix (Strauss 1987; Strauss and Corbin 1990, 1994). 
As grounded theory methods become more articulated, categorized, and elaborated, they seem to take on a life of their own. Guidelines turn into procedures and are reified into immutable rules, unlike Glaser and Strauss’s (1967) original flexible strategies. By taking grounded theory methods as prescriptive scientific rules, proponents further the positivist cast to obiectivist grounded theory.\n\n\nOn the modes of reasoning behind grounded theory\nKelle (2005) is an overview of the Glaser / Strauss and Corbin split. References to Kelle (2005) have no page numbers since it is published in an online-only journal and does not specify paragraph numbers.\nHighlights a primary impetus behind Glaser and Strauss (1967), which used political analogies to distinguish between “theoretical capitalists” and “proletariat testers”, and unify the field of sociology by de-centering emphasis on theories developed by “great men”.\nA common thread in this paper is sensitivity to the practical challenges of actually doing grounded theory according to Glaser’s approach:\n\nThe infeasibility of an inductivist research strategy which demands an empty head (instead of an “open mind”) cannot only be shown by epistemological arguments, it can also be seen in research practice. Especially novices in qualitative research with the strong desire to adhere to what they see as a basic principle and hallmark of Grounded Theory — the “emergence” of categories from the data — often experience a certain difficulty: in open coding the search for adequate coding categories can become extremely tedious and a subject of sometimes numerous and endless team sessions, especially if one hesitates to explicitly introduce theoretical knowledge. 
The declared purpose to let codes emerge from the data then leads to an enduring proliferation of the number of coding categories which makes the whole process insurmountable.\n\nKelle (2005) basically takes down the original Glaser and Strauss (1967) and subsequent reflection on theoretecal sensitivity (Glaser 1978). He highlights fundamental contraditions and oversights with regards to the role of theory in grounded theory, specifically with regards to the notion that such research can be accomplished with inductive purity:\n\nConsequently, in the most early version of Grounded Theory the advice to employ theoretical sensitivity to identify theoretical relevant phenomena coexists with the idea that theoretical concepts “emerge” from the data if researchers approach the empirical field with no preconceived theories or hypotheses. Both ideas which have conflicting implications are not integrated with each other in the Discovery book. Furthermore, the concept of theoretical sensitivity is not converted into clear cut methodological rules: it remains unclear how a theoretically sensitive researcher can use previous theoretical knowledge to avoid drowning in the data. If one takes into account the frequent warnings not to force theoretical concepts on the data one gets the impression that a grounded theorist is advised to introduce suitable theoretical concepts ad hoc drawing on implicit theoretical knowledge but should abstain from approaching the empirical data with ex ante formulated hypotheses.\n\nKelle (2005) recognizes that Glaser identified a series of “theoretical families” to help assist with the practical experience of coding. 
I find it somewhat interesting that many of the terms in these first families are very reminiscient of so-called “natural language”, as used in the wave of cybernets that was contemporary with Glaser (1978) and which largely dealt with “expert systems”.\n\nIn the book “Theoretical Sensitivity” (1978) GLASER presents an extended list of terms which can be used for the purpose of theoretical coding loosely structured in the form of so called theoretical “coding families”. Thereby various theoretical concepts stemming from different (sociological, philosophical or everyday) contexts are lumped together, as for example:\n\nterms, which relate to the degree of an attribute or property (“degree family”), like “limit”, “range”, “extent”, “amount” etc.,\nterms, which refer to the relation between a whole and its elements (“dimension family”), like “element”, “part”, “facet”, “slice”, “sector”, “aspect”, “segment” etc.,\nterms, which refer to cultural phenomena (“cultural family”) like “social norms”, “social values”, “social beliefs” etc.\n\n\nThis is substantiated by other observations by Kelle (2005) that ad hoc coding actually follows implicit theoretical knowledge:\n\nOne of the most crucial differences between GLASER’s and STRAUSS’ approaches of Grounded Theory lies in the fact that STRAUSS and CORBIN propose the utilization of a specified theoretical framework based on a certain understanding of human action, whereas GLASER emphasises that coding as a process of combining “the analyst’s scholarly knowledge and his research knowledge of the substantive field” (1978, p.70) has to be realised ad hoc, which means that it has often to be conducted on the basis of a more or less implicit theoretical background knowledge.\n\nand that the Glaserian approach is better suited for more experienced, rather than novice sociologists, who will have internalized the theory that they then apply in their coding.\nKelle then goes on to address how grounded theory can or can not be 
applied in alignment with inductivist or hypothetic-deductivist reasoning, and raises abductive reasoning an an alternative means of arriving at legitimate and verifiable conclusions. There is too much detail in the paper to copy here.\nBut here is another nice conclusive gem from the end:\n\nWhereas STRAUSS and CORBIN pay a lot of attention to the question how grounded categories and propositions can be further validated, GLASER’s concept shows at least a gleam of epistemological fundamentalism (or “certism”, LAKATOS 1978) especially in his defence of the inductivism of early Grounded Theory. “Grounded theory looks for what is, not what might be, and therefore needs no test” (GLASER 1992, p.67). Such sentences carry the outmoded idea that empirical research can lead to final certainties and truths and that by using an inductive method the researcher may gain the ability to conceive “facts as they are” making any attempt of further corroboration futile.\n\n\n\nRebuttals by Glaser\nGlaser (2002) constitutes a rebuttal to Charmaz (2000). As Bryant (2003) points out in his response to Glaser (2002), it is very angry, polemical and irrational. I don’t want to go too in depth with the fundamental problems with Glaser’s response (see Bryant’s paper for the details), but the gist is that Glaser never really got the message about data being inherently constructed by researchers decisions, actions and circumstances. Glaser seems to continue believing in the inherent neutrality of data as a matter of faith.\nThis being said, Glaser (2002) did highlight the large emphasis on descriptive rather than explanatory potential in Charmaz’s approach. This aligns with my own apprehensions when I try to address the relevance of my work. 
I tend to use the term “articulate” as a way to frame my work as descriptive, but in a way that lends value, and this very fuzzy distinction between the power of identying the shapes and relationships among things and explaining their causes and effects in a generalizable way (i.e., theories, or explanations), still somehow troubles me. I wonder if Glaser is drawing a false distinction here, and through that, a false prioritization of explanation over description as a desired outcome. This would put my mind at ease, as would dismissing Glaser’s dismissal of people who simply don’t know how to do the “real” grounded theory (which, in his mind, include all feminist and critical researchers).\n\n\nOn the utility of grounded theory\nI completely agree with this statement from Clarke (2003: 555):\n\nTo address the needs and desires for empirical understandings of the complex and heterogeneous worlds emerging through new world orderings, new methods are requisite (Haraway 1999). I believe some such methods should be epistemologically/ ontologically based in the pragmatist soil that has historically nurtured symbolic interactionism and grounded theory. Through Mead, an interactionist grounded theory has always had the capacity to be distinctly perspectival in ways fully com patible with what are now understood as situated knowledges. This fundamental and always already postmodern edge of a grounded theory founded in symbolic interactionism makes it worth renovating.\n\nThis is super interesting, and really contextualizes how Strauss imagined grounded theory to be useful for him:\n\nSome years ago, Katovich and Reese (1993:400–405) interestingly argued that Strauss’s negotiated order and related work recuperatively pulled the social around the postmodern turn through its methodological [grounded theoretical] recognition of the partial, tenuous, shifting, and unstable nature of the empirical world and its constructedness. 
I strongly agree and would argue that Strauss also furthered this “postmodernization of the social” through his conceptualizations of social worlds and arenas as modes of understanding the deeply situated yet always also fluid orga nizational elements of negotiations. He foreshadowed what later came to be known as postmodern assumptions: the instability of situations; characteristic changing, porous boundaries of both social worlds and arenas; social worlds seen as mutually constitutive/coproduced through negotiations taking place in arenas; negotiations as central social processes hailing that “things can always be otherwise”; and so on. Significantly, negotiations constitute discourses that also signal micropolitics of power as well as “the usual” meso/macrostructural elements—power in its more fluid forms (e.g., Foucault 1979, 1980). Through integrating the social worlds/arenas/ negotiations framework with grounded theory as a new conceptual infrastructure, I hope to sustain and extend the methodological contribution of grounded theory to understanding and elaborating what has been meant by “the social” in social life — before, during, and after the postmodern turn.\n\nIt also echoes Charmaz’s vision of grounded theory as a powerful too, and Bryant’s (2003) call to “look at what Glaser and Strauss actually did, rather than what they claimed — and continued to claim — they were doing” to uncover “the basis for a powerful research approach”. Bryant (2003) further cites Baszanger and Dodier (1997), who characterize grounded theory as a method “consisting of accumulating a series of individual cases, of analyzing them as a combination between different logics of action that coexist not only in the field under consideration, but even within these individuals or during their encounters”. 
Bryant (2003) summarizes this by stating that “[t]he aim of such methods is generalization rather than totalization, with the objective of producing”a combinative inventory of possible situations”.\n\n\n\nTheoretical sampling\nSee Charmaz (2000): 519-520.\nFrom Clarke (2003: 557):\n\nUnique to this approach has been, first, its requiring that analysis begin as soon as there are data. Coding begins immediately, and theorizing based on that coding does as well, however provisionally (Glaser 1978). Second, “sampling” is driven not necessarily (or not only) by attempts to be “representative” of some social body or population (or its heterogeneities) but especially and explicitly by theoretical con cerns that have emerged in the provisional analysis. Such “theoretical sampling” focuses on finding new data sources (persons or things) that can best explicitly ad dress specific theoretically interesting facets of the emergent analysis. Theoretical sampling has been integral to grounded theory from the outset, remains a fundamen tal strength of this analytic approach, and is crucial for the new situational analyses." }, { - "objectID": "notes/maelstrom-readings.html#gaye2014", - "href": "notes/maelstrom-readings.html#gaye2014", - "title": "Maelstrom reading notes", - "section": "Gaye et al. (2014)", - "text": "Gaye et al. (2014)\nIntroduces DataShield.\nFrames DataShield as a technical fix to administrative problems:\n\nMany technical and policy measures can be enacted to render data sharing more secure from a governance per spective and less likely to result in loss of intellectual prop erty. For example, data owners might restrict data release to aggregate statistics alone, or may limit the number of variables that individual researchers might access for speci fied purposes. 
Alternatively, secure analysis centres, such as the ESRC Secure Data Service and SAIL represent major informatics infrastructures that can provide a safe haven for remote or local analysis/linkage of data from selected sources while preventing researchers from down loading the original data themselves. However, to comple ment pre-existing solutions to the important challenges now faced, the DataSHIELD consortium has developed a flexible new way to comprehensively analyse individual level data collected across several studies or sources while keeping the original data strictly secure. As a technology, DataSHIELD uses distributed computing and parallelized analysis to enable full joint analysis of individual-level data from several sources, e.g. research projects or health or administrative data—without the need for those data to move, or even be seen, outside the study where they usually reside. Crucially, because it does not require underpin ning by a major informatics infrastructure and because it is based on non-commercial open source software, it is both locally implementable and very cost effective.\n\nAdds a social/collaborative element to earlier arguments about the challenges inherent of prospective harmonization, highlighting a need for engagement with individual studies (either through direct or peripheral participation) to conduct research that was not initially planned for:\n\nUnfortunately, both [study-level metadata] SLMA and [individual-level metadata] ILMA present significant problems Because SLMA com bines analytical results (e.g. means, odds ratios, regression coefficients) produced ahead of time by the contributing studies, it can be very inflexible: only the pre-planned analyses undertaken by all the studies can be converted into joint results across all studies combined. Any additional analyses must be requested post hoc. 
This hinders exploratory analysis for example the investigation of sub-groups, or interactions between key variables.\n\nProvides a detailed overview of how DataShield was implemented for HOP (Healthy Obesity Project), including the code used to generate specific figures and analyses. Hoever it does not really describe or reflect upon the processes through which the code was developed.\nThe authors highlight the fact that certain analytical approaches are not possible using DataShield, especially analysis that visualize individual data points. It’s unclear how they enforce this, or whether it’s an implicit limitation based on the data that DataShield participants provide.\n\nBecause in DataSHIELD potentially disclosive com mands are not allowed, some analyses that are possible in standard R are not enabled. In essence, there are two classes of limitation on potential DataSHIELD functional ity: (i) absolute limitations which require an analysis that can only be undertaken by enabling one of the functional ities (e.g. visualizing individual data points) that is explicitly blocked as a fundamental element of the DataSHIELD philosophy. For example, this would be the case for a standard scatter plot. Such limitations can never be circumvented and so alternatives (e.g. contour and heat map plots) are enabled which convey similar information but without disclosing individual data points; (ii) current limitations which are functions or models that we believe are implementable but we have not, as yet, under taken or completed the development work required. As examples, these latter include generalized linear mixed model (including multi-level modelling) and Cox regression.\n\nThe authors list numerous other limitations and challenges. Some have to do with what kinds of data DataShield can handle (something about horizontal and vertical that I do not yet fully understand). 
Other challenges include the need for data to be harmonized, and having to deal with governance concerns.\nNotably, the first challenge mentioned seems to contradict the statement earlier on (and made by Doiron et al. (2013)) that this is relatively easy to set up. The authors acknowledge the fact that coding for analysis using DataShield has a steep learning curve and requires some pre-planning to enable results from satellite computers to be properly combined. Their mitigation is to black-box these concerns by implementing simpler client-side functions that mask the more complex behaviours (and presumably translate error messages in ways that users can understand and act to resolve!).\n\nDespite its potential utility, implementation of DataSHIELD involves significant challenges. First, although set-up is fundamentally straightforward, application involves a relatively steep learning curve because the command structure is complex: it demands specification of the analysis to be undertaken, the studies to use and how to combine the results. In mitigation, most complex serverside functions are now called using simpler client-side functions and we are working on a menu-driven implementation.\n\nAlso interesting that they note how there may be unanticipated problems, either accidental or malicious, and their way of mitigating against this is to log all commands:\n\nFifth, despite the care taken to set up DataSHIELD so that it works properly and is non-disclosive, it is possible that unanticipated prob lems (accidental or malicious) may arise. 
In order to iden tify, describe and rectify any errors or loopholes that emerge and in order to identify deliberate miscreants, all commands issued on the client server and enacted on each data server are permanently logged.\n\nThis is even more interesting in light of their continuous reference to “data.care”, which they do not address in depth, but which seems to have been a scandal involving unauthorized release of personal health data used in research.\nThe authors add an additional caveat concerning the need to ensure that the data are cleaned in advance.\n\nBut, to be pragmatic, many of the routinely collected healthcare and administra tive databases will have to undergo substantial evolution before their quality and consistency are such that they can directly be used in high-quality research without exten sive preparatory work. By its very nature, such preparation—which typically includes data cleaning and data harmonization—cannot usually be undertaken in DataSHIELD, because it involves investigating discrepan cies and/or extreme results in individual data subjects: the precise functionality that DataSHIELD is designed to block. Such work must therefore be undertaken ahead of time by the data generators themselves—and this is de manding of time, resources and expertise that — at present — many administrative data providers may well be unwilling and/or unable to provide. 
That said, if the widespread us ability of such data is viewed as being of high priority, the required resources could be forthcoming.\n\nThis corresponds with another limitation identified earlier, namely with regards to identifying duplicate individual records across jurisdictional boundaries (which involves assumptions regarding nationality and identify – one of those weird myths that programmers can’t seem to let go!):\n\nSo far DataSHIELD has been applied in settings where individual participants in different studies are from different countries or from different regions so it is unlikely that any one person will appear in more than one source. However, going forward, that cannot al ways be assumed. We have therefore been consider ing approaches to identify and correct this problem based on probabilistic record linkage. In the genetic setting 48 the BioPIN provides an alternative solution. Ongoing work is required.\n\nNote the last line of the prior block quote regarding data cleaning:\n\nThat said, if the widespread us ability of such data is viewed as being of high priority, the required resources could be forthcoming.\n\nThis seems like a thread worth tugging at!" + "objectID": "notes/methodology-notes.html#data-collection", + "href": "notes/methodology-notes.html#data-collection", + "title": "Methodology notes", + "section": "Data Collection", + "text": "Data Collection\n\nInterviews\nSee (Yin 2014: 110-113) See Becker (1998)\nFrom Charmaz (2000: 525):\n\nA constructivist approach necessitates a relationship with respondents in which they can cast their stories in their terms. It means listening to their stories with openness to feeling and experience. … Furthermore, one-shot interviewing lends itself to a partial, sanitized view of experience, cleaned up for public discourse. The very structure of an interview may preclude private thoughts and feelings from emerging. 
Such a structure reinforces whatever proclivities a respondent has to tell only the public version of the story. Researchers’ sustained involvement with research participants lessens these problems.\n\nFontana and Frey (2000) spend some time writing about the emergence of an “interview society”, whereby interviews are commonly used to seek various forms of biographical information. They cite (holstein1998?), who noted that “the interview has become a means of contemporary storytelling, where persons divulge life accounts in response to interview inquiries”. They then go over a brief history of interviewing in the context of sociological research, which largely tracks the values underlying positivist and postmodernist transitions as you might expect.\n\nStructured interviewing\nFrom Fontana and Frey (2000: 649-651):\nInterviewers ask respondents a series of preestablished questions with a limited set of response categories. The interview records responses according to a preestablished coding scheme.\nInstructions to interviewers often follow these guidelines:\n\nNever get involved in long explanations of the study; use the standard explanation provided by the supervisor.\nNever deviate from the study introduction, sequence of questions, or question wording.\nNever let another person interrupt the interview; do not let another person answer for the respondent or offer his or her opinions on the question.\nNever suggest an answer or agree or disagree with an answer. Do not give the respondent any idea of your personal views on the topic of the question or the survey.\nNever interpret the meaning of a question; just repeat the question and give instructions or clarifications that are provided in training or by the supervisors.\nNever improvise, such as by assing answer categories or making wording changes.\n\nThe interviewer must establish a “balanced rapport”, being casual and friendly while also directive and impersonal. 
Interviewers must also perfect a style of “interested listening” that rewards respondents’ participation but does not evaluate their responses.\nFrom Fontana and Frey (2000: 651):\n\nThis kind of interview often elicits rational responses, but it overlooks or inadequately assesses the emotional dimension.\n\n\n\nGroup interviews\nFrom Fontana and Frey (2000: 651-652):\nCan be used to test a methodological technique, try out a definition of a research problem or to identify key informants. Pre-testing a questionnaire or survey design.\nCan be used to aid respondents’ recall of specific events or to stimulate embellished descriptions of events, or experiences shared by members of a group.\nIn formal group interviews, participants share views through the coordinator.\nLess formal group interviews are meant to establish the widest range of meaning and interpretation on a topic, and the objective is “to tap intersubjective meaning with depth and diversity”.\n\n\nUnstructured interviewing\nFrom Fontana and Frey (2000: 652-657):\nThe essence of an unstructured interview is establishing a human-to-human relation with the respondent and a desire to understand rather than to explain.\n(fontana?) then goes on with some practical guidance on how to engage in unstructured interviews, largely concerned with how to access a community and relate with respondents.\n\n\nTranscribing\nThis section describes how I transcibe interviews and accounts for the decisions to encode certain things and not others. 
It goes on to explains the procedures for transcribing spoken dialog into textual formats, including the notation applied to encode idiosyncratic elements of conversational speech.\nCheck out Silverman (2000), who writes about the nuanced challenges of working with and between verbal and textual media, and what this means for transcription.\n\nTranscript notation\nDerived from the transcription protocol applied for the E-CURATORS project.\n\n\nCleaning audio\nTo clean the audio:\n\nI select a clip that is representative of a single source of background noise, and then filter that wavelength throughout the entire audio file.\nAfter selecting the clip, go to Effect >> Noise Reduction and select Get Noise Profile, then press OK.\nClose the noise reduction menu, select the entire range of audio using the keyboard shortcut Command + A.\nThen go back to the noise reduction window (Effect >> Noise Reduction) to apply the filter based on the noise profile identified for the noisy clip.\nExport the modified audio file to the working directory (File >> Export >> Export as .WAV).\nUse ffmpeg to replace the dirty audio track with the clean one:\n\n ffmpeg -i dirty.mp4 -i clean.wav -c:v copy -map 0:v:0 -map 1:a:0 clean.mp4\n\n\n\n\nObservations\nSee Angrosino and Mays de Pérez (2000)\n\n\nField notes\nSee (Yin 2014: 124-125)\n\n\nRecording video" }, { - "objectID": "notes/maelstrom-readings.html#wolfson2010", - "href": "notes/maelstrom-readings.html#wolfson2010", - "title": "Maelstrom reading notes", - "section": "Wolfson et al. (2010)", - "text": "Wolfson et al. 
(2010)\nx" + "objectID": "notes/methodology-notes.html#qda", + "href": "notes/methodology-notes.html#qda", + "title": "Methodology notes", + "section": "QDA", + "text": "QDA\nMy QDA processes are most influenced by Kathy Charmaz and Johnny Saldaña, as well as the practical experiences instilled during my PhD and while working on E-CURATORS.\n\nSensitizing concepts\nFrom Kelle (2005):\n\nHerbert BLUMER invented the term “sensitizing concepts” to describe theoretical terms which “lack precise reference and have no bench marks which allow a clean cut identification of a specific instance” (1954, p.7). Sensitizing concepts are useful tools for descriptions but not for predictions, since their lack of empirical content permits researchers to apply them to a wide array of phenomena. Regardless how empirically contentless and vague they are, they may serve as heuristic tools for the construction of empirically grounded theories.\n\nSee Bowen (2006)\n\n\nCoding\nThese notes are largely derived from my reading of Saldaña (2016), provides a practical overview of what coding entails and specific methods and techniques.\nCoding as component of knowledge construction:\n\nCoding is an intermediate step, “the”critical link” between data collection and their explanation or meaning” (Charmaz (2001), as quoted in Saldaña (2016): 4)\n“coding is usually a mixture of data [summation] and data complication … breaking the data apart in analytically relevant ways in order to ead toward further questions about the data” (Coffey and Atkinson (1996): 29-31, as quoted and edited by Saldaña (2016): 9)\n\nThis relates to the paired notions of decodng when we reflect on a passage to decipher its core meaning, and encoding when we determine its appropriate code and label it (Saldaña 2016: 5).\n\nCoding “generates the bones of your analysis. 
… [I]ntegration will assemble those bones into a working skeleton” (Charmaz (2014): 113, quoted in Saldaña (2016): 9)\nTo codify is to arrange things in a systematic order, to make something part of a system or classification, to categorize\n\nWhat I sometimes refer to as arranging the code tree\nWhat Saldaña (2016) refers to as categories, I tend to refer to as stubs\n\nCategories are arranged into themes or concepts, which in turn lead to assertions or theories\n\nPre-coding techniques: - Data layout - Separation between lines or paragraphs may hold significant meaning - Putting interviewer words in square brackets or capital letters - Semantic markup - Bold, italics, underline, highlight - Meant to identify “codable moments” worthy of attention (Boyatzis (1998), as referenced in Saldaña (2016): 20) - Relates to Saldaña (2016): 22’s prompt: “what strikes you?” - Preliminary jottings - Tri-column exercise with the text on the left, first impression or preliminary code in the middle, and code on the right, after Liamputtong and Ezzy (2005): 270-273.\nAsking questions back to the interviewer, or participating in an imagined dialogue. I imagine this might be useful in situations where the time to hold an interview is quite limited and I have to work with limited responses that don’t touch on everything I want to cover. The form of questions maintains my tentativity, my unwillingness to commit or assume their responses, and opens the door for their own responses in rebuttal.\nMagnitude coding can be applied to tag positive/negative attitudes, but also other gradients like hard/soft, technical/social skills. May be useful to apply symbols using my little 12-button keypad.\nCan also use colons to identify a magnitude associated with a code’s usage, as per 88-89.\nNote: create a qc issue for sub-documents, for identifying sections of a document that are especially relevant and hiding less relevant sections. 
I don’t necessarily want to delete these lines, but I may want to hide them from view. Maybe this is possible using vscode, outside of qc (see https://stackoverflow.com/a/72954133).\nIn lieu of initial/open coding, I think I will opt to devise sensitizing concepts, which may amalgamate as memos. I could use the prefix “SC:” to denote sensitizing concepts.\nWhat saldana refers to as “concept coding” is what I have previously referred to as “theoretical coding” to a certain extent. It’s a form of lumping, identifying specific instances under the label of cohesive concepts.\nFrom Clarke (2003: 558) on process coding:\n\nIn a traditional grounded theory study, the key or basic social process is typically articulated in gerund form connoting ongoing action at an abstract level. Around this basic process are then constellated the particular and distinctive conditions, strategies, actions, and practices engaged in by human and nonhuman actors in volved with/in the process and their consequences. For example, subprocesses of disciplining the scientific study of reproduction include formalizing a scientific disci pline, gleaning fiscal support for research, producing contraceptives and other techno scientific products, and handling any social controversies the science provokes (such as cloning and stem cell research).\n\n\n\nMemos\nSaldana chapter 2 on “analytic memos”\n\n\nPreliminary analyses\nYin (2014: 135-136 5) identifies various strategies for analyzing case study evidence.\n\nA helpful starting point is to “play” with your data. You are searching for patterns, insights, or concepts that seem promising. 
(Yin 2014: 135)\n\nCiting Miles and Huberman (1994), Yin (2014) lists a few strategies at this playful stage:\n\nJuxtaposing data from different interviews\nPutting information into different arrays\nMaking a matrix of categories and placing the evidence within them\nTabulating the frequency of different events\nPutting information in chronological order or using some other temporal scheme\n\nYin (2014: 135) also emphasizes memo-writing as a core strategy at this stage, citing Corbin and Strauss (2014). These memos should include hints, clues and suggestions that simply put into writing any preliminary interpretation, essentially conceptualizing your data. He uses the specific example of shower thoughts.\n\n\nAnalytical strategies and techniques\nYin (2014: 136-142) then goes on to describe four general strategies:\n\nRelying on theoretical propositions\nWorking your data from the “ground up”\nDeveloping a case description\nExamining plausible rival explanations\n\nYin (2014: 142-168) then goes on to describe five analytical techniques:5\n5 I wonder: would Abbott (2004) call these heuristics?\nPattern matching\nExplanation building\nTime-series analysis\nLogic models\nCross-case synthesis\n\nRyan and Bernard (2000) describe various analysis techniques for analyzing textual elicitations in structured and codified ways.\n\n\nThe constant comparative method\nThe constant comparative method is based on action codes, similar to what Saldaña (2016) refers to as process codes. 
According to Charmaz (2000: 515): > The constant comparative method of grounded theory means (a) comparing different people (such as their views, situations, actions, accounts, and experiences), (b) comparing data from the same individuals with themselves at different points in time, (c) comparing incident with incident, (d) comparing data with categori, and (e) comparing categories with other categories.\nMy initial impression is that this is very well suited for Stake’s (2006) multicase study framework, specifically with regards to his notion of the case-quintain dilemma. It also seems very well suited for analysis of situational meaning-making, as per Suchman (1987), Lave and Wenger (1991), Knorr Cetina (2001) and symbolic interactionism at large.\n\n\nSituational analysis\nSituational analysis originates from Strauss’s social worlds/arenas/negotiations framework. From Clarke (2003: 554):\n\nBuilding on and extending Strauss’s work, situational analyses offer three main cartographic approaches:\n\nsituational maps that lay out the major human, nonhuman, discursive, and other elements in the research situation of concern and provoke analyses of relations among them;\nsocial worlds/arenas maps that lay out the collective actors, key nonhuman elements, and the arena(s) of commitment within which they are engaged in ongoing negotiations, or mesolevel interpretations of the situation; and\npositional maps that lay out the major positions taken, and not taken, in the data vis-à-vis particular discursive axes of variation and difference, con cern, and controversy surrounding complicated issues in the situation.\n\n\nRefer to highlighted sections in Clarke (2003), bring those over at some point.\nClarke (2003) refers to Shim (2000) as an exemplary case of situational analysis in action.\n\n\nStatistical methods\ncrosstab\n\n\nOn software\nWeitzman (2000) provides an overview of software and qualitative research, including a minihistory up to the year 2000 when the 
chapter was published.\nDescribing the first programs specifically designed for analysis of qualitative data, Weitzman (2000: 804) writes:\n\nEarly programs like QUALOG and the first versions of NUDIST reflected the state of computing at that time. Researchers typically accomplished the coding of texts (tagging chunks of texts with labels — codes — that indicate the conceptual categories the researcher wants to sort them into) by typing in line numbers and code names at a command prompt, and there was little or no facility for memoing or other annotation or markup of text.6 In comparison with marking up text with coloured pencils, this felt awkward to many researchers. And computer support for the analysis of video or audio data was at best a fantasy.\n6 This caught my eye since its the same approach as that adopted by qc!\nThis history if followed by a sober account of what software can and can not do in qualitative research, as well as affirmation and dismissed of hopes and fears. Very reminiscient of Huggett (2018)." }, { - "objectID": "notes/potential-cases.html", - "href": "notes/potential-cases.html", - "title": "Potential cases", - "section": "", - "text": "Isabel Fortier came up with a shortlist based on consultations to help determine which Maelstrom partner projects may serve as potential cases. We then met on 2025-02-04, when, among other topics, we discussed the shortlist.\nSee the case selection protocol for further details on the parameters that guide how cases are to be determined." 
+ "objectID": "notes/methodology-notes.html#writing", + "href": "notes/methodology-notes.html#writing", + "title": "Methodology notes", + "section": "Writing", + "text": "Writing\nSee Richardson (2000), who frames writing as a method of inquiry.\nSee Mitchell and Charmaz (1996)\nSee Charmaz (2000: 526-528)" }, { - "objectID": "notes/potential-cases.html#general-notes", - "href": "notes/potential-cases.html#general-notes", - "title": "Potential cases", + "objectID": "posts.html", + "href": "posts.html", + "title": "Blog", "section": "", - "text": "Isabel Fortier came up with a shortlist based on consultations to help determine which Maelstrom partner projects may serve as potential cases. We then met on 2025-02-04, when, among other topics, we discussed the shortlist.\nSee the case selection protocol for further details on the parameters that guide how cases are to be determined." - }, - { - "objectID": "notes/potential-cases.html#possible-candidates", - "href": "notes/potential-cases.html#possible-candidates", - "title": "Potential cases", - "section": "Possible candidates", - "text": "Possible candidates\n\nCITF\n\n\nReACH\nTitle: Stress and Anxiety During Pregnancy and Length of Gestation Harmonization Initiative + ReACH\nContact: Julie Bergeron \nReason: A small project and a very complex infrastructure-oriented network coordinated by Julie Bergeron. Both projects are finalized.\nNotes: Julie Bergeron was a PhD student at Maelstrom, and Isabel says that she is probably the most knowledgeable person regarding data harmonization who I will encounter during my research. She worked on her dissertation project (Stress and Anxiety During Pregnancy and Length of Gestation Harmonization Initiative) while also leading work on ReACH, and her dissertation essentially served as one of a few pilot projects under the aegis of ReACH. 
ReACH was led by Isabel as its PI.\nBoth projects are complete, but Isabel thinks that Julie Bergeron will be able to share some significant insights on this past work. My instinct is that this presents an opportunity to explore how/whether harmonization is suited for doctoral training, the role of pilot projects within broader initiatives, and impact that closeness to the method of data harmonization might have.\nLinks:\n\nhttps://www.maelstrom-research.org/network/reach\n\n\n\nCAPACIty\nTtle: Capacity: Building CAPACIty for pediatric diabetes research and quality improvement across Canada\nAcronym: CAnadian PediAtric diabetes ConsortIum (CAPACIty)\nContact: Shazhan Amed \nReason: Across Canada, focus on clinical data. A lot of work already achieved, and harmonization will start soon. Will use a federated approach for statistical analysis.\nNotes: A network of 15 childhood diabetes centers from across Canada. Went through four years of administrative work, and is now just starting harmonization after finally going through all theose hurldes, despite being very eager to get into the data work early on. Despite these challenges, Isabel thinks they will be very receptive to participating in the study.\n\n\nSHAIRE\nTitle: SHAIRE: Scientific & Health Data Assets In Respiratory Research\nContact: Sanja Stanojevic \nReason: New project just starting, very interesting and dynamic.\nNotes: Extremely new, just got funding very recently. I don’t know that much, to be honest. Could potentially provide redundant value to my study as Capacity, but need to find out more.\n\n\nRespiratory study\nTitle: High-dose docosahexaenoic acid for bronchopulmonary dysplasia severity in very preterm infants: A collaborative individual participant data meta-analysis\nContact: Isabelle Marc \nReason: Very specific and small project, very clinical.\nNotes: A very small project, harmonizing two datasets. 
I asked if this scale of work is common and Isabel says that it is, so it’s not an outlier.\nLinks:\n\nhttps://maelstrom-research.org/study/n3pi-hi\n\n\n\nMORGAM\nTitle: MOnica Risk, Genetics, Archiving and Monograph\nContact: Kari Kuulasmaa \nReason: European, started several years ago.\nNotes: Older project, ended around 10 years ago, the PI is retired. Might be valuable for looking at broader impact and potential offshoots after the work has been completed.\nLinks:\n\nhttps://www.maelstrom-research.org/network/morgam\n\n\n\nLifecycle\nVery improvized approach to data harmonization, did a lot of “manual” work. According to Isabel, Julie Bergeron will be able to tell me more.\nLinks:\n\nhttps://pmc.ncbi.nlm.nih.gov/articles/PMC7387322" - }, - { - "objectID": "notes/potential-cases.html#recommended-but-no-longer-considered", - "href": "notes/potential-cases.html#recommended-but-no-longer-considered", - "title": "Potential cases", - "section": "Recommended but no longer considered", - "text": "Recommended but no longer considered\n\nCanPath\nTitle: CanPath, formerly called the Canadian Partnership for Tomorrow’s Health\nContact: Noah Frank \nReason: One of the most important harmonization initiative, infrastructure oriented, long-term started more than 10 years ago.\nNotes: Other contacts include John Mclaughlin and Paula Robson.\nMy instinct is to look at how things change over the course of a very long and institutional initiative, especially across discrete phases marked by leadership transitions. But the history here is so vast and I will probably not get much of it through a series of interviews.\n\n\nMindmap\nPromoting mental well-being and healthy ageing in cities. Seems very similar to CanPath in terms of scope and governance, and I would likely face similar challenges." 
+ "text": "Date\n\n\nTitle\n\n\nCategories\n\n\n\n\n\n\nFeb 14, 2025\n\n\nWeek notes (2025-W07)\n\n\nweek notes\n\n\n\n\nFeb 7, 2025\n\n\nWeek notes (2025-W06)\n\n\nweek notes\n\n\n\n\nJan 31, 2025\n\n\nWeek notes (2025-W05)\n\n\nweek notes\n\n\n\n\nJan 25, 2025\n\n\nWeek notes (2025-W04)\n\n\nweek notes\n\n\n\n\nJan 24, 2025\n\n\nOn the role of AI in my research\n\n\nAI / LLM, Methods, QDA\n\n\n\n\nJan 18, 2025\n\n\nWeek notes (2025-W03)\n\n\nweek notes\n\n\n\n\nDec 18, 2024\n\n\nTechnical specs for this website\n\n\nwebsite\n\n\n\n\nDec 9, 2024\n\n\nReflection on first team meeting\n\n\nmeeting notes, general thoughts\n\n\n\n\nDec 9, 2024\n\n\nHello World!\n\n\nintroduction, website\n\n\n\n\n\nNo matching items", + "crumbs": [ + "Blog" + ] }, { - "objectID": "posts/2024-12-09-first-team-meeting.html", - "href": "posts/2024-12-09-first-team-meeting.html", - "title": "Reflection on first team meeting", + "objectID": "posts/2024-12-09-hello-world.html", + "href": "posts/2024-12-09-hello-world.html", + "title": "Hello World!", "section": "", - "text": "Last week (2024/12/04) I finally met with David Buckeridge, Tanya Murphy and Aklil Noza in person. The meeting was meant to convey my vision for the project to the whole team, to align perspectives, and to articulate how this will actually work in practice.\nThe gist is that I will be investigating the role of social and cultural factors in data-sharing initiatives such as CITF and other Maelstrom-affiliated projects, and how these relate to, overlap with, or conflict with technical and institutional/administrative factors. To be clear, these are all very important aspects of data-sharing, but we generally recognized that the social and cultural aspects are under-explored relative to their impact.\nWe briefly talked about how we will go about selecting cases, and I emphasized the importance of strategic case selection. This also involves carefully articulating the project’s goals so that the cases will complement them. 
We agreed that the dataset will likely comprise between 12-15 interviews of around 60-90 minutes in length with representatives from 4-5 cases (one of them being CITF), in addition to representatives of the Maelstrom team. Maelstrom will serve as a “fixed point” that limits the scope of the cases’ breadth and ensures that participants have a common frame of reference. It also potentially allows me to “offload” or “consolidate” reference to technical and administrative aspects of data-sharing through targeted interviews with Maelstrom personnel, instead of dealing with those things with the representatives for each case.\nWe discussed timelines and overlap with Aklil’s work, which will be more concerned with focus groups with CITF databank users. There is definitely overlap with the emphasis of my own work and we will coordinate data collection to enhance the potential for analytical alignment.\nAfter the meeting I chatted with Tanya and Aklil who helped familiarize me with the bigger-picture theoretical discourse and tensions in epidemiology. Much of it seemed familiar since these concerns are common across disciplines, but I still need to read more to concretize my understanding. Tanya recommended I read the “Baby Rothman” which is a condensed version of a very long-lived textbook in this field, among a few other papers she sent me.\nOverall, this meeting got me really excited about this project :)" + "text": "Welcome to the website for my CITF Postdoc! This will serve as a hub for documenting and sharing my work. I decided to do this as a way of managing and sharing always-updated drafts of research protocols with my supervisor and team members, but it is also generally useful for keeping my thoughts organized. I will also use this blog section to write my thoughts as the project progresses." 
}, { - "objectID": "posts/2024-12-11-technical-specs.html", - "href": "posts/2024-12-11-technical-specs.html", - "title": "Technical specs for this website", + "objectID": "posts/2025-01-24-ai-in-my-work.html", + "href": "posts/2025-01-24-ai-in-my-work.html", + "title": "On the role of AI in my research", "section": "", - "text": "I’m using this website as a way to help organize and share key documents and resources. The research protocols are in flux at this stage in the project’s development, and this will make it easier to distribute up-to-date drafts with partners, while simultaneously enhancing transparency.\nThis post outlines the technical specifications for this website and outlines a roadmap for its further development. It will therefore be continually updated as the site evolves." - }, - { - "objectID": "posts/2024-12-11-technical-specs.html#fundamentals", - "href": "posts/2024-12-11-technical-specs.html#fundamentals", - "title": "Technical specs for this website", - "section": "Fundamentals", - "text": "Fundamentals\nThis website is based on Quarto, a platform for writing and publishing scientific and technical writing. I had used quarto before but without fully understanding it, and now I am starting to see its elegance.\nI had started off using Hugo, but there were too many limitations that Quarto was able to accomodate. You can find an older version of this post reflecting that setup here: #2346852.\nThe site is hosted on GitHub Pages. The repo is located at https://github.com/zackbatist/CITF-Postdoc." - }, - { - "objectID": "posts/2024-12-11-technical-specs.html#generating-pdfs", - "href": "posts/2024-12-11-technical-specs.html#generating-pdfs", - "title": "Technical specs for this website", - "section": "Generating PDFs", - "text": "Generating PDFs\nAs an avid user, one thing I really like about Quarto is the ability to generate PDFs alongside html versions served over the web. 
I started tinkering with includes but I need to review how Quarto passes info from YAML frontmatter. It is not at all straightforward and I will need to experiment a bit more with this to get the hang of it." - }, - { - "objectID": "posts/2024-12-11-technical-specs.html#archiving-and-version-control", - "href": "posts/2024-12-11-technical-specs.html#archiving-and-version-control", - "title": "Technical specs for this website", - "section": "Archiving and Version Control", - "text": "Archiving and Version Control\nEvery change is tracked using git. I would also like to archive each research protocol in Zenodo once they reach a point of stability. This would ensure that they ca be aassigned DOIs and detailed metadata, which will make them easier to reference.\nHowever, I do not want to rely on Zenodo’s GitHub integration for two reasons: (1) I want this to be as platform-agnostic as possible, and (2) that system relies on GitHub’s release system which operates on the level of the whole repository rather than specific files.\nI might be able to write a custom CI workflow to archive specific files to Zenodo using their API. But, I want to be able to toggle this option, rather than have it occur for every single detected change. Maybe I can accomplish this by pushing the changes that I want to archive to a dedicated branch that the CI workflow is configured to operate on. Or it might be easier to simply do this manually, since I’m not sure I will be using it that often anyway." + "text": "AI is upon is, and although I would probably be ok if it wasn’t around, I have been (and still am, to a certain extent) tempted to use it in my research. So here I’m gonna articulate some of my thoughts on AI. This isn’t written to convince anyone, or even to convince myself. Just to lay out all my thoughts and take stock of my preconceptions, disapointments, hopes and desires, etc.\nAlso, I’m gonna use AI, LLM and whatever other brand names and marketing buzzwords interchangably here. 
Draw whatever conclusions you want about that.\nI see AI as being potentially useful in a few productive activities I regularly engage in:\n\nTranscribing spoken words into written text\nTranscription is a significant component of processing interview data, and this can be extremely slow work. It’s a lot easier to edit a transcript produced through a computer algorithm rather than start from scratch. I used trint, otter and other similar tools before all the AI hype, and more recently I’ve been using whisper to transcribe voice notes that I record while I’m waiting for the bus or drifting off to sleep. I’m not really sure how they’re much different, to be honest. Is AI just a rebrand of natural language processing in these contexts? Either way, I will most certainly be using some automatic transcrion tool in my research.\nSummarizing, breaking down and simplifying complex bundles of ideas\nI do a lot of reading, and it can be hard to get through everything on my list. I therefore make lots of compromises and refrain from reading some things because I just can’t make enough time to get through everything. I imagine that AI can help summarize some key points across a whole corpus of articles on my to-read pile, and I may try it out once I have time to figure out the right tooling for the job. However, I do gain a lot of value from the process of reading. Specifically, as a scholar of scientific practice, I’m interested in the language and rhetoric authors use to describe and situate their methods and findings, and I’m not sure if automatic summary tools can capture and communicate this nuance in ways that I want.\nGenerating code snippets for data processing and visualization\nThis is arguably the most productive potential application I can imagine. Specifically, I’m thinking about using this to generate R code that processes and visualizies data according to imagined outcomes. 
This is directly relevant to a project I’m working on where I’ve already finished the workflows for scraping and processing the data, I have the questions I want to ask of it, but I don’t have the practical know-how to generate the code that will allow me to address them. ggplot is just so dense to me, and stitching together code snippets from stack exchange is a major pain in the ass that produces a horrible abomination of code that would not pass the muster of any rigorous code review. What’s more, those queries to search stack exchange are already half-formed AI prompts! At least an AI would generate some harmony in the code, and I might learn something by having a tidy and consistent template.\n\nI’m more ambivalent and critical about using AI in these contexts where it’s been really hyped:\n\nAny form of writing, including generating emails and abstracts\nFor me, writing is a creative process and a way of unerstanding. It’s a mechanism through which I come to learn about something. The experience of drafting and revising a document is crucial to my research process. This is especially important for honing my position as a scholar at the intersection of various disciplinary communities, who have distinct language and modes of communication.\nQuerying for truth claims\nTo be clear, the idea that knowledge can be total, absolute and disembodied is deeply flawed, and the popular reception of AI as a neutral observer and reporter of nature makes me sad. That being said, I’m still ambivalent about the potential for specialized, home-grown LLMs as means of parsing, sorting through and obtaining greater value from under-used resources. There are patterns in even the messiest and least formal documents we create, and even if we can’t draw information from these documents, LLMs may be useful to help us reflect on the circumstances of their creation. 
I keep thinking about Shawn Graham’s twitter bots in this context (which were not based on AI, but whatever), which attempted to spit out segments of artificial reports and fieldwork drama, which real archaeologists often related and resonded to. These responses were interesting to me, often expressed as collective fascination, titilation or disgust, and reminiscient of the apprehension one might experience when hearing your own voice played back while standing at the opposite end of a long hallway. Reacting to distortions of your own experience from very different perspectives can be a really powerful reflexive exercise.\nAs a brainstorming tool, or as a rubber duck\nI’ve heard about people using AI chatbots as agents to bounce their ideas off of. Kind of like eliza, but for productive work. While I think it’s intriguing, I don’t know where I’d start. Also, drawing up the prompt and figuring out how to ask the right questions may already be enough to get the ideas flowing. I think I already do this in some ways by drafting little ephemeral notes, usually directed toward a specific person or imaginary audience while anticipating their feedback. It also somehow seems like a perverse way to de-socialize work, and in a world where students and postdocs feel increasingly isolated, I’d much rather solicit and provide feedback among peers. This has been the foundation of some of my most solid friendships and professional partnerships, and should be encouraged.\n\nI also have some previously-unstated opinions in relation to some common critiques of AI:\n\nProcess versus product\nAI seems to be really good at devising formulaic outputs. That is, it’s good at getting things to look like things whose shapes are already well-defined. This can be valuable in various use cases, like writing emails according to a template or translating texts between languages. 
I could imagine it being really helpful for those who are coming into a field where certain skills are taken for granted, such as learning how to write “proper” academic emails as a student who is not fluent in english. Imagine being up against a deadline for a job application, while also being knee-deep in unpaid work to get your name out there; an LLM could be a godsend. So I don’t discount easy outputs as inherently bad. A standard output for one is a week-long struggle for another, so I think this distinction between product and process is a false and misleading dichotomy.\nBad instructions\nSometimes I find it really hard to believe that people could earnestly follow whatever an AI tells them. But I think we’re getting to the point of urban mythmaking, similar to the older wariness about following your GPS into a lake. There’s a story behind every warning sign, even if it’s a projection of what you think might happen if you disregard it.\n“Intelligence”\nOne weird thing about AI branding is the smushing together of some unified idea of what constitutes “intelligence”. We’ve already been through this with “smart” gadgets, which have always just been ways to capture consumer products under a platforms proprietary injected plastic molds and information protocols. AI is literally just a way to sell you a new version of the smart gadget you threw out last year.\nTruthiness, i.e., AI’s ability to sound authoritative while also making false claims\nI cringe at any retort to a screenshot of AI giving a wrong definition of a thing. Accuracy of responses should come secondary to critique of the notion that all forms of knowledge can be presented in terms of absolute, disembodied and universally truths. For example, when people ridicule AI’s inability to identify the capitols of various nation states, I see missed opportunities to challenge the value of any answer that anyone might provide. 
True subversion would be to reject or re-frame the question and the simplicity with which it is addressed.\nOne another related note, I see a lot of weird parallels between myths about truth claims made by AI and by practitioners of qualitative data analysis (QDA) — and, as a qualitative researcher, this is obviously a bit unsettling. Specifically, in both QDA and AI, there is no actual attempt to make absolute truth claims, but the focus is rather on attempting to identify and draw out meaningful elements of elicitations in a corpus, and to trace patterns between them. In my current opinion, the key difference lies in positionality. Any QDA researcher who laim that their cases are representative of all experiences will be laughed out of the room. Meanwhile, AI is lauded for the claims made by their creators that it can derive unambiguous and concrete knowledge from inherently situated and biased data sources. Humility is key while contributing to collective knowledge bases, and AI risks changing the dynamic away from deriving greater value from constructive discourse and toward a system where the loudest voice in the room wins.\nClimate change\nAI uses a lot of energy, and is therefore said to be wasteful. However I think there are certain wasteful components of AI. For instance, generative models that spit out a full sentence to wrap around the answer to a question don’t have to do all that extra work. Also, not everyone is reliant on fossil fuels, and the critique that AI is necessarily bad for the environment is laden with a thick American accent (as is the case with so many of the loudest opinions on the internet).\nThat being said, there are enormous problems with resource allocation in AI, and I’m not trying to dismiss all concerns. I see these concerns as relating to the distribution of power and wealth in society at large, and AI is one aspect of this. 
Sometimes I wonder if comparisons can be made between using AI in selective research contexts and eating a burger or a banana, which each have their own environmental costs. But thinking in this way is a bit of a trap.\n\nI also see that rhetoric, including anxieties about AI, differs in the various communities I participate in:\n\nIn digital-x, where x = {archaeology | humanities | librarianship | whatever}\nThere’s a lot of experimentation going on. Honestly, I don’t know much about it and I tend to scroll past any discussion about AI applications in archaeology that appears in my feed. Part of me sees it as a passing trend, but it could be better framed as a wild frontier, as is the case with many other things in digital archaeology. People are still in the process of taming the landscape, to make it better suit their needs, and maybe I’ll join in once the settlement is established. But I’m not personally motivated by the dynamism of the current landscape, at least in this particular domain.\nEpidemiology, biostats, public health\nI’m still too new in this community to really make sense of this yet. I’ll continue to watch and learn and listen.\nBroader social science and humanities, as well as libraries, archives and museums\nCritique tends to follow broader, more abstract, and more common-sense lines of thought. In my view, much of this does not really account for the material problems and imperfections in which the social sciences and humanities operate. AI is a lifeline for many people in an overworked, overburdened, under-resourced and hyper-competitive environment, and tut-tutting around how other people use AI sometimes comes across as tone-deaf and disrespectful. Some criticisms of AI being used in real, practical circumstances make me second guess critics’ supposed commitments to improving the social experience of research. 
The fundamental problem is inequitable access to financial and material resources, and AI’s prevalence is a major symptom of, or — depending on your perspective — resolution to that. People’s who recognize this have no choice but to post broader and more abstract criticisms, which come across as somewhat hollow when disconnected from real and tangible experiences.\nSenior faculty\nProbably the most ambivalent of all communities are senior faculty, who want AI to be useful and will test the waters without fully committing. Which is fine and very prudent, and honestly I identify most with this perspective, despite my position as a lowly postdoc.\nGrad students\nI engage with many grad students. I share my workspace with grad students and encounter them constantly in my day to day neighbourhood forays, where I overhear and sometimes participate in conversations about AI. In my new work environment (Epidemiology, Biostatistics and Occupational Health), the grad students who I engage with have a relatively positive perception of AI. They seem to find greater value in the ability to automate complex processes, using it as a black box of sorts, with predictable and abstracted inputs and outputs, which they see as especially helpful for coding. Outside of this space I’m encountering way more diversity of thought on AI, and I’m not quite sure how to group these viewpoints to structure a proper reaction. I think this in fact contributes to the multitude of perspectives, since no one really cares that much one way or the other to really have a strong opinion (though I sense an overwhelming dissatisfaction when it comes to AI in consumer contexts; this post is largely about productive uses of AI in research and pedagogy).\nI was also told about students learning RStats by just having AI generate their code. The person who pointed this out to me related this to the growing misconception that to learn stats you first need to learn how to code. 
This in turn relates to the sense that to learn how to do RStats, you just need to memorize a series of steps and copy the text from the slides into the IDE. So, in the end, AI reveals the inadequacy of the teaching mechanisms for programming and stats classes, similarly to how AI has revealed the inadequacy of essay-writing as a pedagogical technique.\nOn the other hand, some students are concerned about dulling their skills, or even not being able to take advantage of opportunities to learn new skills, due to the temptation to automate these tasks. Some upper-year PhD students are glad that they were trained in the fundamentals prior to the AI hype wave. This makes me wonder how students are determining what skills they think they need to know how to do on their own and what is worth running through an LLM. Does it basically operate as a bullshit sensor, where you can smell from a distance that the work is just gonna be tedium and irrelevant? Or is it more out of practical necessity, where you’re stretched so thin that you simply have to rely on these tools to achieve anything meaningful, almost as a mechanism for salvaging one’s work from the claws of austerity? In either case, this points to PhD programs’ inadequacy to match students’ needs and desires, and overwhelming amount of administravia or (seemingly) irrelevant work that students are made to do, which get in the way of their true interests.\n\nMaybe I’ll have more to share some other time." }, { - "objectID": "posts/weeknotes-2025-W03.html", - "href": "posts/weeknotes-2025-W03.html", - "title": "Week notes (2025-W03)", + "objectID": "posts/weeknotes-2025-W04.html", + "href": "posts/weeknotes-2025-W04.html", + "title": "Week notes (2025-W04)", "section": "", - "text": "I’m trying out a new way to track and communicate my progress on this project. Every week I’ll write a post to track the work I’ve been doing and reflect on my activities. 
I’ll try to maintain this document throughout the week, tidy it up and post it here on Friday afternoons. However the specific process will probably vary as it grows into the rest of my workflow.\nI’m purposefully trying to not tie this into the personal knowledge management trend. It’s for me and my own purposes, and I don’t want to get bogged down with the unabashed managerial phoniness that belies the PKM phenomenon.\nAnyway, I didn’t take notes on my work this past week, but here’s an overview of what I’ve done from memory:\nContinued to take notes on readings produced by the Maelstrom Project and its partners.\nContinued to investigate and maintain notes on potential cases.\nSet up a placeholder document for notes on methodological concerns.\nMet with David for our bi-weekly check-in (meeting notes are private, at least for now). I was also given access to the lab’s private git server but haven’t really had much of a chance to explore what it’s being used for or devise my own plans to make use of it.\nWorked extensively on the ethics protocol. David and I went back and forth deciding on whether this was necessary, given how the project is based on his grant which already has IRB approval. But it’s better to play it safe than sorry, especially when it’s necessary to obtain informed consent. So to this end, I revised the research protocol and responses to the ethics form, and I also drafted an informed consent document. I’ll share all these things once the whole package is put together (probably in a week or two), but my responses to the ethics form already appears on the ethics protocol page.\nI simplified the way private documents are handled in the quarto project and git respository. May still need to so some fiddling, especially for draft blog posts and notes.\nI started drafting a blog post about the potential use of AI/LLMs in my research. 
Stay tuned.\nOn a related note, I watched this recent video about the use of LLMs in qualitative data analysis, which did not prompt me to draft the post but which is well-timed, nevertheless.\nI worked a bit more on the data management plan, which prompted me to think more about which QDA software I’ll use. I started filling in a university form to use cloud services provided by MaxQDA, but stumbled upon qualitative-coding (abbreviated as qc), an open source CLI-based QDA system. It represents a very innovative approach to QDA rooted in computational thinking and plain text social science, while also remaining true to the core tenets and purpose of QDA, which make it unique and difficult to design software for. If this is the sort of thing that appeals to you, I highly recommend you read the docs.\nI had a bit of trouble installing it and getting it running, but I met remotely with Chris Proctor, who develops the tool through his work at the Computational Literacies Lab, based in the Department of Learning and Instruction at University at Buffalo (SUNY). He helped me resolve some issues, gave me a guided tour of the system and we just talked about the overall state of qualitative data analysis and its tooling. I don’t really have the capacity right now to post everything he showed me but I will definitely be posting about my experiences tinkering around with qc in the coming weeks.\nSimilarly, I asked on Mastodon about whether there are any tools that might support automatic generation of transcripts that include support for specialized notation. A few linguists and conversation analysis scholars responded with recommendations to use GailBot, and with discussion about the tool’s capabilities and limitations. I requested access to the software but haven’t heard back from the dev team yet. 
I also created a thread on the whisper github repo, which I now realize it a bit of a naive place to put it, and it hasn’t yet gotten any responses.\nI attended a talk from the epidemiology seminar series, which went wayyyy over my head.\nDid my usual amount of engagement on masotodon, I suppose. And I continued to make new friends in the department too :)" + "text": "This week was a bit slower than last. I spent much of it finalizing the content for my IRB application, and the rest preparing for a meeting with a key stakeholder relating to my research.\nThe IRB application is more or less done, just waiting on David and the department head to sign off. It was a major opportunity to re-organize my research protocol and related documents. I shuffled some things over into various placeholder sections in my methodology notes, and pushed a revised research protocol to the website.\nYesterday I posted about on the role of AI in my research. It’s mainly meant to lay out my current state of thinking on AI. I’m not fixed to those ideas, and I think there is much more nuance than I do justice to in that post, but putting it on the page helped me consolidate and put aside some scrambled opinions.\nAfter playing around with qc on Sunday, I started to assemble some feedback. I may post a github issue later this week, once I’ve had a chance to consolidate and edit my thoughts.\nI participated in the weekly CITF logistics update, after which I met with Aklil to discuss the overall strategy for her project and strategize on how we might form the focus groups for that work. We’re gonna meet more regularly, just to share some updates on our respective projects which have a lot in common.\nOn Thursday I met with Isabel Fortier, with the intention of discussing data harmonozation initiatives that might serve as potential cases. It was a bit of a tough meeting. 
During the first 45 minutes I struggled to communicate the purpose of my work, but I think by the end we reached a greater understanding of what this work will entail and the unique perspective it will bring. One surprising outcome that I still need to think through is Isabel’s suggestion that I slow down a bit, immerse myself more in the world of data harmonization. While she is absolutely right that I’ve been rushing through this first month, I do feel pressure to get the project going and to start accumulating data — I felt a similar pressure when starting my PhD, too. So I need to put my eagerness aside so that the data are relevant and of good enough quality. Isabel offered to schedule regular meetings with me, and even to have me work in the Maelstrom office once a week, and I’m extremely grateful for her support! Plus, I’ll get to have lunch with my mom who works in the same hospital complex, one building over :)" }, { - "objectID": "posts/weeknotes-2025-W05.html", - "href": "posts/weeknotes-2025-W05.html", - "title": "Week notes (2025-W05)", + "objectID": "posts/weeknotes-2025-W06.html", + "href": "posts/weeknotes-2025-W06.html", + "title": "Week notes (2025-W06)", "section": "", - "text": "The first part of my week largely involved reading and taking notes on methodological texts. I focused on case study design and qualitative coding techniques. I plan to continue my methodology readings on coding techniques, memoing, interview methods and systematic note-taking, as well filling in gaps in my understanding of grounded theory and related debates. These readings are especially useful in this planning stage, but also serve to fill time while I wait for my IRB approval.\nOn that note, I finally submitted my ethics application on Thursday. I expect an expedited review based on the low-risk nature of the work. 
I posted the materials I submitted on the ethics protocol page.\nI had my biweekly meeting with David, and he was very encouraging.\nI met with Isabel Fortier again yesterday and we came up a list of six projects that may serve as potential cases. We will discuss them in greater depth next week.\nI finally sent some feedback on qc.\nNext week I also need to fulfill a few commitments not as related to the postdoc: I need to work on a peer-review I had committed to, continue assemmbling constructive feedback for qc, and continue going through the DOAJ for the diamond.open-archaeo initiative." + "text": "This week was a bit slower than usual. The highlight was my meeting with Isabel to go over details about harmonization procedures and to discuss projects that may serve as potential cases. I got a better sense of the community composition and the kinds of challenges that are commonly experienced, and Isabel was able to provide me with contacts to get in touch with once I’m ready to begin collecting data.\nI was less active with regards to my methodology notes this week, having spent much time re-organizing and consolidating them. I’m also reading about coding techniques and the foundational principles and debates surrounding grounded theory, but there is a lot of ground to cover and I’m taking it all in before I begin recording my thoughts in a systematic way.\nI did some non-postdoc service this week too. I started a peer-review that I had committed to, participated in the monthly SSLA meeting, and completed the first-pass screening of archaeology-related journals in the Directory of Open Access Journals to verify their inclusion in diamond.open-archaeo." 
}, { "objectID": "qda-protocol.html", diff --git a/sitemap.xml b/sitemap.xml index b4ca591..d4d9727 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -5,84 +5,88 @@ 2025-01-22T21:05:38.131Z - https://zackbatist.info/CITF-Postdoc/posts/weeknotes-2025-W06.html - 2025-02-07T19:44:48.625Z + https://zackbatist.info/CITF-Postdoc/posts/weeknotes-2025-W07.html + 2025-02-14T19:53:21.488Z - https://zackbatist.info/CITF-Postdoc/posts/weeknotes-2025-W04.html - 2025-01-25T16:34:43.500Z + https://zackbatist.info/CITF-Postdoc/posts/weeknotes-2025-W05.html + 2025-01-31T18:08:15.277Z - https://zackbatist.info/CITF-Postdoc/posts/2025-01-24-ai-in-my-work.html - 2025-01-25T16:06:33.708Z + https://zackbatist.info/CITF-Postdoc/posts/weeknotes-2025-W03.html + 2025-01-18T17:31:25.738Z - https://zackbatist.info/CITF-Postdoc/posts/2024-12-09-hello-world.html - 2024-12-18T16:31:38.955Z + https://zackbatist.info/CITF-Postdoc/posts/2024-12-11-technical-specs.html + 2024-12-18T20:38:01.508Z - https://zackbatist.info/CITF-Postdoc/posts.html - 2025-01-19T01:57:01.158Z + https://zackbatist.info/CITF-Postdoc/posts/2024-12-09-first-team-meeting.html + 2024-12-18T16:31:38.954Z - https://zackbatist.info/CITF-Postdoc/notes/methodology-notes.html - 2025-02-12T21:31:11.371Z + https://zackbatist.info/CITF-Postdoc/notes/potential-cases.html + 2025-02-04T20:47:30.293Z - https://zackbatist.info/CITF-Postdoc/notes.html - 2025-01-22T21:14:13.480Z + https://zackbatist.info/CITF-Postdoc/notes/maelstrom-readings.html + 2025-01-28T20:27:53.768Z - https://zackbatist.info/CITF-Postdoc/index.html - 2025-01-21T23:24:59.599Z + https://zackbatist.info/CITF-Postdoc/interview-protocol.html + 2025-01-21T21:35:24.613Z - https://zackbatist.info/CITF-Postdoc/ethics-protocol.html - 2025-01-30T21:40:04.298Z + https://zackbatist.info/CITF-Postdoc/glossary.html + 2025-01-19T01:58:44.408Z - https://zackbatist.info/CITF-Postdoc/context.html - 2025-01-31T16:51:55.950Z + https://zackbatist.info/CITF-Postdoc/data-management.html + 
2025-01-15T16:48:39.050Z https://zackbatist.info/CITF-Postdoc/case-selection.html 2025-01-28T15:47:37.243Z - https://zackbatist.info/CITF-Postdoc/data-management.html - 2025-01-15T16:48:39.050Z + https://zackbatist.info/CITF-Postdoc/context.html + 2025-01-31T16:51:55.950Z - https://zackbatist.info/CITF-Postdoc/glossary.html - 2025-01-19T01:58:44.408Z + https://zackbatist.info/CITF-Postdoc/ethics-protocol.html + 2025-01-30T21:40:04.298Z - https://zackbatist.info/CITF-Postdoc/interview-protocol.html - 2025-01-21T21:35:24.613Z + https://zackbatist.info/CITF-Postdoc/index.html + 2025-01-21T23:24:59.599Z - https://zackbatist.info/CITF-Postdoc/notes/maelstrom-readings.html - 2025-01-28T20:27:53.768Z + https://zackbatist.info/CITF-Postdoc/notes.html + 2025-01-22T21:14:13.480Z - https://zackbatist.info/CITF-Postdoc/notes/potential-cases.html - 2025-02-04T20:47:30.293Z + https://zackbatist.info/CITF-Postdoc/notes/methodology-notes.html + 2025-02-14T19:47:03.624Z - https://zackbatist.info/CITF-Postdoc/posts/2024-12-09-first-team-meeting.html - 2024-12-18T16:31:38.954Z + https://zackbatist.info/CITF-Postdoc/posts.html + 2025-01-19T01:57:01.158Z - https://zackbatist.info/CITF-Postdoc/posts/2024-12-11-technical-specs.html - 2024-12-18T20:38:01.508Z + https://zackbatist.info/CITF-Postdoc/posts/2024-12-09-hello-world.html + 2024-12-18T16:31:38.955Z - https://zackbatist.info/CITF-Postdoc/posts/weeknotes-2025-W03.html - 2025-01-18T17:31:25.738Z + https://zackbatist.info/CITF-Postdoc/posts/2025-01-24-ai-in-my-work.html + 2025-01-25T16:06:33.708Z - https://zackbatist.info/CITF-Postdoc/posts/weeknotes-2025-W05.html - 2025-01-31T18:08:15.277Z + https://zackbatist.info/CITF-Postdoc/posts/weeknotes-2025-W04.html + 2025-01-25T16:34:43.500Z + + + https://zackbatist.info/CITF-Postdoc/posts/weeknotes-2025-W06.html + 2025-02-07T19:44:48.625Z https://zackbatist.info/CITF-Postdoc/qda-protocol.html