From d82f3686c15344d6b80d316a38c356930a8cb3b8 Mon Sep 17 00:00:00 2001 From: Jay Feulner Date: Mon, 31 Mar 2025 14:23:57 -0500 Subject: [PATCH 1/2] Add SQL scripts for preliminary data collection and sales analysis - Created `with.pgsql` for querying salespeople associated with California dealerships. - Added `W2_case_study.sql` containing exercises for data collection, sales extraction, and analysis of Bat Scooter sales. - Introduced `case_study_exercises_with_js_blocks.sql` for structured SQL exercises and sales data analysis. - Developed `w2.ipynb` Jupyter notebook for interactive SQL exercises and data exploration. - Implemented `week2_case_study.sql` for comprehensive SQL exercises focusing on scooter products and sales information. --- .gitignore | 16 + 2025-03-30_sqlda.docx | Bin 0 -> 20044 bytes ... \342\200\223 a Case Study en-uCertify.md" | 1482 ++++++++++++++++ ...\342\200\223 a Case Study en-uCertify.sql" | 1495 +++++++++++++++++ Lesson02/with.pgsql | 8 + W2_case_study.sql | 281 ++++ case_study_exercises_with_js_blocks.sql | 175 ++ w2.ipynb | 229 +++ week2_case_study.sql | 76 + 9 files changed, 3762 insertions(+) create mode 100644 .gitignore create mode 100644 2025-03-30_sqlda.docx create mode 100644 "Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.md" create mode 100644 "Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.sql" create mode 100644 Lesson02/with.pgsql create mode 100644 W2_case_study.sql create mode 100644 case_study_exercises_with_js_blocks.sql create mode 100644 w2.ipynb create mode 100644 week2_case_study.sql diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b5c7ae2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ +.qodo + +# Node modules and build output +node_modules/ +dist/ + +# Log files +*.log + +# Operating system files +.DS_Store +Thumbs.db + +# IDE / Editor directories +.idea/ +.vscode/ diff --git a/2025-03-30_sqlda.docx b/2025-03-30_sqlda.docx new file 
mode 100644 index 0000000000000000000000000000000000000000..60063f6131a2b45a9944da665b063b5125f1e2f6 GIT binary patch literal 20044 zcmeHvbzD{5)-Ih&hje#0N=Zp~r+^^cozf{t2nZ5NcXvpaNC?sm(o)iW*9QB2-+Rva z?sxw?{MK*7X0I{VTw~7hjAuM^uPHAD355X$1_uW=;_0QCu2=MO4GauS76J?m5%~Ss z&d9=^i4pW#7~LWX$$}EN;~C0)ylp`jEMDFiJovi%3{04{9xu^Q2+i^Qh?KfcUJ6+^ zDAoEM&ee<;l5V3dqikBB%&_mGNqZf)naeuSnDZOSOp_id!@E?BB|ED1#z)G@?X0!R zpYT2CakDgs`3zXlVQIt3BkXtmK0mBvj~0_Qj9_QAnmWSTw}x>*%zB`H_f(JT&6(1o z`8eyH{$0OUW6h;08a}*g5pi1&j3kosmmu=8dF@8@I(UsFplLFnA`$L3d^~bXZz}F+ zj-^)-B1>R8_n>5ipSf5Kiwhxo?c9U4HFh!ugUzs}N+kcALzp`&34_4Fz^q}wz|aAQ zXbD?eIT%?vJXUtKF|yZTbg{Ikj2*CAB1Y-l@k9vQq@`Sco2P2)Hg5HzJ~q)44B|-; z!w2G5+~F!JPK8(XHhL%TcDNwXdS8#$8z8f2v`W&yucZd#zzLI~xGLOvI+iA0 zFi0&17t1Y##+4toe0Gl*v(Wh!ZK)BGK&G@CqHbn1gx;o(%tty|ijGGxnM5j(U+wst zq7x&*T{|a)3*xHf^OKhroqN5hBHRaIJ&M`M{v?`3v{_$tmPnPoictBpXRYSdjPUc(hqlF} z*9wHRBzM=(#YrSEg-(xnVPIq(EjjKD@gR3o7-B#^#Uh0cws@O*C4c0T7S#Zaob9-0 z>fOIm*!@mN26dLm*-KC3GKCVIS<7N58}EY^Np#>#dd}Hc<5da&#KK#8&Q<A;C6O#3#+iWX1;tpB zTaS)0$CgHuJA&veXJ2xL(p$am>2^{15246J%g;pbZuUI(vg{Jqpf>3d3kvx3B6oNs zFbuX<-lX=8c_CFLxU0;(H{WtdA9#Z;s|E-0Dwq=_<)?rloFdi}-1KtXu;n)?g|bPt zbRz@h6J{GQwTwP|n`G<9hP~Vpg-R}E`c5pcHqHK?t=t}(TCCh=4i!@PDnX3s-E`gr z=fV2Q`>k#W#7rh<3u;~=3_ID59!FR3YUS@HSntySnqS)8Be__~(rPoyG;x2C@y_Q%lo9LS$K?X{P40bQT)cpBLH%`Ho>7lxn1l#eG$<- zKIN;P`%WQP3YS24rdSKIq~1fUQ?s;8GH9Xd{xlpCnk&tw zcQ4XA6_N87y-jFbS_n$?@HreA?z6v<^04KxC^6zM2#>6U+N)@SvTpNnE{SOq{^&rS z^~4qboER4MP!}s`LAJ;E>Y_}8Br(c@Fow6khcX$vT$H7^Dd=6%#(9XzmS+AwDU-=@WFnfw!cQgi+ z;ufCpgP`iKrz_whhN9tRDa=8rWM4u@j-U$-mS~)^UKrTK86XkZUsD?u@Ho&?z7xYu zvh=hGeX{5m*O)gK`>0lSIQ-5q5;fz4j(0sIUtZr4xqoz$YBg7t)k*c32p{cnLS&R! 
z7^+CF@1y<&7b?GU;^Ki~>}0X0!fpsB5-V`kQfU08ZkOq={Ou#_DM$*b7*&k+K5&cL z2cJ`*PQQH(_HN+Gt3Dh~DcrSr+B<;<6SkpVs9zVSLPxOw*=u;ed+n*UwS$oz%YU8r zpF559zfSwloyPWGr~M~RGcnRP{BL;vhnRNjwEu?Zf0PJro%Y}G{HxP8U%s_dq67m2 z$AZ7%b~l{kSGKV=qh&wEU6-?Pet{6TZEV03!M7Az`QAycN@?%C6UD4kAssey$#Zz| zl84k~7n{yLVuN;6g_${cI8C0YT<~y_HZD+K(R-Y)4ew?p-rDH~HK;Vhq72|H+n?6>Y zf1vx>_Rv`;bdf~OdGd2<;Z;%J7e~U^#rt2abNhS8-;2}(s-P72J>eIUxMcI~m=(ke3du=Ect{3Png)SE}ga6K0n*LC!c z?(@_!e|xpI<0;lMQ7!}9o)A{KHU}m{MUP~GHZQp z<73&c>R6sE9cI_Zj@Nu6u2UwC3T?1GBZg;IwlCK#*016|E_N%ko_p|4Ox(ZEJ3Ysf zdxnRTsK#AT{^71E~$LPiC&79wQUmLGpZr8 z@B6?wEvq;!=d=kpwQ1_iAYM`=EZw^FK}x$dW$|VEYm)_Bo32KQdist%@)Rw;{{6Sz zs&+=md)&hTqmSz)osQ%-3Y>?OkIHrN<(y`+LZ=Ez_N4P&bzRnHA|6P+9gtfh-K;Kp zT{vUgmpy6Ns@zxfG@G%$deKb_{`l)gX<-)al5&MnmZs^_gZ9fYOs)7R6LZIRwk(p< z6XA#~c1}&MG-Wx4l>N^X?~lEHsf4_sFi`297&R4<@lii+|~{T z*Un+?^Cje~)3Vdg?(WwQT=wzfEss9(FkQyw9D87)IhvcEKUe~C}>=4y4 z>ss0KOXsBrn_;h=nl3kwWBB=GBn>H>l%5{#xEcxYc<|R)XWN>ov@gdx3A;43PA>>?x@H=Tq&^uLDZ>yCPzDZtDaS1PF>ewBqv6-v;>V}xyfa-e9P*GH@|(D}Zl^3F z12zcRz;X5{$t#f#lPXkLv_gok0zo_9X|ga}iV@WK;MMGv$6<;nHt;n=&^MQL`Wn_Kzg9=^C5H1wk@AU9Ko*8|uqneJ zoCp7QbQ+;b;pDa7oo)^%o*|9{jL^V)G@rH$8HqyP1oorf*=(-W(AH;L1t0(ImCz4kiQtGYL>NIW#xv7#?~97kc73ntEQ} zUu=Lg{uPeP29it2Sr|+8gMSOsmu93dCmK!gL9i~~LD=tw>nr9J*`}NCPOFcF%Uxr0 z>OpeqEg2JpqiF6OfgzTF_2O?5H?;bwdSr$zjb(xQOFdX8y)I2B8$kq6M{J@$ zK`5#&b_8IJKI>l!+!&+9#&;edoP_L-f7`WaJki0IWw$YNl2Fuu+t1b zE{5QdS&bd3Wv*5!5E;35k@3E6FA+5{rwBQ*kjW|6O>dD)CzT^)i%qPND&owqw&w;r zT?Z&ES#?YRC)5fo56Q&&AkDQf5oiIZXG*jMiN` z*=d)4`yKUHmj;MzDeheChn$2_+wXiJc?R2P3tDLlaJd!_%rpX>q|`EmKy%BMOm@Iw z8<$z6z`|{qV4eS&Z}#~(Xs*GS>5$1X2?Va;$Sz>WE-23h56jtSE{fRZ1giq`&c}R2 zw!X4-(M{PJ8Kdz|=tmP=b+z(6%<@-9GrhA+QO%2#OWp zVD1M;7DnT3kS3g5LSmHa0L5NvO~HKGr$a`mcW5c07Wp ze0y=l)%0WM#PRnASmOxr58gmN5X$~7JOq$H;Q@*eTO^lz(en#NUk3?YZ6s)|=3%TZ z+Kr$UFszYFfGJ*3N?eOhDz$h(myZUz8+AnFv8T=8_`s1wvGv{$mgxpETq@iAQV|K~ zeTWgPTxP=XYFa$`APe+rij5B=c-$h)Rw7cSKgRI56*Yk%z}H~ZuxUK;Ibvu~7VwFB zfQmW??PZ602LWFcLT)6jM{Z3z)%Tku;IjpwmDV8}_@CxmOwCJNOX)Wg-Q 
zqVi`JB_ANPC>LP8=Ec^$$n7Zoc$)4K41yE141pu>mLx&}L-6G$_*~|Fl|eu-Uz7rZ zOEhBoE#G!YylFX^NXYwGO2DtFVrFDFQG^JE%abeG4#N(^xz5NoCFo;aF42j z?OUmL!HISS??H2)S%D+X&-2oNQ1teA-b?zNRXGF?2z$Xg;1qRl3Wzuc2Jz|rgWxog zH6Ow?K@4c_iz{frj{SGL(4Te~KkV{OXOcbiWFSB{=r8Y`zKI1OReux+WNweP8hRmW z;n*9P$-o0nfmSEgn;7yH`CO_@F9EtyKR5&%hZzX`<`x>oTT}wQ-isH7GRi>%L8-3; za0&o!Q1n`wksm~&;M)?RKywK0EtuFC18C&Q0&Zu|%>>f$mk*=`Akl_QoC`Pwu|%Y~ zEE76}-g=3iEiF$yz8Th|8CD--eyt9g+uLtH<6B&SLW41k`Mo>^XwE^c!{`H!je}FK zsvxG>acqHHj{4TugMYiP%A;`lwfBt1AxTgmNc?3&er*z5T$xI|t`?=Yaim4%iRpD05a#_aWW~yc+xlOlAJ$ zM&ze&fK$HtMk1FEFMmqexLpC1@cpAZa(vU{?BZcYwHHAgR{bLI26Djb|-8RzT029b9wyJ&VLYcSe z{d6--Cr>y2L}pB1W>KbsTeF4+hauui8OBo7^ygD~L%Q0sAeW?dq0QpUl1YKw@>vO` zi2=8Tht6zFR@H&+Y=tYX3z^P58}tGE0^wl=b9Gi#PF)$;{O@U3%oj+67%HIzUv-P#m4z^{s(hjAkO$gafRqj8eT&}R6*{pRM z$i#WHC)(hU&7;<36!YaB@|pfT7K*VZN1KCn7alQQ_7`h;&GX#4YFyQmnwAUxO*AD6 zgC$3enfXHzl?@j~xl8w`u;cvKYGox|F^$y)@9W&uV=Zk};zC;y_P>rA^*G0y!3t>R z5nSlyjuI|%%r*NEEQ81trfN8zty?9%aF{O{kTEb7L2v&b$SY=iuABRtH6!_UrV7r> z0M-2dUC4B;p4hx^R3{{bWhziIfr68& zC(bQEZLNFlg{z=aAVUl#L~O_)r2QHk_6B&1{)S}=(cDfLn72QHH(IjZP!vEgaD1pA z7#R5*215MAK;V>b7?@F3dUvZc8+nV7ZvtK^Lw9R;e)V^nC-^O5ZesQu%~Mq4 zRISeK4^E#4QVpyv0BH3fKtulyFSmfk{S(k^e*l_7X{?hh3N#3#N88DU_eDZ-*hyn! 
zKS6`IwKtrxbp6-)mR@KOU-;e`;y1o{y2pz!Rw(j7x&e zf#z!12B-^h1CTf&=-;*tF^X^OO6EC;xiqVY3YxrQuCt&I(C z7WuZi!BrpJNIxm*u_J0bo86T9NH2g&zrQ_PDoT`s&GstheeG;Ul2f8P-`I97dmHt; z;P`Fn)r}g9=ovLZfjiTa3)1;dUu|!>KpHN+yu2%EE}(L(+J->D6*2cOYWFy34K-xM zG6u$OMFn+z-PvgR({0+Vd9nNoKca%6$@KaaJnj-8(g23r3gWcIaYiQKHvc;m7ft0G z#kB<}Zok<`?lUb1x>|nBo4MOY7R81$-DA1tjkE?AhXl~a5pv)~ANH1g#HRuCbsom~ z3<@-3!9X+irnn6hM*t$m3q|64Nt*Y2Nh+1Mxd9 zS}kQzpmaPMme#GU%-P>-LbBDEGS$5mLg?s2<>E`_(mFBxj@M<^4#3xDKQ$iSlUg7| z;fm%1#~Ok7f>Hxu&YQwPeG*z?KMA9i4)w<8x-CFZ$DklP{1qp}$o$oAaANm6P6Q0d zle|L%hw}!5+rtE~fERvG9QM0(aDQH3+&3t`N7$WR$O9S#>s2U^|C+)}|2c&-g!ixH z7UFuQDywp7v2mi0RC<*kb}*{QNnK=Z7ENYLzmFgIhi&nZa5R`$7m6A0(RVI;2-S@Rb=iPrPY zNP8s+`M}W`&fTaP_covzR1G2I0&n?-Yt5TcO3T(|oHNcZ;8V7r_~C*IBuC)Pi+gFt zpgJ%>$&*Z0pf5HQz2hw@s8WJ1QsLPe?F`LVZse0cV5MGM)p&iyo3E06>G-h_sF^5Q z0A?<>=5bRqHU3l0WOrLLIi>!nnaIZyfh#W^OhD- z>KQ^1n9Sqf)IPj2fj5N(vA-{fqLVPv!M$~H2hHo{XSQ5Sc(ahKU{DXi$Q>TmU;()o z2xp~@?`o9Gu2~SQ0H_-E!I<8y!v^x-Lx43SmiQw9tV3c^a(X3}Mk5Z$oo~7{-7xmG z?c1VtF#k+$;ExiHzf>=7-{iVjVdCK zbO$UyABnuEYX+rJ-UVs1$Ar8tpctKR`7Y%^Y1*`H>HXmrYJfJXa$qtW+q86u{i&~*ye->O01mGpDls%MJEjWJnesfVrbJ6Q(P zR(C^;HT(8k|4|OI`CSgm4mRm6$QI*cd1T+_SXQOFiif=c60Jx7NsVJS>r(T4ZIw48UAjX807nQ#6C*RpG>Q(lS}d zw_*+wkG}jAk7yfUw*U(LzrdogOTMvaxhl@f9@YE^v^oE}8F-5je}Zuzg11vq-3fQL zr#V5%3Q7vdr)_3vz)K0_4f^_QDuD`YzbS7Bkm~3SsE%kFhKoKF@OH$ob|}I=5pT7C z0-ampx8xT5OL7zbk=(>IX0sk3Zkg$Tm>L|xzbEGiq5v@ntu7RlsT0Y24c5ZC{FyrO z4SZrjwnWxy2%3Va2rXT%u=|61AVLcj2KQ(G;SfPZK8pSgmAxSbP3Za`m$gT(d(EJg zA3`gS3I)`AJGuim@y2gI8`QO3cAVRL-$ek~`qvb!wbC8ViO4+WOKzbG3OaWW8ju-f z{4~`6(K(E4{gK?*QPJ|WK#1v40;SQ}rixxRNbMXv&&aClOY}*|DO!JhqjstRYNr}U z2-n9&lLg8r72IXLiqE@jVpfHt3O~OmT(r$XENnW0G`r5|ccYRq64(q#pYE%znZZRH zs7PE5h@aicDaSawU}r4z+N_(HBpCC^lcAJeepbF#=1vz&|5UOjUhRb?M+D->IE?{6 zH<;&?KASSPlIKJzAbCdpA$j)O{Z;aug937m)ZgbCyJ_ZsnQKO$=EL0uwCIYFt$ zM{_KAxc3E#TOcUaaE}1e>acb$w*o$G8m8u3m3hs|VJpjm8ljOXNKqy~Qm@yQ>(=F( zjzg|Sj*K5b>E^pcXTxGv4GYqfY9{Z1MmUAC?MAw?VYyKsfq^EDUfjBwDURERrVgBP 
z3_chq-pD7!kq`vwgut@My!7u%S>#Wpj2Og$ZWOkS`Wemv>9FB5A5#qr;rd`~$v04v zLfP%_e~_#C!V-{L0U@^wCAaIZl`~0j$2}jZt_FJF3UHpff~ZSxG($BLh}mUC6my`7tIW zi@uN$8rKb;nF|#BTz2IiocR#;uU1{Jj>CXOSH&KKEPr>`!*SK6K@`h`LyMf%78BYU z{*loQk0pa7VmxdE{O5S_q+yaz7cZz{?HeWTZ)qR970uY21}b=n<)SzY?KXAT#k5rYjaXrEzRiI_q(*m?$2w7Q#njKFhd~gS}U}%$P-M*e( zt7x0Xtt>)UA(Y=-X_h@Rsc!3(6G6k=+bH-*^oFNN6jCc5C6Hc&{lE53O$-3G|VN%X%dmFaQ>l$v^MM{9WdB{axm~ z_fzI%{wZ_5qLyrey^$x=fbz>F69P6{V&Qffvt;xG4|P(EW%JY)&o_0eM{G_kN=M>?iS$3;s{#@t#KZMb0n=?6ey`ny7%0 z5ca;)6^9|Cam*gCmhWRaBM6kB8J+uw}Q+)slNy^Vb%XE$c!vo(iURG^2FkE!2x2b zRpZlOfIa%(imoA0q7jZz3%3tq6+_5@AvQR)i(41;6~4BJ2l1giWxtHTovPJ_STrb3lYG z0z}x_QNooQ5qAA|5!U~IM});L_(mU#0TDJ)b`_JtB8XvLb5YyW!< z#QS4KE?>d+w&d78f&Y;`LS0kf?DKc5hC$g`x&s1Q%mzwq)b0jV0w4fz7(}H9rLKGl z;H3=^dO&5_&!qr>3PSh;RB$?k7il|9=LUt6k$k@wbM(I#bHP7~xwPA2&L|ujp5z8f zi2Wf5mTpq|Z2@?@hy^T~`Tt1idtJiRf2Dm%e^2{X)k?~by^d1{R0gveoS1S&%$WLi znzqBlFULMret5^rIRHB|bGh0t$cHlBW^uK*LL`&0{H5uMTE@sS@MV;y1n|zj@}-__ zfy4>W_j~xs>jeceX2ZDcmDX*FXB&nm-Cmd17IHZ!ZYUN`584bXvb%1~B*@Nuv80;I ztXRL?``jm(d{%cpe|Sy#Kx_0mX5C?XW$fS@^7rp=&y0vGdIL`vAVGtHp?&}U_EX@A z9c6t3i<`#_Mq*^F)0u&%m@YBncJN??hA{$^O`Y!_$yW;K2}$4UA?jWVJeVtPK9H(c z;0hwdBKJ?lX&is?YNYR@iLHD}&xB$YTcps$+x#u`B=Jy#NJvxJ_YAnP|a62m<)U8vcmAv z4iYT#GS86Qlx82id1OnZY(Z-;vHOB8w=1dUT@hup*P6s=J~{45TYD$^20EdYKKQ92 z>A4f~gw00Z*NvIBulu=keDqBRk*daSF68Oz)A_o*+?PqJRzVS|W#Ht<}Usp0DG@8gB=(iw6PctXJ!=a=!aaKDY_B+EhB;CBp`xP~EuTH* z*XPe~Vjk?RDiDc9b6ku~a%VzzMRF`gP6|7Ik)%PRkSK`?ZVcfg3Kb_sJBoU;d39ug zL~j|XPD%+K(i~^9d-Zij6f4x9DLBo*lFyFP;gzV~6KVX(s!qJtsFdzk!D9WYinfLa zB6*d`5|z(m+eOT2N0TNP{eAjSG2i$ucoX?nmEVw5@6UDtwJPF_joXE_ux)5 zA58b-IdCv$ZkXV6lSXA~9+%43Ocyc<(`4_YW09JV!UW)UAkk=s+-nXu=lN+8+TGdD z9#rt`m2?dix1Ci9vVdcCEzD?&2VgxHA-4CRv(HlXVw(R-iuh3oFQDW!(I$UyY=#vA z1ZT z9c2*>!ov4=UT7pAOq-9L!(GyL4DkwUyx-UgUHUX4t3rw9)$9C`qU9x7RZtDpi~7&j z{f4P0&o`OL=#)B^1e9%QD+Wi(8PVWjRk1o1#q?HM(`IMY6r&KM$W+$pr-@4$-&r8N zNA##4zkZcA9B+WuCE`iaiE?;H#u+~`V#oO_{k~sFonP=3&3&|i-Gk1B^#_7f1FQnC 
zvd7}7VkYDmKM{qjdCbaGbR$!4Ad^lyz8$u8aRdMISz`ruKPiIE-`7_BO!pXrcHd6| zW-0vb1h+_biTj<{(-slq^zgT*;=IN)PjWsWuiPiHzLWeyn0>dtOFm&o@RJpHeCi}_ z_Oiy)-3!j$#(ddDKWZeNZGsSFAvkxsYkb3eI{!ewz8Up>VfugigbI) z4^?-pX-7|sSh*(8Gs&(+@SsCBB-BYSDzhU(4mbhj2dcr%h zHLzNNdkk(iPQYhh6M)b5L7%x^3b>b828-(Zcf19+s#(l>;G{6Axys*!#5p?pTF-pdJa4Zh5$TW33?jr<{QKIMh*@@^xK2NKUuj4cuWta5eWYjbMxF7PNC1l zGV_e$lWCItdgKadsL{fAOnbM-ur*EYTgpT6xt?%Fvf)0~-HN^AlvMCAxzq)x)O!<+ z?(OU6eEb$zMCy73YY60KDW#CrU(7C7%A^>aW!E}k+$kz$Se|GNZV#@6Pq|bEB|by7x)tUaqjcIDFF9ii{HH3+ z&NKGICQ}6MO<|{Nw{}^H;?2c1%z>@Qy!0iGvCX@IC7(wTW~kShs4HHSe&x7N=17GA z@gT{+fT{ifI}M@klMQXH6HZu6Z~&je9&XhT-@_;#cER}VhflvWX7^&jNp{h2$s^26 zzj5*(+N8Y*+=huIvD*6l(5U$?g@%I0;|mdoSuD{p?EFoc)`&86N=eSWj<63O(Fw~{ zCXv6wo|^ehLYp~Ed((e6`Y-I#Kr)hNj0%W>{l-YyZveYl8ETi1x zVICyOYw38-eH_fK71Y_#EOME9%QG0hLpK)Il?1J$87z6S#4RD=dM&Z%&iv?3L|O7O?3_T6U@P38|p;E#b_U?M^@YqCj{90$CW>*FP5Uu@VsbzT^ie^%I36ET8_hW zJns6U61GhO;qqnalZhn`$q<5K{(+0fgX2(Bz6;e$(c^~{5zQ4Goa31uJFJ~_3+j5| zIT&Bq;zW$wx+yFDA+2WEEC&t^_Ickzz~Y1j_3*JF&s`8Xug6zoAQ*bdcQnNG?ky^+ z$J;|fA@$f>XK(FYFK*8LK6T??*h)13G~@U_7#Q-8sq5fsVRSQb8(&W;E^(tgoI9q| z_999`BqtYrWuT{?lbB!x_lcjb3N1g9qBsNPRq@={yAYiUh#b7AbgHRU@fydN5^jZ4@K$GDOJmNk5(>v_3xPoiwMfkC(z4X2U6Fqm^b~JA4$CmUNFYKvtuu z?ZG4rnkAV^G#!poVS>6*LufoIW$RLUAA8guj!rZOR~@&qS{{GFi}wPniuVXCvP8O` z&t$Ntmoq;R$_qn(t8vdF>!pc(8jkCJZ^g;(3RfwoR^Oa@YikbihCs_xH@Opf{5y$p zN|$xv=%r?pIVW~JbNz<-rY&fr_>a286g>4mZua&)CW@U;YA^U4nfcZX3(2jKl-jt1 z7r8TiW!|lCou1sB%v@YoxT@_@A?j+oy#C4v?SwRX-37@KPvVEWEDl(41^$fIOvM;6 zp$T0Y+zFq;?GsB8f2G8a{cKi6I!yVg=~1eaAQn^(M#toV7z4xJ{762sX=jPUI3x z97pfmoi{aPKM=X|CHT^rG!))jyVXa|zU9g4j^R~Gs_z$nXRtfKZzN@#;jR)2m#RcY1 zX5HM&;M)L>0p$uwZSobSWbpS-!QjMZ!B8R^1P+=MY~WHF!0GW_As=#og#nsvyr%gI zBaia=?wdC~c+EmFT;2g>*53`liNBwCv1}PM51jDrdY}_vQ%SACpao~a5E6kF*Q*QXEj0ScdaSd)Y+b~8eK5CJ08a|6VFLyf2VZL@o6i!*PND`W;f$&HK`G(?2_C`h-%9W!e!QodU%>m(& zBptcIeG#?Q##f(`U0gg%8tUA8b{EJ1_^fI=GdaT&!JtJ z@7WGMZB_B-S%!6AwQg)g;bJI@n=ZKcWLNpHY&*`5R%aXfQiIvUzIJS`@F3N0c-?{Z 
z@c2v;Z1Gj_B=+mOPa&T*44Vp$3@FXS$d1D&He@zAw<32f*>YR#HbN35?fS_I)U0uy z%r^&RU%g4|y5rf{+qJ=f>BdpyEDkctW(Jb-r;VJHNS#!fd(xA4#SxnSMUqm@?RVOPR zqgPezjTL-+4;Rut%djP`li?u3s9As0&PMQU)a%Uw6Z%@IONv_aXkx{A##n!@p%W^L zJBG3^NR~KVp#$rZlB`vx#cMbbL?mlOhW$;pqEBAo` zMr)&2Q?8D#^K{zce(jLa{z!DWVJ9~eZ06vVUh**fT!OhxuJ2ba#i5I~JIPAj68@J% zLV4yRxsPf;p>EHs&MdQ$pl2l_IwRZJWVLAbW+ZM;AiKD8EK`qEz14j?pWEqX?e*c+ zV;{MSl^`qgh4fKgy4OkYP#wedb33Y8Q4u~d8Qt!gdTMdlOi@{pk#&#Wv7s~_w^!VK zm(0;SS#tbjG)0koN_)_N(Zh$$vXvNPBAElxg8ukjmQa!xyPW+ea;&eL2T#lAFDZX+(o&KaqO+8)?T##VnZX!?9EOgh7 z#>NadZ?^n#hL6fQ0@Je{PinC^cipg^IULCyXkqwgGxDLaQc&v@%Vg+=BH*;6CjObM z%2^0@?-vuITVGbaWDoYE(H3fOG1Wei+(z=uA5!3`eVM2gN7^w2hnELe%i@?cC%@vn zb-pR9Fk^hQz0q){8_m;--Lbuez4t`Kc_u?vq=x{z&wi`O=)rl29KT?1&+6V%ON;z& zQn|Puhv%j$^92ibifjH>9ocrE+`Ew#J!;x35$>n7D?$5PLbK81Z!Sa765yI}KTTN+ z)IKi}DDkCJzO3%b5r&6hIP~gi%Z~k`D^*6sb$r=_L{N)FBvRk7`KwQFTJ>ta~`# z=Qj-4Z}_qUC}4-sU^~-ile;WR|QPHg# z7+8i6D&Vlc*|zA<2L9YJ=Vt>wK!yJ2{yBeI|GBTrPiuXsU#!3H^75zopWBuEH17dY z@BeM{lK)#vpe;PU|77@F10usOE&aKn$G_hYJKWzi^z$kDf4?D9_`hlB=ac3CenTk; zzcuvdW(fa&L&LzPU%&Y1&kX|p{f6M~{!K&wd~1I`I{tIi_>jIe^y`D>qry|5erc)BH~@;iow*-*@vL%EF(< xfAZ6x#^}P|jQ?b-e>U+awEk@3wfMIt{s3BeDQF;?K%zYwSTzvf&m=(a{tw*3$6EjZ literal 0 HcmV?d00001 diff --git "a/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.md" "b/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.md" new file mode 100644 index 0000000..96dc260 --- /dev/null +++ "b/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.md" @@ -0,0 +1,1482 @@ + +# Lesson : Using SQL to Uncover the Truth – a Case Study en-uCertify + +> ## Excerpt +> +> uCertify offers online computer courses and hands-on labs on project management, data analytics, cybersecurity, and more to advance your IT career. en + +--- +In this case study, we will be following the scientific method to help solve our problem, which, at its heart, is about testing guesses (or hypotheses) using objectively collected data. 
We can decompose the scientific method into the following key steps: + +1. Define the question to answer: what caused the drop in sales of the Bat Scooter after approximately 2 weeks? +2. Complete background research to gather sufficient information to propose an initial hypothesis for the event or phenomenon. +3. Construct a hypothesis to explain the event or answer the question. +4. Define and execute an objective experiment to test the hypothesis. In an ideal scenario, all aspects of the experiment should be controlled and fixed, except for the phenomenon that is being tested under the hypothesis. +5. Analyze the data collected during the experiment. +6. Report the result of the analysis, which will hopefully explain why there was a drop in the sales of Bat Scooters. + +Note that in this lesson, we are completing a post-hoc analysis of the data; that is, the event has happened, and all available data has been collected. Post-hoc data analysis is particularly useful when events have been recorded that cannot be repeated or when certain external factors cannot be controlled. It is with this data that we are able to perform our analysis, and, as such, we will extract information to support or refute our hypothesis. We will, however, be unable to definitively confirm or reject the hypothesis without practical experimentation. The question that will be the subject of this lesson and that we need to answer is this: why did the sales of the ZoomZoom Bat Scooter drop by approximately 20% after about 2 weeks? + +So, let's start with the absolute basics. + +Exercise 9.1: Preliminary Data Collection Using SQL Techniques + +In this exercise, we will collect preliminary data using SQL techniques. We have been told that the pre-orders for the ZoomZoom Bat Scooter were good, but the orders suddenly dropped by 20%. So, when was production started on the scooter, and how much was it selling for?
How does the Bat Scooter compare with other types of scooters in terms of price? The goal of this exercise is to answer these questions: + +1. Load the `sqlda` database from the accompanying source code, which is located [here](https://github.com/TrainingByPackt/SQL-for-Data-Analytics/tree/master/Datasets): + + ```javascript + psql sqlda + ``` + +2. List the `model`, `base_msrp` (MSRP: manufacturer's suggested retail price), and `production_start_date` fields within the `products` table for product types matching `scooter`: + + ```javascript + SELECT model, base_msrp, production_start_date FROM products WHERE product_type='scooter'; + ``` + + The following table shows the details of all the products for the `scooter` product type: + + | model | base\_msrp | production\_start\_date | + | --- | --- | --- | + | Lemon | 399.99 | 2010-03-03 00:00:00 | + | Lemon Limited Edition | 799.99 | 2011-01-03 00:00:00 | + | Lemon | 499.99 | 2013-05-01 00:00:00 | + | Blade | 699.99 | 2014-06-23 00:00:00 | + | Bat | 599.99 | 2016-10-10 00:00:00 | + | Bat Limited Edition | 699.99 | 2017-02-15 00:00:00 | + | Lemon Zester (7 rows) | 349.99 | 2019-02-04 00:00:00 | + + Figure 9.1: Basic list of scooters with the manufacturer's suggested retail price and production start date + + Looking at the results from the search, we can see that we have two scooter products with **Bat** in the name: **Bat** and **Bat Limited Edition**. The **Bat** Scooter started production on October 10, 2016, with a suggested retail price of $599.99, and the **Bat Limited Edition** Scooter started production approximately 4 months later, on February 15, 2017, at a price of $699.99. + + Looking at the product information supplied, we can see that the Bat Scooter is somewhat unique from a price perspective, being the only scooter with a suggested retail price of $599.99. There are two others at $699.99 and one at $499.99.
+ + Similarly, if we consider the production start date in isolation, the original Bat Scooter is again unique in that it is the only scooter starting production in the last quarter or even half of the year (date format: _YYYY-MM-DD_). All other scooters start production in the first half of the year, with only the Blade scooter starting production in June. + + In order to use the sales information in conjunction with the product information available, we also need to get the product ID for each of the scooters. + +3. Extract the model name and product IDs for the scooters available within the database. We will need this information to reconcile the product information with the available sales information: + + ```javascript + SELECT model, product_id FROM products WHERE product_type='scooter'; + ``` + + The query yields the product IDs shown in the following table: + + | model | product\_id | + | --- | --- | + | Lemon | 1 | + | Lemon Limited Edition | 2 | + | Lemon | 3 | + | Blade | 5 | + | Bat | 7 | + | Bat Limited Edition | 8 | + | Lemon Zester (7 rows) | 12 | + + Figure 9.2: Scooter product ID codes + +4. Insert the results of this query into a new table called `product_names`: + + ```javascript + SELECT model, product_id INTO product_names FROM products WHERE product_type='scooter'; + ``` + + Inspect the contents of the `product_names` table shown in the following figure: + + | model | product\_id | + | --- | --- | + | Lemon | 1 | + | Lemon Limited Edition | 2 | + | Lemon | 3 | + | Blade | 5 | + | Bat | 7 | + | Bat Limited Edition | 8 | + | Lemon Zester (7 rows) | 12 | + + Figure 9.3: Contents of the new product\_names table + +As described in the output, we can see that the Bat Scooter lies between the price points of some of the other scooters and that it was also manufactured a lot later in the year compared to the others. 
+ +By completing this very preliminary data collection step, we have the information required to collect sales data on the Bat Scooter as well as other scooter products for comparison. While this exercise involved using the simplest SQL commands, it has already yielded some useful information. + +This exercise has also demonstrated that even the simplest SQL commands can reveal useful information and that they should not be underestimated. In the next exercise, we will try to extract the sales information related to the reduction in sales of the Bat Scooter. + +Exercise 9.2: Extracting the Sales Information + +In this exercise, we will use a combination of simple `SELECT` statements, as well as aggregate and window functions, to examine the sales data. With the preliminary information at hand, we can use it to extract the Bat Scooter sales records and discover what is actually going on. We have a table, `product_names`, that contains both the model names and product IDs. We will need to combine this information with the sales records and extract only those for the Bat Scooter: + +1. Load the `sqlda` database: + + ```javascript + psql sqlda + ``` + +2. List the available fields in the `sqlda` database: + + ```javascript + \d + ``` + + The preceding query yields the following fields present in the database: + + | Column | Table "public.sales" Type | Collation | Nullable | Default | + | --- | --- | --- | --- | --- | + | customer\_id | bigint | | | | + | product\_id | bigint | | | | + | sales\_transaction\_date | timestamp without time zone | | | | + | sales\_amount | double precision | | | | + | channel | text | | | | + | dealership\_id | double precision | | | | + + Figure 9.4: Structure of the sales table + + We can see that we have references to customer and product IDs, as well as the transaction date, sales information, the sales channel, and the dealership ID. +3. 
Use an inner join on the `product_id` columns of both the `product_names` table and the `sales` table. From the result of the inner join, select the `model`, `customer_id`, `sales_transaction_date`, `sales_amount`, `channel`, and `dealership_id`, and store the values in a separate table called `products_sales`: + + ```javascript + SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id INTO products_sales FROM sales INNER JOIN product_names ON sales.product_id=product_names.product_id; + ``` + + The output of the preceding code can be seen in the next step. + + Note + + Throughout this lesson, we will be storing the results of queries and calculations in separate tables as this will allow you to look at the results of the individual steps in the analysis at any time. In a commercial/production setting, we would typically only store the end result in a separate table, depending upon the context of the problem being solved. + +4. Look at the first five rows of this new table by using the following query: + + ```javascript + SELECT * FROM products_sales LIMIT 5; + ``` + + The following table lists the first five rows of the new table. For each purchase, it shows the sale amount and the transaction details, such as the date and time: + + | model | customer\_id | sales\_transaction\_date | sales\_amount | channel | dealership\_id | + | --- | --- | --- | --- | --- | --- | + | Lemon | 41604 | 2012-03-30 22:45:29 | 399.99 | internet | | + | Lemon | 41531 | 2010-09-07 22:53:16 | 399.99 | internet | | + | Lemon | 41443 | 2011-05-24 02:19:11 | 399.99 | internet | | + | Lemon | 41291 | 2010-08-08 14:12:52 | 319.992 | internet | | + | Lemon | 41084 | 2012-01-09 03:34:52 | 319.992 | internet | | + | (5 rows) | + + Figure 9.5: The combined product sales table + +5. Select all the information from the `products_sales` table that is available for the Bat Scooter and order the sales information by `sales_transaction_date` in ascending order.
By selecting the data in this way, we can look at the first few days of the sales records in detail: + + ```javascript + SELECT * FROM products_sales WHERE model='Bat' ORDER BY sales_transaction_date; + ``` + + The preceding query generates the following output: + + | model | customer\_id | sales\_transaction\_date | sales\_amount | channel | dealership\_id | + | --- | --- | --- | --- | --- | --- | + | Bat | 4319 | 2016-10-10 00:41:57 | 599.99 | internet | | + | Bat | 40250 | 2016-10-10 02:47:28 | 599.99 | dealership | 4 | + | Bat | 35497 | 2016-10-10 04:21:08 | 599.99 | dealership | 2 | + | Bat | 4553 | 2016-10-10 07:42:59 | 599.99 | dealership | 11 | + | Bat | 11678 | 2016-10-10 09:21:08 | 599.99 | internet | | + | Bat | 45868 | 2016-10-10 10:29:29 | 599.99 | internet | | + | Bat | 24125 | 2016-10-10 18:57:25 | 599.99 | dealership | 1 | + | Bat | 31307 | 2016-10-10 21:22:38 | 599.99 | internet | | + | Bat | 42213 | 2016-10-10 21:27:36 | 599.99 | internet | | + | Bat | 47790 | 2016-10-11 01:28:58 | 599.99 | dealership | 20 | + | Bat | 6342 | 2016-10-11 03:04:57 | 599.99 | internet | | + | Bat | 45880 | 2016-10-11 04:09:19 | 599.99 | dealership | 7 | + | Bat | 43477 | 2016-10-11 05:24:50 | 599.99 | internet | | + | Bat | 6322 | 2016-10-11 08:48:07 | 599.99 | internet | | + | Bat | 46653 | 2016-10-11 15:47:01 | 599.99 | dealership | 6 | + | Bat | 9045 | 2016-10-12 00:15:20 | 599.99 | dealership | 19 | + | Bat | 23679 | 2016-10-12 00:17:53 | 539.991 | internet | | + | Bat | 49856 | 2016-10-12 00:26:15 | 599.99 | dealership | 10 | + | Bat | 45256 | 2016-10-12 02:08:01 | 539.991 | dealership | 7 | + | Bat | 48809 | 2016-10-12 05:08:43 | 599.99 | internet | | + | Bat | 42625 | 2016-10-12 06:17:55 | 599.99 | internet | | + | Bat | 39653 | 2016-10-12 06:28:25 | 599.99 | dealership | 7 | + | Bat | 49226 | 2016-10-12 10:26:13 | 539.991 | internet | | + | Bat | 18602 | 2016-10-12 15:09:53 | 599.99 | internet | | + + Figure 9.6: Ordered sales records + +6.
Count the number of records available by using the following query: + + ```javascript + SELECT COUNT(model) FROM products_sales WHERE model='Bat'; + ``` + + The model count for the `'Bat'` model is as shown here: + + ```javascript + count --------- 7328 (1 row) + ``` + + **Figure 9.7: Count of the number of sales records** + + So, we have **7328** sales, beginning October 10, 2016. Check the date of the final sales record by performing the next step. +7. Determine the last sale date for the Bat Scooter by selecting the maximum (using the `MAX` function) for `sales_transaction_date`: + + ```javascript + SELECT MAX(sales_transaction_date) FROM products_sales WHERE model='Bat'; + ``` + + The last sale date is shown here: + + ```javascript + Max ------------------ 2019-05-31 22:15:30 (1 row) + ``` + + Figure 9.8: Last sale date + + The last sale in the database occurred on May 31, 2019. +8. Collect the daily sales volume for the Bat Scooter and place it in a new table called `bat_sales` to confirm the information provided by the sales team stating that sales dropped by 20% after the first 2 weeks: + + ```javascript + SELECT * INTO bat_sales FROM products_sales WHERE model='Bat' ORDER BY sales_transaction_date; + ``` + +9. Remove the time information to allow tracking of sales by date, since, at this stage, we are not interested in the time at which each sale occurred. To do so, run the following query: + + ```javascript + UPDATE bat_sales SET sales_transaction_date=DATE(sales_transaction_date); + ``` + +10. 
Display the first five records of `bat_sales` ordered by `sales_transaction_date`: + + ```javascript + SELECT * FROM bat_sales ORDER BY sales_transaction_date LIMIT 5; + ``` + + The following is the output of the preceding code: + + | model | customer\_id | sales\_transaction\_date | sales\_amount | channel | dealership\_id | + | --- | --- | --- | --- | --- | --- | + | Bat | 4553 | 2016-10-10 00:00:00 | 599.99 | dealership | 11 | + | Bat | 35497 | 2016-10-10 00:00:00 | 599.99 | dealership | 2 | + | Bat | 40250 | 2016-10-10 00:00:00 | 599.99 | dealership | 4 | + | Bat | 4319 | 2016-10-10 00:00:00 | 599.99 | internet | | + | Bat | 11678 | 2016-10-10 00:00:00 | 599.99 | internet | | + | (5 rows) | + + Figure 9.9: First five records of Bat Scooter sales + + Create a new table (`bat_sales_daily`) containing the sales transaction dates and a daily count of total sales: + + ```javascript + SELECT sales_transaction_date, COUNT(sales_transaction_date) INTO bat_sales_daily FROM bat_sales GROUP BY sales_transaction_date ORDER BY sales_transaction_date; + ``` + +11. 
Examine the first `22` records (a little over 3 weeks), as sales were reported to have dropped after approximately the first 2 weeks: + + ```javascript + SELECT * FROM bat_sales_daily LIMIT 22; + ``` + + This will display the following output: + + | sales\_transaction\_date | count | + | --- | --- | + | 2016-10-10 00:00:00 | 9 | + | 2016-10-11 00:00:00 | 6 | + | 2016-10-12 00:00:00 | 10 | + | 2016-10-13 00:00:00 | 10 | + | 2016-10-14 00:00:00 | 5 | + | 2016-10-15 00:00:00 | 10 | + | 2016-10-16 00:00:00 | 14 | + | 2016-10-17 00:00:00 | 9 | + | 2016-10-18 00:00:00 | 11 | + | 2016-10-19 00:00:00 | 12 | + | 2016-10-20 00:00:00 | 10 | + | 2016-10-21 00:00:00 | 6 | + | 2016-10-22 00:00:00 | 2 | + | 2016-10-23 00:00:00 | 5 | + | 2016-10-24 00:00:00 | 6 | + | 2016-10-25 00:00:00 | 9 | + | 2016-10-26 00:00:00 | 2 | + | 2016-10-27 00:00:00 | 4 | + | 2016-10-28 00:00:00 | 7 | + | 2016-10-29 00:00:00 | 5 | + | 2016-10-30 00:00:00 | 5 | + | 2016-10-31 00:00:00 | 3 | + | (22 rows) | + + Figure 9.10: First 3 weeks of sales + +We can see a drop in sales after October 20, as there are 7 days in the first 11 rows that record double-digit sales, and none over the next 11 days. + +At this stage, we can confirm that there has been a drop-off in sales, although we are yet to quantify precisely the extent of the reduction or the reason for the drop-off in sales. + +Activity 9.1: Quantifying the Sales Drop + +In this activity, we will use our knowledge of the windowing methods that we learned in _Lesson 5_, _Window Functions for Data Analysis_. In the previous exercise, we identified the occurrence of the sales drop as being approximately 10 days after launch. Here, we will try to quantify the drop-off in sales for the Bat Scooter. + +Perform the following steps to complete the activity: + +1. Load the `sqlda` database from the accompanying source code located at this [link](https://github.com/TrainingByPackt/SQL-for-Data-Analytics/tree/master/Datasets). +2.
Using the `OVER` and `ORDER BY` clauses, compute the daily cumulative sum of sales. This provides us with a discrete count of sales over time on a daily basis. Insert the results into a new table called `bat_sales_growth`. +3. Compute a 7-day `lag` of the `sum` column, and then insert all the columns of `bat_sales_growth` and the new `lag` column into a new table, `bat_sales_daily_delay`. This `lag` column indicates what sales were like 1 week prior to the given record, allowing us to compare sales with the previous week. +4. Inspect the first 15 rows of `bat_sales_daily_delay`. +5. Compute the sales growth as a percentage, comparing the current sales volume to that of 1 week prior. Insert the results into a new table called `bat_sales_delay_vol`. +6. Compare the first 22 values of the `bat_sales_delay_vol` table to ascertain a sales drop. + +**Solution** + +1. Load the sqlda database: + + ```javascript + psql sqlda + ``` + +2. Compute the daily cumulative sum of sales using the OVER and ORDER BY clauses.
Insert the results into a new table called bat\_sales\_growth: + + ```javascript + SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) INTO bat_sales_growth FROM bat_sales_daily; + ``` + + The following table shows the daily cumulative sum of sales: + + | sales\_transaction\_date | count | sum | + | --- | --- | --- | + | 2016-10-10 00:00:00 | 9 | 9 | + | 2016-10-11 00:00:00 | 6 | 15 | + | 2016-10-12 00:00:00 | 10 | 25 | + | 2016-10-13 00:00:00 | 10 | 35 | + | 2016-10-14 00:00:00 | 5 | 40 | + | 2016-10-15 00:00:00 | 10 | 50 | + | 2016-10-16 00:00:00 | 14 | 64 | + | 2016-10-17 00:00:00 | 9 | 73 | + | 2016-10-18 00:00:00 | 11 | 84 | + | 2016-10-19 00:00:00 | 12 | 96 | + | 2016-10-20 00:00:00 | 10 | 106 | + | 2016-10-21 00:00:00 | 6 | 112 | + | 2016-10-22 00:00:00 | 2 | 114 | + | 2016-10-23 00:00:00 | 5 | 119 | + | 2016-10-24 00:00:00 | 6 | 125 | + | 2016-10-25 00:00:00 | 9 | 134 | + | 2016-10-26 00:00:00 | 2 | 136 | + | 2016-10-27 00:00:00 | 4 | 140 | + | 2016-10-28 00:00:00 | 7 | 147 | + | 2016-10-29 00:00:00 | 5 | 152 | + | 2016-10-30 00:00:00 | 5 | 157 | + | 2016-10-31 00:00:00 | 3 | 160 | + + Figure A: Daily sales count + +3. Compute a 7-day lag function of the sum column and insert all the columns of bat\_sales\_daily and the new lag column into a new table, bat\_sales\_daily\_delay. This lag column indicates what the sales were like 1 week before the given record: + + ```javascript + SELECT *, lag(sum, 7) OVER (ORDER BY sales_transaction_date) INTO bat_sales_daily_delay FROM bat_sales_growth; + ``` + +4. 
Inspect the first 15 rows of bat\_sales\_daily\_delay: + + ```javascript + SELECT * FROM bat_sales_daily_delay LIMIT 15; + ``` + + The following is the output of the preceding code: + + | sales\_transaction\_date | count | sum | lag | + | --- | --- | --- | --- | + | 2016-10-10 00:00:00 | 9 | 9 | | + | 2016-10-11 00:00:00 | 6 | 15 | | + | 2016-10-12 00:00:00 | 10 | 25 | | + | 2016-10-13 00:00:00 | 10 | 35 | | + | 2016-10-14 00:00:00 | 5 | 40 | | + | 2016-10-15 00:00:00 | 10 | 50 | | + | 2016-10-16 00:00:00 | 14 | 64 | | + | 2016-10-17 00:00:00 | 9 | 73 | 9 | + | 2016-10-18 00:00:00 | 11 | 84 | 15 | + | 2016-10-19 00:00:00 | 12 | 96 | 25 | + | 2016-10-20 00:00:00 | 10 | 106 | 35 | + | 2016-10-21 00:00:00 | 6 | 112 | 40 | + | 2016-10-22 00:00:00 | 2 | 114 | 50 | + | 2016-10-23 00:00:00 | 5 | 119 | 64 | + | 2016-10-24 00:00:00 | 6 | 125 | 73 | + | (15 rows) | + + Figure B: Daily sales delay with lag + +5. Compute the sales growth as a percentage, comparing the current sales volume to that of 1 week prior. Insert the resulting table into a new table called bat\_sales\_delay\_vol: + + ```javascript + SELECT *, (sum-lag)/lag AS volume INTO bat_sales_delay_vol FROM bat_sales_daily_delay; + ``` + + Note + + The percentage sales volume can be calculated via the following equation: + + ```javascript + (new_volume - old_volume) / old_volume + ``` + +6.
Compare the first 22 values of the bat\_sales\_delay\_vol table: + + ```javascript + SELECT * FROM bat_sales_delay_vol LIMIT 22; + ``` + + The delay volume for the first 22 entries can be seen in the following: + + | sales\_transaction\_date | count | sum | lag | volume | + | --- | --- | --- | --- | --- | + | 2016-10-10 00:00:00 | 9 | 9 | | | + | 2016-10-11 00:00:00 | 6 | 15 | | | + | 2016-10-12 00:00:00 | 10 | 25 | | | + | 2016-10-13 00:00:00 | 10 | 35 | | | + | 2016-10-14 00:00:00 | 5 | 40 | | | + | 2016-10-15 00:00:00 | 10 | 50 | | | + | 2016-10-16 00:00:00 | 14 | 64 | | | + | 2016-10-17 00:00:00 | 9 | 73 | 9 | 7.1111111111111111 | + | 2016-10-18 00:00:00 | 11 | 84 | 15 | 4.6000000000000000 | + | 2016-10-19 00:00:00 | 12 | 96 | 25 | 2.8400000000000000 | + | 2016-10-20 00:00:00 | 10 | 106 | 35 | 2.0285714285714286 | + | 2016-10-21 00:00:00 | 6 | 112 | 40 | 1.8000000000000000 | + | 2016-10-22 00:00:00 | 2 | 114 | 50 | 1.2800000000000000 | + | 2016-10-23 00:00:00 | 5 | 119 | 64 | 0.85937500000000000000 | + | 2016-10-24 00:00:00 | 6 | 125 | 73 | 0.71232876712328767123 | + | 2016-10-25 00:00:00 | 9 | 134 | 84 | 0.59523809523809523810 | + | 2016-10-26 00:00:00 | 2 | 136 | 96 | 0.41666666666666666667 | + | 2016-10-27 00:00:00 | 4 | 140 | 106 | 0.32075471698113207547 | + | 2016-10-28 00:00:00 | 7 | 147 | 112 | 0.31250000000000000000 | + | 2016-10-29 00:00:00 | 5 | 152 | 114 | 0.33333333333333333333 | + | 2016-10-30 00:00:00 | 5 | 157 | 119 | 0.31932773109243697479 | + | 2016-10-31 00:00:00 | 3 | 160 | 125 | 0.28000000000000000000 | + | (22 rows) | | | | | + + Figure C: Relative sales volume of the scooter over 3 weeks + +Looking at the output table, we can see four sets of information: the daily sales count, the cumulative sum of the daily sales count, the cumulative sum offset by 1 week (the lag), and the relative daily sales volume.
+ +**Expected Output:** + +| sales\_transaction\_date | count | sum | lag | volume | +| --- | --- | --- | --- | --- | +| 2016-10-10 00:00:00 | 9 | 9 | | | +| 2016-10-11 00:00:00 | 6 | 15 | | | +| 2016-10-12 00:00:00 | 10 | 25 | | | +| 2016-10-13 00:00:00 | 10 | 35 | | | +| 2016-10-14 00:00:00 | 5 | 40 | | | +| 2016-10-15 00:00:00 | 10 | 50 | | | +| 2016-10-16 00:00:00 | 14 | 64 | | | +| 2016-10-17 00:00:00 | 9 | 73 | 9 | 7.1111111111111111 | +| 2016-10-18 00:00:00 | 11 | 84 | 15 | 4.6000000000000000 | +| 2016-10-19 00:00:00 | 12 | 96 | 25 | 2.8400000000000000 | +| 2016-10-20 00:00:00 | 10 | 106 | 35 | 2.0285714285714286 | +| 2016-10-21 00:00:00 | 6 | 112 | 40 | 1.8000000000000000 | +| 2016-10-22 00:00:00 | 2 | 114 | 50 | 1.2800000000000000 | +| 2016-10-23 00:00:00 | 5 | 119 | 64 | 0.85937500000000000000 | +| 2016-10-24 00:00:00 | 6 | 125 | 73 | 0.71232876712328767123 | +| 2016-10-25 00:00:00 | 9 | 134 | 84 | 0.59523809523809523810 | +| 2016-10-26 00:00:00 | 2 | 136 | 96 | 0.41666666666666666667 | +| 2016-10-27 00:00:00 | 4 | 140 | 106 | 0.32075471698113207547 | +| 2016-10-28 00:00:00 | 7 | 147 | 112 | 0.31250000000000000000 | +| 2016-10-29 00:00:00 | 5 | 152 | 114 | 0.33333333333333333333 | +| 2016-10-30 00:00:00 | 5 | 157 | 119 | 0.31932773109243697479 | +| 2016-10-31 00:00:00 | 3 | 160 | 125 | 0.28000000000000000000 | +| (22 rows) | + +Figure 9.11: Relative sales volume of the Bat Scooter over 3 weeks + +While the count and cumulative `sum` columns are reasonably straightforward, why do we need the `lag` and `volume` columns? This is because we are looking for drops in sales growth over the first couple of weeks, hence, we compare the daily sum of sales to the same values 7 days earlier (the lag). By subtracting the sum and lag values and dividing by the lag, we obtain the volume value and can determine sales growth compared to the previous week. + +Notice that the sales volume on October 17 is 700% above that of the launch date of October 10. 
By October 22, the volume is over double that of the week prior. As time passes, this relative difference begins to decrease dramatically. By the end of October, the volume is 28% higher than the week prior. At this stage, we have observed and confirmed the presence of a reduction in sales growth after the first 2 weeks. The next stage is to attempt to explain the causes of the reduction. + +Exercise 9.3: Launch Timing Analysis + +In this exercise, we will try to identify the causes of a sales drop. Now that we have confirmed the presence of the sales growth drop, we will try to explain the cause of the event. We will test the hypothesis that the timing of the scooter launch contributed to the reduction in sales. Remember, in _Exercise 9.1, Preliminary Data Collection Using SQL Techniques_, that the ZoomZoom Bat Scooter launched on October 10, 2016. Observe the following steps to complete the exercise: + +1. Load the `sqlda` database: + + ```javascript + psql sqlda + ``` + +2. Examine the other products in the database. In order to determine whether the launch date contributed to the sales drop, we need to compare the ZoomZoom Bat Scooter to other scooter products according to the launch date.
Execute the following query to check the launch dates: + + ```javascript + SELECT * FROM products; + ``` + + The following figure shows the launch dates for all the products: + + | product\_id | model | year | product\_type | base\_msrp | production\_start\_date | production\_end\_date | + | --- | --- | --- | --- | --- | --- | --- | + | 1 | Lemon | 2010 | scooter | 399.99 | 2010-03-03 00:00:00 | 2012-06-08 00:00:00 | + | 2 | Lemon Limited Edition | 2011 | scooter | 799.99 | 2011-01-03 00:00:00 | 2011-03-30 00:00:00 | + | 3 | Lemon | 2013 | scooter | 499.99 | 2013-05-01 00:00:00 | 2018-12-28 00:00:00 | + | 4 | Model Chi | 2014 | automobile | 115,000.00 | 2014-06-23 00:00:00 | 2018-12-28 00:00:00 | + | 5 | Blade | 2014 | scooter | 699.99 | 2014-06-23 00:00:00 | 2015-01-27 00:00:00 | + | 6 | Model Sigma | 2015 | automobile | 65,500.00 | 2015-04-15 00:00:00 | 2018-10-01 00:00:00 | + | 7 | Bat | 2016 | scooter | 599.99 | 2016-10-10 00:00:00 | | + | 8 | Bat Limited Edition | 2017 | scooter | 699.99 | 2017-02-15 00:00:00 | | + | 9 | Model Epsilon | 2017 | automobile | 35,000.00 | 2017-02-15 00:00:00 | | + | 10 | Model Gamma | 2017 | automobile | 85,750.00 | 2017-02-15 00:00:00 | | + | 11 | Model Chi | 2019 | automobile | 95,000.00 | 2019-02-04 00:00:00 | | + | 12 | Lemon Zester | 2019 | scooter | 349.99 | 2019-02-04 00:00:00 | | + | (12 rows) | + + Figure 9.12: Products with launch dates + + All the other products launched before July, compared to the Bat Scooter, which launched in October. +3. 
List all scooters from the `products` table, as we are only interested in comparing scooters: + + ```javascript + SELECT * FROM products WHERE product_type='scooter'; + ``` + + The following table shows all the information for products with the product type of `scooter`: + + | product\_id | model | year | product\_type | base\_msrp | production\_start\_date | production\_end\_date | + | --- | --- | --- | --- | --- | --- | --- | + | 1 | Lemon | 2010 | scooter | 399.99 | 2010-03-03 00:00:00 | 2012-06-08 00:00:00 | + | 2 | Lemon Limited Edition | 2011 | scooter | 799.99 | 2011-01-03 00:00:00 | 2011-03-30 00:00:00 | + | 3 | Lemon | 2013 | scooter | 499.99 | 2013-05-01 00:00:00 | 2018-12-28 00:00:00 | + | 5 | Blade | 2014 | scooter | 699.99 | 2014-06-23 00:00:00 | 2015-01-27 00:00:00 | + | 7 | Bat | 2016 | scooter | 599.99 | 2016-10-10 00:00:00 | | + | 8 | Bat Limited Edition | 2017 | scooter | 699.99 | 2017-02-15 00:00:00 | | + | 12 | Lemon Zester | 2019 | scooter | 349.99 | 2019-02-04 00:00:00 | | + | (7 rows) | + + Figure 9.13: Scooter product launch dates + + To test the hypothesis that the time of year had an impact on sales performance, we require a scooter model to use as the control or reference group. In an ideal world, we could launch the ZoomZoom Bat Scooter in a different location or region, for example, but just at a different time, and then compare the two. However, we cannot do this here. Instead, we will choose a similar scooter launched at a different time. There are several different options in the product database, each with its own similarities and differences to the experimental group (ZoomZoom Bat Scooter). In our opinion, the Bat Limited Edition Scooter is suitable for comparison (the control group). It is slightly more expensive, but it was launched only 4 months after the Bat Scooter. Looking at its name, the Bat Limited Edition Scooter seems to share most of the same features, with a number of extras given that it's a "limited edition." +4. 
Select the first five rows of the `sales` table: + + ```javascript + SELECT * FROM sales LIMIT 5; + ``` + + The sales information for the first five customers is as follows: + + | customer\_id | product\_id | sales\_transaction\_date | sales\_amount | channel | dealership\_id | + | --- | --- | --- | --- | --- | --- | + | 1 | 7 | 2017-07-19 08:38:41 | 479.992 | internet | | + | 22 | 7 | 2017-08-14 09:59:02 | 599.99 | dealership | 20 | + | 145 | 7 | 2019-01-20 10:40:11 | 479.992 | internet | | + | 289 | 7 | 2017-05-09 14:20:04 | 539.991 | dealership | 7 | + | 331 | 7 | 2019-05-21 20:03:21 | 539.991 | dealership | 4 | + | (5 rows) | + + Figure 9.14: First five rows of sales data + +5. Select the `model` and `sales_transaction_date` columns from both the products and sales tables for the Bat Limited Edition Scooter. Store the results in a table, `bat_ltd_sales`, ordered by the `sales_transaction_date` column, from the earliest date to the latest: + + ```javascript + SELECT products.model, sales.sales_transaction_date INTO bat_ltd_sales FROM sales INNER JOIN products ON sales.product_id=products.product_id WHERE sales.product_id=8 ORDER BY sales.sales_transaction_date; + ``` + +6. Select the first five lines of `bat_ltd_sales`, using the following query: + + ```javascript + SELECT * FROM bat_ltd_sales LIMIT 5; + ``` + + The following table shows the transaction details for the first five entries of `Bat Limited Edition`: + + | model | sales\_transaction\_date | + | --- | --- | + | Bat Limited Edition | 2017-02-15 01:49:02 | + | Bat Limited Edition | 2017-02-15 09:42:37 | + | Bat Limited Edition | 2017-02-15 10:48:31 | + | Bat Limited Edition | 2017-02-15 12:22:41 | + | Bat Limited Edition | 2017-02-15 13:51:34 | + | (5 rows) | + + Figure 9.15: First five sales of the Bat Limited Edition Scooter + +7. Calculate the total number of sales for `Bat Limited Edition`.
We can check this by using the `COUNT` function: + + ```javascript + SELECT COUNT(model) FROM bat_ltd_sales; + ``` + + The total sales count can be seen in the following figure: + + ```javascript + count ----------- 5803 (1 row) + ``` + + Figure 9.16: Count of Bat Limited Edition sales + + This is compared to the original Bat Scooter, which sold 7,328 items. +8. Check the transaction details of the last Bat Limited Edition sale. We can check this by using the `MAX` function: + + ```javascript + SELECT MAX(sales_transaction_date) FROM bat_ltd_sales; + ``` + + The transaction details of the last `Bat Limited Edition` product are as follows: + + ```javascript + max ------------------- 2019-05-31 15:08:03 (1 row) + ``` + + Figure 9.17: Last date (MAX) of the Bat Limited Edition sale + +9. Adjust the table to cast the transaction date column as a date, discarding the time information. As with the original Bat Scooter, we are only interested in the date of the sale, not the date and time of the sale. Write the following query: + + ```javascript + ALTER TABLE bat_ltd_sales ALTER COLUMN sales_transaction_date TYPE date; + ``` + +10. Again, select the first five records of `bat_ltd_sales`: + + ```javascript + SELECT * FROM bat_ltd_sales LIMIT 5; + ``` + + The following table shows the first five records of `bat_ltd_sales`: + + | model | sales\_transaction\_date | + | --- | --- | + | Bat Limited Edition | 2017-02-15 | + | Bat Limited Edition | 2017-02-15 | + | Bat Limited Edition | 2017-02-15 | + | Bat Limited Edition | 2017-02-15 | + | Bat Limited Edition | 2017-02-15 | + | (5 rows) | + + Figure 9.18: Select the first five Bat Limited Edition sales by date + +11. In a similar manner to the standard Bat Scooter, create a count of sales on a daily basis. 
Insert the results into the `bat_ltd_sales_count` table by using the following query: + + ```javascript + SELECT sales_transaction_date, count(sales_transaction_date) INTO bat_ltd_sales_count FROM bat_ltd_sales GROUP BY sales_transaction_date ORDER BY sales_transaction_date; + ``` + +12. List the sales count of all the `Bat Limited` products using the following query: + + ```javascript + SELECT * FROM bat_ltd_sales_count; + ``` + + The sales count is shown in the following figure: + + | sales\_transaction\_date | count | + | --- | --- | + | 2017-02-15 | 6 | + | 2017-02-16 | 2 | + | 2017-02-17 | 1 | + | 2017-02-18 | 4 | + | 2017-02-19 | 5 | + | 2017-02-20 | 6 | + | 2017-02-21 | 5 | + | 2017-02-22 | 4 | + | 2017-02-23 | 6 | + | 2017-02-24 | 2 | + | 2017-02-25 | 2 | + | 2017-02-26 | 2 | + | 2017-02-27 | 4 | + | 2017-02-28 | 4 | + | 2017-03-01 | 5 | + | 2017-03-02 | 1 | + + Figure 9.19: Bat Limited Edition daily sales + +13. Compute the cumulative sum of the daily sales figures and insert the resulting table into `bat_ltd_sales_growth`: + + ```javascript + SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) INTO bat_ltd_sales_growth FROM bat_ltd_sales_count; + ``` + +14. Select the first 22 days of sales records from `bat_ltd_sales_growth`: + + ```javascript + SELECT * FROM bat_ltd_sales_growth LIMIT 22; + ``` + + The following table displays the first 22 records of sales growth: + + | sales\_transaction\_date | count | sum | + | --- | --- | --- | + | 2017-02-15 | 6 | 6 | + | 2017-02-16 | 2 | 8 | + | 2017-02-17 | 1 | 9 | + | 2017-02-18 | 4 | 13 | + | 2017-02-19 | 5 | 18 | + | 2017-02-20 | 6 | 24 | + | 2017-02-21 | 5 | 29 | + | 2017-02-22 | 4 | 33 | + | 2017-02-23 | 6 | 39 | + | 2017-02-24 | 2 | 41 | + | 2017-02-25 | 2 | 43 | + | 2017-02-26 | 2 | 45 | + | 2017-02-27 | 4 | 49 | + | 2017-02-28 | 4 | 53 | + | 2017-03-01 | 5 | 58 | + | 2017-03-02 | 1 | 59 | + | 2017-03-03 | 3 | 62 | + | 2017-03-04 | 8 | 70 | + | 2017-03-05 | 4 | 74 | + | 2017-03-06 | 7 | 81 | + | 2017-03-07 |
7 | 88 | + | 2017-03-08 | 8 | 96 | + | (22 rows) | + + Figure 9.20: Bat Limited Edition sales – cumulative sum + +15. Compare this sales record with the one for the original Bat Scooter sales, as shown in the following code: + + ```javascript + SELECT * FROM bat_sales_growth LIMIT 22; + ``` + + + The following table shows the sales details for the first 22 records of the `bat_sales_growth` table: + + | sales\_transaction\_date | count | sum | + | --- | --- | --- | + | 2016-10-10 00:00:00 | 9 | 9 | + | 2016-10-11 00:00:00 | 6 | 15 | + | 2016-10-12 00:00:00 | 10 | 25 | + | 2016-10-13 00:00:00 | 10 | 35 | + | 2016-10-14 00:00:00 | 5 | 40 | + | 2016-10-15 00:00:00 | 10 | 50 | + | 2016-10-16 00:00:00 | 14 | 64 | + | 2016-10-17 00:00:00 | 9 | 73 | + | 2016-10-18 00:00:00 | 11 | 84 | + | 2016-10-19 00:00:00 | 12 | 96 | + | 2016-10-20 00:00:00 | 10 | 106 | + | 2016-10-21 00:00:00 | 6 | 112 | + | 2016-10-22 00:00:00 | 2 | 114 | + | 2016-10-23 00:00:00 | 5 | 119 | + | 2016-10-24 00:00:00 | 6 | 125 | + | 2016-10-25 00:00:00 | 9 | 134 | + | 2016-10-26 00:00:00 | 2 | 136 | + | 2016-10-27 00:00:00 | 4 | 140 | + | 2016-10-28 00:00:00 | 7 | 147 | + | 2016-10-29 00:00:00 | 5 | 152 | + | 2016-10-30 00:00:00 | 5 | 157 | + | 2016-10-31 00:00:00 | 3 | 160 | + | (22 rows) | + + Figure 9.21: Bat Scooter cumulative sales for 22 rows + + Sales of the limited-edition scooter did not reach double digits during the first 22 days, nor did the daily volume of sales fluctuate as much. In keeping with the overall sales figure, the limited edition sold 64 fewer units over the first 22 days. +16. Compute the 7-day `lag` function for the `sum` column and insert the results into the `bat_ltd_sales_delay` table: + + ```javascript + SELECT *, lag(sum , 7) OVER (ORDER BY sales_transaction_date) INTO bat_ltd_sales_delay FROM bat_ltd_sales_growth; + ``` + +17. Compute the sales growth for `bat_ltd_sales_delay` in a similar manner to the exercise completed in _Activity 9.1_, _Quantifying the Sales Drop_.
Label the column for the results of this calculation as `volume` and store the resulting table in `bat_ltd_sales_vol`: + + ```javascript + SELECT *, (sum-lag)/lag AS volume INTO bat_ltd_sales_vol FROM bat_ltd_sales_delay; + ``` + +18. Look at the first 22 records of sales in `bat_ltd_sales_vol`: + + ```javascript + SELECT * FROM bat_ltd_sales_vol LIMIT 22; + ``` + + + The sales volume can be seen in the following figure: + + | sales\_transaction\_date | count | sum | lag | volume | + | --- | --- | --- | --- | --- | + | 2017-02-15 | 6 | 6 | | | + | 2017-02-16 | 2 | 8 | | | + | 2017-02-17 | 1 | 9 | | | + | 2017-02-18 | 4 | 13 | | | + | 2017-02-19 | 5 | 18 | | | + | 2017-02-20 | 6 | 24 | | | + | 2017-02-21 | 5 | 29 | | | + | 2017-02-22 | 4 | 33 | 6 | 4.5000000000000000 | + | 2017-02-23 | 6 | 39 | 8 | 3.8750000000000000 | + | 2017-02-24 | 2 | 41 | 9 | 3.5555555555555556 | + | 2017-02-25 | 2 | 43 | 13 | 2.3076923076923077 | + | 2017-02-26 | 2 | 45 | 18 | 1.5000000000000000 | + | 2017-02-27 | 4 | 49 | 24 | 1.0416666666666667 | + | 2017-02-28 | 4 | 53 | 29 | 0.82758620689655172414 | + | 2017-03-01 | 5 | 58 | 33 | 0.75757575757575757576 | + | 2017-03-02 | 1 | 59 | 39 | 0.51282051282051282051 | + | 2017-03-03 | 3 | 62 | 41 | 0.51219512195121951220 | + | 2017-03-04 | 8 | 70 | 43 | 0.62790697674418604651 | + | 2017-03-05 | 4 | 74 | 45 | 0.64444444444444444444 | + | 2017-03-06 | 7 | 81 | 49 | 0.65306122448979591837 | + | 2017-03-07 | 7 | 88 | 53 | 0.66037735849056603774 | + | 2017-03-08 | 8 | 96 | 58 | 0.65517241379310344828 | + | (22 rows) | + + Figure 9.22: Bat Scooter cumulative sales showing volume + + +Looking at the `volume` column in the preceding diagram, we can again see that the sales growth is more consistent than the original Bat Scooter. The growth within the first week is less than that of the original model, but it is sustained over a longer period.
After 22 days of sales, the sales growth of the limited-edition scooter is 65% compared to the previous week, as compared with the 28% growth identified in the second activity of the lesson. + +At this stage, we have collected data from two similar products launched at different time periods and found some differences in the trajectory of the sales growth over the first 3 weeks of sales. In a professional setting, we may also consider employing more sophisticated statistical comparison methods, such as tests for differences of mean, variance, survival analysis, or other techniques. These methods lie outside the scope of this course and, as such, limited comparative methods will be used. + +While we have shown there to be a difference in sales between the two Bat Scooters, we also cannot rule out the fact that the sales differences can be attributed to the difference in the sales price of the two scooters, with the limited-edition scooter being $100 more expensive. In the next activity, we will compare the sales of the Bat Scooter to the 2013 Lemon, which is $100 cheaper, was launched 3 years prior, is no longer in production, and started production in the first half of the calendar year. + +Activity 9.2: Analyzing the Difference in the Sales Price Hypothesis + +In this activity, we are going to investigate the hypothesis that the reduction in sales growth can be attributed to the price point of the Bat Scooter. Previously, we considered the launch date. However, there could be another factor – the sales price included. If we consider the product list of scooters shown in _Figure 9.23_, and exclude the Bat model scooter, we can see that there are two price categories, $699.99 and above, or $499.99 and below. The Bat Scooter sits exactly between these two groups; perhaps the reduction in sales growth can be attributed to the different pricing model. 
In this activity, we will test this hypothesis by comparing Bat sales to the 2013 Lemon: + +| product\_id | model | year | product\_type | base\_msrp | production\_start\_date | production\_end\_date | +| --- | --- | --- | --- | --- | --- | --- | +| 12 | Lemon Zester | 2019 | scooter | 349.99 | 2019-02-04 00:00:00 | | +| 1 | Lemon | 2010 | scooter | 399.99 | 2010-03-03 00:00:00 | 2012-06-08 00:00:00 | +| 3 | Lemon | 2013 | scooter | 499.99 | 2013-05-01 00:00:00 | 2018-12-28 00:00:00 | +| 7 | Bat | 2016 | scooter | 599.99 | 2016-10-10 00:00:00 | | +| 5 | Blade | 2014 | scooter | 699.99 | 2014-06-23 00:00:00 | 2015-01-27 00:00:00 | +| 8 | Bat Limited Edition | 2017 | scooter | 699.99 | 2017-02-15 00:00:00 | | +| 2 | Lemon Limited Edition | 2011 | scooter | 799.99 | 2011-01-03 00:00:00 | 2011-03-30 00:00:00 | +| (7 rows) | + +Figure 9.23: List of scooter models + +The following are the steps to perform: + +1. Load the `sqlda` database from the accompanying source code located at this [link](https://github.com/TrainingByPackt/SQL-for-Data-Analytics/tree/master/Datasets). +2. Select the `sales_transaction_date` column from the year 2013 for `Lemon` model sales and insert the column into a table called `lemon_sales`. +3. Count the sales records available for 2013 for the `Lemon` model. +4. Display the latest `sales_transaction_date` column. +5. Convert the `sales_transaction_date` column to a date type. +6. Count the number of sales per day within the `lemon_sales` table and insert the data into a table called `lemon_sales_count`. +7. Calculate the cumulative sum of sales and insert the corresponding table into a new table labeled `lemon_sales_sum`. +8. Compute the 7-day `lag` function on the `sum` column and save the result to `lemon_sales_delay`. +9. Calculate the growth rate using the data from `lemon_sales_delay` and store the resulting table in `lemon_sales_growth`. +10. Inspect the first 22 records of the `lemon_sales_growth` table by examining the `volume` data. 
+ +**Solution** + +1. Load the sqlda database: + + ```javascript + psql sqlda + ``` + +2. Select the sales\_transaction\_date column from the 2013 Lemon sales and insert the column into a table called lemon\_sales: + + ```javascript + SELECT sales_transaction_date INTO lemon_sales FROM sales WHERE product_id=3; + ``` + +3. Count the sales records available for the 2013 Lemon by running the following query: + + ```javascript + SELECT count(sales_transaction_date) FROM lemon_sales; + ``` + + We can see that **16558** records are available: + + ```javascript + count -------------------- 16558 (1 row) + ``` + + Figure A: Sales records for the 2013 Lemon Scooter + +4. Use the max function to check the latest sales\_transaction\_date column: + + ```javascript + SELECT max(sales_transaction_date) FROM lemon_sales; + ``` + + The following figure displays the sales\_transaction\_date column: + + ```javascript + max ---------------- 2018-12-27 19:12:10 (1 row) + ``` + + Figure B: Production between May 2013 and December 2018 + +5. Convert the sales\_transaction\_date column to a date type using the following query: + + ```javascript + ALTER TABLE lemon_sales ALTER COLUMN sales_transaction_date TYPE DATE; + ``` + + We are converting the datatype from DATE\_TIME to DATE so as to remove the time information from the field. We are only interested in accumulating the number of sales per day, so we need just the date and not the time. Hence, it is easier just to remove the time information from the field. +6. Count the number of sales per day within the lemon\_sales table and insert this figure into a table called lemon\_sales\_count: + + ```javascript + SELECT sales_transaction_date, COUNT(sales_transaction_date) INTO lemon_sales_count FROM lemon_sales GROUP BY sales_transaction_date ORDER BY sales_transaction_date; + ``` + +7.
Calculate the cumulative sum of sales and insert the corresponding table into a new table labeled lemon\_sales\_sum: + + ```javascript + SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) INTO lemon_sales_sum FROM lemon_sales_count; + ``` + +8. Compute the 7-day lag function on the sum column and save the result to lemon\_sales\_delay: + + ```javascript + SELECT *, lag(sum, 7) OVER (ORDER BY sales_transaction_date) INTO lemon_sales_delay FROM lemon_sales_sum; + ``` + +9. Calculate the growth rate using the data from lemon\_sales\_delay and store the resulting table in lemon\_sales\_growth. Label the growth rate column as volume: + + ```javascript + SELECT *, (sum-lag)/lag AS volume INTO lemon_sales_growth FROM lemon_sales_delay; + ``` + +10. Inspect the first 22 records of the lemon\_sales\_growth table by examining the volume data: + + ```javascript + SELECT * FROM lemon_sales_growth LIMIT 22; + ``` + + The following table shows the sales growth: + + | sales\_transaction\_date | count | sum | lag | volume | + | --- | --- | --- | --- | --- | + | 2013-05-01 | 6 | 6 | | | + | 2013-05-02 | 8 | 14 | | | + | 2013-05-03 | 4 | 18 | | | + | 2013-05-04 | 9 | 27 | | | + | 2013-05-05 | 9 | 36 | | | + | 2013-05-06 | 6 | 42 | | | + | 2013-05-07 | 8 | 50 | | | + | 2013-05-08 | 6 | 56 | 6 | 8.3333333333333333 | + | 2013-05-09 | 6 | 62 | 14 | 3.4285714285714286 | + | 2013-05-10 | 9 | 71 | 18 | 2.9444444444444444 | + | 2013-05-11 | 3 | 74 | 27 | 1.7407407407407407 | + | 2013-05-12 | 4 | 78 | 36 | 1.1666666666666667 | + | 2013-05-13 | 7 | 85 | 42 | 1.0238095238095238 | + | 2013-05-14 | 3 | 88 | 50 | 0.76000000000000000000 | + | 2013-05-15 | 3 | 91 | 56 | 0.62500000000000000000 | + | 2013-05-16 | 4 | 95 | 62 | 0.53225806451612903226 | + | 2013-05-17 | 6 | 101 | 71 | 0.42253521126760563380 | + | 2013-05-18 | 9 | 110 | 74 | 0.48648648648648648649 | + | 2013-05-19 | 6 | 116 | 78 | 0.48717948717948717949 | + | 2013-05-20 | 6 | 122 | 85 | 0.43529411764705882353 | + | 2013-05-21 | 
11 | 133 | 88 | 0.51136363636363636364 | + | 2013-05-22 | 8 | 141 | 91 | 0.54945054945054945055 | + | (22 rows) | + + Figure C: Sales growth of the Lemon Scooter + +Similar to the previous exercise, we have calculated the cumulative sum, lag, and relative sales growth of the Lemon Scooter. We can see that the initial sales volume is much larger than that of the other scooters, at over 800%, and again finishes higher at 55%. + +**Expected Output:** + +| sales\_transaction\_date | count | sum | lag | volume | +| --- | --- | --- | --- | --- | +| 2013-05-01 | 6 | 6 | | | +| 2013-05-02 | 8 | 14 | | | +| 2013-05-03 | 4 | 18 | | | +| 2013-05-04 | 9 | 27 | | | +| 2013-05-05 | 9 | 36 | | | +| 2013-05-06 | 6 | 42 | | | +| 2013-05-07 | 8 | 50 | | | +| 2013-05-08 | 6 | 56 | 6 | 8.3333333333333333 | +| 2013-05-09 | 6 | 62 | 14 | 3.4285714285714286 | +| 2013-05-10 | 9 | 71 | 18 | 2.9444444444444444 | +| 2013-05-11 | 3 | 74 | 27 | 1.7407407407407407 | +| 2013-05-12 | 4 | 78 | 36 | 1.1666666666666667 | +| 2013-05-13 | 7 | 85 | 42 | 1.0238095238095238 | +| 2013-05-14 | 3 | 88 | 50 | 0.76000000000000000000 | +| 2013-05-15 | 3 | 91 | 56 | 0.62500000000000000000 | +| 2013-05-16 | 4 | 95 | 62 | 0.53225806451612903226 | +| 2013-05-17 | 6 | 101 | 71 | 0.42253521126760563380 | +| 2013-05-18 | 9 | 110 | 74 | 0.48648648648648648649 | +| 2013-05-19 | 6 | 116 | 78 | 0.48717948717948717949 | +| 2013-05-20 | 6 | 122 | 85 | 0.43529411764705882353 | +| 2013-05-21 | 11 | 133 | 88 | 0.51136363636363636364 | +| 2013-05-22 | 8 | 141 | 91 | 0.54945054945054945055 | +| (22 rows) | + +Figure 9.54: Sales growth of the Lemon Scooter + +Now that we have collected data to test the two hypotheses of timing and cost, what observations can we make and what conclusions can we draw? The first observation that we can make is regarding the total volume of sales for the three different scooter products.
The Lemon Scooter, over its production life cycle of 4.5 years, sold 16,558 units, while the two Bat Scooters, the Original and Limited Edition models, sold 7,328 and 5,803 units, respectively, and are still currently in production, with the Bat Scooter launching about 4 months earlier and with approximately 2.5 years of sales data available. Looking at the sales growth of the three different scooters, we can also make a few different observations: + +- The original Bat Scooter, which launched in October at a price of $599.99, experienced a 700% sales growth in its second week of production and finished the first 22 days with 28% growth and a sales figure of 160 units. +- The Bat Limited Edition Scooter, which launched in February at a price of $699.99, experienced 450% growth at the start of its second week of production and finished with 96 sales and 66% growth over the first 22 days. +- The 2013 Lemon Scooter, which launched in May at a price of $499.99, experienced 830% growth in the second week of production and ended its first 22 days with 141 sales and 55% growth. + +Based on this information, we can make a number of different conclusions: + +- The initial growth rate starting in the second week of sales correlates to the cost of the scooter. As the cost increased to $699.99, the initial growth rate dropped from 830% to 450%. +- The number of units sold in the first 22 days does not directly correlate to the cost. The $599.99 Bat Scooter sold more than the 2013 Lemon Scooter in that first period despite the price difference. +- There is some evidence to suggest that the reduction in sales can be attributed to seasonal variations given the significant reduction in growth and the fact that the original Bat Scooter is the only one released in October. So far, the evidence suggests that the drop can be attributed to the difference in launch timing. 
+ +Before we draw the conclusion that the difference can be attributed to seasonal variations and launch timing, let's ensure that we have extensively tested a range of possibilities. Perhaps marketing work, such as email campaigns, that is, when the emails were sent, and the frequency with which the emails were opened, made a difference. + +Now that we have considered both the launch timing and the suggested retail price of the scooter as a possible cause of the reduction in sales, we will direct our efforts to other potential causes, such as the rate of opening of marketing emails. Does the marketing email opening rate have an effect on sales growth throughout the first 3 weeks? We will find this out in our next exercise. + +Exercise 9.4: Analyzing Sales Growth by Email Opening Rate + +In this exercise, we will analyze the sales growth using the email opening rate. To investigate the hypothesis that a decrease in the rate of opening emails impacted the Bat Scooter sales rate, we will again select the Bat and Lemon Scooters and will compare the email opening rate. + +Perform the following steps to complete the exercise: + +1. Load the `sqlda` database: + + ```javascript + psql sqlda + ``` + +2. Firstly, look at the `emails` table to see what information is available. 
Select the first five rows of the `emails` table: + + ```javascript + SELECT * FROM emails LIMIT 5; + ``` + + The following table displays the email information for the first five rows: + + | email\_id | customer\_id | email\_subject | opened | clicked | bounced | sent\_date | opened\_date | clicked\_date | + | --- | --- | --- | --- | --- | --- | --- | --- | --- | + | 1 | 18 | Introducing A Limited Edition | f | f | f | 2011-01-03 15:00:00 | | | + | 2 | 30 | Introducing A Limited Edition | f | f | f | 2011-01-03 15:00:00 | | | + | 3 | 41 | Introducing A Limited Edition | t | f | f | 2011-01-03 15:00:00 | 2011-01-04 10:41:11 | | + | 4 | 52 | Introducing A Limited Edition | f | f | f | 2011-01-03 15:00:00 | | | + | 5 | 59 | Introducing A Limited Edition | f | f | f | 2011-01-03 15:00:00 | | | + | (5 rows) | + + Figure 9.55: Sales growth of the Lemon Scooter + + To investigate our hypothesis, we need to know whether an email was opened and when it was opened, as well as which customer opened the email and whether that customer purchased a scooter. If the email marketing campaign was successful in maintaining the sales growth rate, we would expect a customer to open an email shortly before a scooter was purchased. + + The period in which the emails were sent, as well as the IDs of the customers who received and opened an email, can help us to determine whether a customer who made a purchase may have been encouraged to do so following the receipt of an email. + +3. To test the hypothesis, we need to collect the `customer_id` column from both the `emails` table and the `bat_sales` table for the Bat Scooter, the `opened`, `sent_date`, `opened_date`, and `email_subject` columns from the `emails` table, as well as the `sales_transaction_date` column from the `bat_sales` table. As we only want the email records of customers who purchased a Bat Scooter, we will join the two tables on the `customer_id` column. 
Then, insert the results into a new table – `bat_emails`: + + ```javascript + SELECT emails.email_subject, emails.customer_id, emails.opened, emails.sent_date, emails.opened_date, bat_sales.sales_transaction_date INTO bat_emails FROM emails INNER JOIN bat_sales ON bat_sales.customer_id=emails.customer_id ORDER BY bat_sales.sales_transaction_date; + ``` + +4. Select the first 10 rows of the `bat_emails` table, ordering the results by `sales_transaction_date`: + + ```javascript + SELECT * FROM bat_emails LIMIT 10; + ``` + + The following table shows the first 10 rows of the `bat_emails` table ordered by `sales_transaction_date`: + + | email\_subject | customer\_id | opened | sent\_date | opened\_date | sales\_transaction\_date | + | --- | --- | --- | --- | --- | --- | + | A New Year, And Some New EVs | 11678 | f | 2019-01-07 15:00:00 | | 2016-10-10 00:00:00 | + | A Brand New Scooter...and Car | 40250 | f | 2014-05-06 15:00:00 | | 2016-10-10 00:00:00 | + | We Really Outdid Ourselves this Year | 24125 | f | 2017-01-15 15:00:00 | | 2016-10-10 00:00:00 | + | Tis' the Season for Savings | 31307 | t | 2015-11-26 15:00:00 | 2015-11-27 04:55:07 | 2016-10-10 00:00:00 | + | 25% off all EVs. It's a Christmas Miracle! | 42213 | f | 2016-11-25 15:00:00 | | 2016-10-10 00:00:00 | + | Zoom zoom Black Friday Sale | 40250 | f | 2014-11-28 15:00:00 | | 2016-10-10 00:00:00 | + | Save the Planet with some Holiday Savings. | 4553 | f | 2016-11-23 15:00:00 | | 2016-10-10 00:00:00 | + | The 2013 Lemon Scooter is Here | 24125 | t | 2013-03-01 15:00:00 | 2013-03-02 14:43:34 | 2016-10-10 00:00:00 | + | The 2013 Lemon Scooter is Here | 40250 | f | 2013-03-01 15:00:00 | | 2016-10-10 00:00:00 | + | Save the Planet with some Holiday Savings. 
| 40250 | f | 2018-11-23 15:00:00 | | 2016-10-10 00:00:00 | + | (10 rows) | + + Figure 9.56: Email and sales information joined on customer\_id + + We can see here that there are several emails unopened, over a range of sent dates, and that some customers have received multiple emails. Looking at the subjects of the emails, some of them don't seem related to the Zoom scooters at all. +5. Select all rows where the `sent_date` email predates the `sales_transaction_date` column, order by `customer_id`, and limit the output to the first 22 rows. This will help us to know which emails were sent to each customer before they purchased their scooter. Write the following query to do so: + + ```javascript + SELECT * FROM bat_emails WHERE sent_date < sales_transaction_date ORDER BY customer_id LIMIT 22; + ``` + + The following table lists the emails sent to the customers before the `sales_transaction_date` column: + + ![The figure shows the output of the above query. ](https://s3.amazonaws.com/jigyaasa_content_static/sql-data-anal/C11861_09_27.jpg) + + Figure 9.57: Emails sent to customers before the sale transaction date + +6. Delete the rows of the `bat_emails` table where emails were sent more than 6 months prior to production. As we can see, there are some emails that were sent years before the transaction date. We can easily remove some of the unwanted emails by removing those sent before the Bat Scooter was in production. From the products table, the production start date for the Bat Scooter is October 10, 2016: + + ```javascript + DELETE FROM bat_emails WHERE sent_date < '2016-04-10'; + ``` + + Note + + In this exercise, we are removing information that we no longer require from an existing table. This differs from the previous exercises, where we created multiple tables each with slightly different information from other. 
The technique you apply will differ depending upon the requirements of the problem being solved; do you require a traceable record of analysis, or are efficiency and reduced storage key? + +7. Delete the rows where the sent date is after the purchase date, as they are not relevant to the sale: + + ```javascript + DELETE FROM bat_emails WHERE sent_date > sales_transaction_date; + ``` + +8. Delete those rows where the difference between the transaction date and the sent date exceeds 30 days, as we also only want those emails that were sent shortly before the scooter purchase. An email sent 1 year beforehand is unlikely to influence a purchasing decision, but one closer to the purchase date may have influenced the sales decision. We will set a limit of 1 month (30 days) before the purchase. Write the following query to do so: + + ```javascript + DELETE FROM bat_emails WHERE (sales_transaction_date-sent_date) > '30 days'; + ``` + +9. Examine the first 22 rows again ordered by `customer_id` by running the following query: + + ```javascript + SELECT * FROM bat_emails ORDER BY customer_id LIMIT 22; + ``` + + The following table shows the emails where the difference between the transaction date and the sent date is no more than 30 days: + + ![The figure shows the output of the above query.](https://s3.amazonaws.com/jigyaasa_content_static/sql-data-anal/C11861_09_28.jpg) + + Figure 9.58: Emails sent close to the date of sale + + At this stage, we have reasonably filtered the available data based on when each email was sent relative to the sale. Looking at the preceding `email_subject` column, it also appears that there are a few emails unrelated to the Bat Scooter, for example, **25% off all EVs. It's a Christmas Miracle!** and **Black Friday. Green Cars.** These emails seem more related to electric car production than to scooters, and so we can remove them from our analysis. +10. 
Select the distinct values from the `email_subject` column to get a list of the different emails sent to the customers: + + ```javascript + SELECT DISTINCT(email_subject) FROM bat_emails; + ``` + + The following table shows a list of distinct email subjects: + + ```javascript + email_subject --------------------------------------- Black Friday. Green Cars. 25% off all EVs. It's a Christmas Miracle! A New Year, And Some New EVs Like a Bat out of Heaven Save the Planet with some Holiday Savings. We Really Outdid Ourselves this Year (6 rows) + ``` + + Figure 9.59: Unique email subjects sent to potential customers of the Bat Scooter + +11. Delete all records that have `Black Friday` in the email subject. These emails do not appear relevant to the sale of the Bat Scooter: + + ```javascript + DELETE FROM bat_emails WHERE position('Black Friday' in email_subject)>0; + ``` + + Note + + The `position` function in the preceding example returns the position (counting from 1) at which the `Black Friday` string first occurs in `email_subject`, or 0 if it does not occur at all. Thus, by testing for a result greater than 0, we are deleting any rows where `Black Friday` appears anywhere in the email subject. For more information on PostgreSQL, refer to the documentation regarding [string functions](https://www.postgresql.org/docs/current/functions-string.html). + +12. Delete all rows where **25% off all EVs. It's a Christmas Miracle!** or **A New Year, And Some New EVs** can be found in the `email_subject`: + + ```javascript + DELETE FROM bat_emails WHERE position('25% off all EV' in email_subject)>0; DELETE FROM bat_emails WHERE position('Some New EV' in email_subject)>0; + ``` + +13. At this stage, we have our final dataset of emails sent to customers. 
Count the number of rows that are left in the sample by writing the following query: + + ```javascript + SELECT count(sales_transaction_date) FROM bat_emails; + ``` + + We can see that **401** rows are left in the sample: + + ```javascript + count ----------- 401 (1 row) + ``` + + Figure 9.60: Count of the final Bat Scooter email dataset + +14. We will now compute the percentage of emails that were opened relative to sales. Count the emails that were opened by writing the following query: + + ```javascript + SELECT count(opened) FROM bat_emails WHERE opened='t'; + ``` + + We can see that **98** emails were opened: + + ```javascript + count ------------ 98 (1 row) + ``` + + Figure 9.61: Count of opened Bat Scooter campaign emails + +15. Count the customers who received emails and made a purchase. We will determine this by counting the number of unique (or distinct) customers that are in the `bat_emails` table: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM bat_emails; + ``` + + + We can see that **396** customers who received an email made a purchase: + + ```javascript + count ----------- 396 (1 row) + ``` + + Figure 9.62: Count of unique customers who received a Bat Scooter campaign email + +16. Count the unique (or distinct) customers who made a purchase by writing the following query: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM bat_sales; + ``` + + + Following is the output of the preceding code: + + ```javascript + count ---------- 6659 (1 row) + ``` + + Figure 9.63: Count of unique customers + +17. 
Calculate the percentage of customers who purchased a Bat Scooter after receiving an email: + + ```javascript + SELECT 396.0/6659.0 AS email_rate; + ``` + + + The output of the preceding query is displayed as follows: + + ```javascript + email_rate ---------------------- 0.05946838864694398558 (1 row) + ``` + + Figure 9.64: Percentage of customers who received an email + + Note + + In the preceding calculation, you can see that we included a decimal place in the figures, for example, 396.0 instead of a simple integer value (396). This is because the resulting value is a fraction smaller than 1. If we excluded these decimal places, PostgreSQL would have performed the division on integers, discarding the fractional part, and the result would be 0. + + Just under 6% of customers who made a purchase received an email regarding the Bat Scooter. Since 18% of customers who received an email made a purchase, there is a strong argument to be made that actively increasing the size of the customer base who receive marketing emails could increase Bat Scooter sales. +18. Limit the scope of our data to be all sales prior to November 1, 2016 and put the data in a new table called `bat_emails_threewks`. So far, we have examined the email opening rate throughout all available data for the Bat Scooter. Now check the rate for the first 3 weeks, where we saw a reduction in sales: + + ```javascript + SELECT * INTO bat_emails_threewks FROM bat_emails WHERE sales_transaction_date < '2016-11-01'; + ``` + +19. Now, count the number of emails sent during this period: + + ```javascript + SELECT COUNT(opened) FROM bat_emails_threewks; + ``` + + + We can see that we have sent **82** emails during this period: + + ```javascript + count ---------------------- 82 (1 row) + ``` + + Figure 9.65: Count of emails sent in the first 3 weeks + +20. 
Now, count the number of emails opened in the first 3 weeks: + + ```javascript + SELECT COUNT(opened) FROM bat_emails_threewks WHERE opened='t'; + ``` + + + The following is the output of the preceding code: + + ```javascript + count ---------------------- 15 (1 row) + ``` + + Figure 9.66: Count of emails opened + + + We can see that **15** emails were opened in the first 3 weeks. +21. Count the number of customers who received emails during the first 3 weeks of sales and who then made a purchase by using the following query: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM bat_emails_threewks; + ``` + + + We can see that **82** customers received emails during the first 3 weeks: + + ```javascript + count ---------------------- 82 (1 row) + ``` + + Figure 9.67: Customers who made a purchase in the first 3 weeks + +22. Calculate the percentage of customers who opened emails pertaining to the Bat Scooter and then made a purchase in the first 3 weeks by using the following query: + + ```javascript + SELECT 15.0/82.0 AS sale_rate; + ``` + + + The following table shows the calculated percentage: + + ```javascript + sale_rate 0.18292682926829268293 (1 row) + ``` + + Figure 9.68: Percentage of customers in the first 3 weeks who opened emails + + + Approximately 18% of customers who received an email about the Bat Scooter made a purchase in the first 3 weeks. This is consistent with the rate for all available data for the Bat Scooter. +23. Calculate how many unique customers we have in total throughout the first 3 weeks. This information is useful context when considering the percentages, we just calculated. 3 sales out of 4 equate to 75% but, in this situation, we would prefer a lower rate of the opening but for a much larger customer base. Information on larger customer bases is generally more useful as it is typically more representative of the entire customer base, rather than a small sample of it. 
We already know that 82 customers received emails: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM bat_sales WHERE sales_transaction_date < '2016-11-01'; + ``` + + + The following output reflects **160** customers where the transaction took place before November 1, 2016: + + ```javascript + count ------------ 160 (1 row) + ``` + + Figure 9.69: Number of distinct customers from bat\_sales + + +There were 160 customers in the first 3 weeks, 82 of whom received emails, which is slightly over 50% of customers. This is much more than 6% of customers over the entire period of availability of the scooter. + +Now that we have examined the performance of the email marketing campaign for the Bat Scooter, we need a control or comparison group to establish whether the results were consistent with that of other products. Without a group to compare against, we simply do not know whether the email campaign of the Bat Scooter was good, bad, or neither. We will perform the next exercise to investigate performance. + +Exercise 9.5: Analyzing the Performance of the Email Marketing Campaign + +In this exercise, we will investigate the performance of the email marketing campaign for the Lemon Scooter to allow for a comparison with the Bat Scooter. Our hypothesis is that if the email marketing campaign performance of the Bat Scooter is consistent with another, such as the 2013 Lemon, then the reduction in sales cannot be attributed to differences in the email campaigns. + +Perform the following steps to complete the exercise: + +1. Load the `sqlda` database: + + ```javascript + psql sqlda + ``` + +2. Drop the existing `lemon_sales` table: + + ```javascript + DROP TABLE lemon_sales; + ``` + +3. The 2013 Lemon Scooter is `product_id = 3`. Select `customer_id` and `sales_transaction_date` from the sales table for the 2013 Lemon Scooter. 
Insert the information into a table called `lemon_sales`: + + ```javascript + SELECT customer_id, sales_transaction_date INTO lemon_sales FROM sales WHERE product_id=3; + ``` + +4. Select all information from the `emails` table for customers who purchased a 2013 Lemon Scooter. Place the information in a new table called `lemon_emails`: + + ```javascript + SELECT emails.customer_id, emails.email_subject, emails.opened, emails.sent_date, emails.opened_date, lemon_sales.sales_transaction_date INTO lemon_emails FROM emails INNER JOIN lemon_sales ON emails.customer_id=lemon_sales.customer_id; + ``` + +5. Remove all emails sent before the start of production of the 2013 Lemon Scooter. For this, we first require the date when production started: + + ```javascript + SELECT production_start_date FROM products Where product_id=3; + ``` + + The following table shows the `production_start_date` column: + + ```javascript + production_start_date --------------------------------- 2013-05-01 00:00:00 (1 row) + ``` + + Figure 9.70: Production start date of the Lemon Scooter + + Now, delete the emails that were sent before the start of production of the 2013 Lemon Scooter: + + ```javascript + DELETE FROM lemon_emails WHERE sent_date < '2013-05-01'; + ``` + +6. Remove all rows where the sent date is later than the date in the `sales_transaction_date` column: + + ```javascript + DELETE FROM lemon_emails WHERE sent_date > sales_transaction_date; + ``` + +7. Remove all rows where the sent date is more than 30 days earlier than the date in the `sales_transaction_date` column: + + ```javascript + DELETE FROM lemon_emails WHERE (sales_transaction_date - sent_date) > '30 days'; + ``` + +8. Remove all rows from `lemon_emails` where the email subject is not related to a Lemon Scooter. 
Before doing this, we will search for all distinct email subjects: + + ```javascript + SELECT DISTINCT(email_subject) FROM lemon_emails; + ``` + + The following table shows the distinct email subjects: + + ```javascript + email_subject ---------------------------------------------------------- Tis' the Season for Savings 25% off all EVs. It's a Christmas Miracle! A Brand New Scooter...and Car Like a Bat out of Heaven Save the Planet with some Holiday Savings. Shocking Holiday Savings on Electric Scooters An Electric Car for a New Age We cut you a deal: 20% off a Blade Black Friday. Green Cars. Zoom Zoom Black Friday Sale (11 rows) + ``` + + Figure 9.71: Lemon Scooter campaign emails sent + + Now, delete the emails whose subjects are not related to the Lemon Scooter using the `DELETE` command: + + ```javascript + DELETE FROM lemon_emails WHERE POSITION('25% off all EVs.' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('Like a Bat out of Heaven' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('Save the Planet' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('An Electric Car' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('We cut you a deal' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('Black Friday. Green Cars.' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('Zoom' in email_subject)>0; + ``` + +9. Now, check how many of the emails sent to Lemon Scooter customers were opened: + + ```javascript + SELECT COUNT(opened) FROM lemon_emails WHERE opened='t'; + ``` + + We can see that **128** emails were opened: + + ```javascript + count --------- 128 (1 row) + ``` + + Figure 9.72: Lemon Scooter campaign emails opened + +10. 
List the number of customers who received emails and made a purchase: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM lemon_emails; + ``` + + The following figure shows that **506** customers made a purchase after receiving emails: + + ```javascript + count --------- 506 (1 row) + ``` + + Figure 9.73: Unique customers who purchased a Lemon Scooter + +11. Calculate the percentage of customers who opened the received emails and made a purchase: + + ```javascript + SELECT 128.0/506.0 AS email_rate; + ``` + + We can see that 25% of customers opened the emails and made a purchase: + + ```javascript + email_rate ------------------------------- 0.25296442687747035573 (1 row) + ``` + + Figure 9.74: Lemon Scooter customer email rate + +12. Calculate the number of unique customers who made a purchase: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM lemon_sales; + ``` + + We can see that **13854** customers made a purchase: + + ```javascript + count ------------------------------- 13854 (1 row) + ``` + + Figure 9.75: Count of unique Lemon Scooter customers + +13. Calculate the percentage of customers who made a purchase having received an email. This will enable a comparison with the corresponding figure for the Bat Scooter: + + ```javascript + SELECT 506.0/13854.0 AS email_sales; + ``` + + The preceding calculation yields a result of approximately 3.6%: + + ```javascript + email_sales ------------------------- 0.03652374765410711708 (1 row) + ``` + + Figure 9.76: Lemon Scooter customers who received an email + +14. Select all records from `lemon_emails` where a sale occurred within the first 3 weeks of the start of production. Store the results in a new table – `lemon_emails_threewks`: + + ```javascript + SELECT * INTO lemon_emails_threewks FROM lemon_emails WHERE sales_transaction_date < '2013-06-01'; + ``` + +15. 
Count the number of emails that were made for Lemon Scooters in the first 3 weeks: + + ```javascript + SELECT COUNT(sales_transaction_date) FROM lemon_emails_threewks; + ``` + + + The following is the output of the preceding code: + + ```javascript + count ----------- 0 (1 row) + ``` + + Figure 9.77: Unique sales of the Lemon Scooter in the first 3 weeks + + +There is a lot of interesting information here. We can see that 25% of customers who opened an email made a purchase, which is a lot higher than the 18% figure for the Bat Scooter. We have also calculated that just over 3.6% of customers who purchased a Lemon Scooter were sent an email, which is much lower than the almost 6% of Bat Scooter customers. The final interesting piece of information we can see is that none of the Lemon Scooter customers received an email during the first 3 weeks of product launch compared with the 82 Bat Scooter customers, which is approximately 50% of all customers in the first 3 weeks! + +In this exercise, we investigated the performance of an email marketing campaign for the Lemon Scooter to allow for a comparison with the Bat Scooter using various SQL techniques. diff --git "a/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.sql" "b/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.sql" new file mode 100644 index 0000000..7e855cc --- /dev/null +++ "b/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.sql" @@ -0,0 +1,1495 @@ + +# Lesson : Using SQL to Uncover the Truth – a Case Study en-uCertify + +> ## Excerpt +> +> uCertify offers online computer courses and hands-on labs on project management, data analytics, cybersecurity, and more to advance your IT career. en + +--- +In this case study, we will be following the scientific method to help solve our problem, which, at its heart, is about testing guesses (or hypotheses) using objectively collected data. 
We can decompose the scientific method into the following key steps: + +1. Define the question to answer: what caused the drop in sales of the Bat Scooter after approximately 2 weeks? +2. Complete background research to gather sufficient information to propose an initial hypothesis for the event or phenomenon. +3. Construct a hypothesis to explain the event or answer the question. +4. Define and execute an objective experiment to test the hypothesis. In an ideal scenario, all aspects of the experiment should be controlled and fixed, except for the phenomenon that is being tested under the hypothesis. +5. Analyze the data collected during the experiment. +6. Report the result of the analysis, which will hopefully explain why there was a drop in the sales of Bat Scooters. + +Note that in this lesson, we are completing a post-hoc analysis of the data, that is, the event has happened, and all available data has been collected. Post-hoc data analysis is particularly useful when events have been recorded that cannot be repeated or when certain external factors cannot be controlled. It is with this data that we are able to perform our analysis, and, as such, we will extract information to support or refute our hypothesis. We will, however, be unable to definitively confirm or reject the hypothesis without practical experimentation. The question that will be the subject of this lesson and that we need to answer is this: why did the sales of the ZoomZoom Bat Scooter drop by approximately 20% after about 2 weeks? + +So, let's start with the absolute basics. + +Exercise 9.1: Preliminary Data Collection Using SQL Techniques + +In this exercise, we will collect preliminary data using SQL techniques. We have been told that the pre-orders for the ZoomZoom Bat Scooter were good, but the orders suddenly dropped by 20%. So, when was production started on the scooter, and how much was it selling for? 
How does the Bat Scooter compare with other types of scooters in terms of price? The goal of this exercise is to answer these questions: + +1. Load the `sqlda` database (the accompanying source code is located [here](https://github.com/TrainingByPackt/SQL-for-Data-Analytics/tree/master/Datasets)): + + ```javascript + psql sqlda + ``` + +2. List the model, `base_msrp` (MSRP: manufacturer's suggested retail price) and `production_start_date` fields within the `products` table for product types matching `scooter`: + + ```javascript + SELECT model, base_msrp, production_start_date FROM products WHERE product_type='scooter'; + ``` + + The following table shows the details of all the products for the `scooter` product type: + + | model | base\_msrp | production\_start\_date | + | --- | --- | --- | + | Lemon | 399.99 | 2010-03-03 00:00:00 | + | Lemon Limited Edition | 799.99 | 2011-01-03 00:00:00 | + | Lemon | 499.99 | 2013-05-01 00:00:00 | + | Blade | 699.99 | 2014-06-23 00:00:00 | + | Bat | 599.99 | 2016-10-10 00:00:00 | + | Bat Limited Edition | 699.99 | 2017-02-15 00:00:00 | + | Lemon Zester | 349.99 | 2019-02-04 00:00:00 | + | (7 rows) | + + Figure 9.1: Basic list of scooters with the manufacturer's suggested retail price and production start date + + Looking at the results from the search, we can see that we have two scooter products with **Bat** in the name: **Bat** and **Bat Limited Edition**. The **Bat** Scooter started production on October 10, 2016, with a suggested retail price of $599.99, and the **Bat Limited Edition** Scooter started production approximately 4 months later, on February 15, 2017, at a price of $699.99. + + Looking at the product information supplied, we can see that the Bat Scooter is somewhat unique from a price perspective, being the only scooter with a suggested retail price of $599.99. There are two others at $699.99 and one at $499.99. 
+ + Similarly, if we consider the production start date in isolation, the original Bat Scooter is again unique in that it is the only scooter starting production in the last quarter or even half of the year (date format: _YYYY-MM-DD_). All other scooters start production in the first half of the year, with only the Blade scooter starting production in June. + + In order to use the sales information in conjunction with the product information available, we also need to get the product ID for each of the scooters. + +3. Extract the model name and product IDs for the scooters available within the database. We will need this information to reconcile the product information with the available sales information: + + ```javascript + SELECT model, product_id FROM products WHERE product_type='scooter'; + ``` + + The query yields the product IDs shown in the following table: + + | model | product\_id | + | --- | --- | + | Lemon | 1 | + | Lemon Limited Edition | 2 | + | Lemon | 3 | + | Blade | 5 | + | Bat | 7 | + | Bat Limited Edition | 8 | + | Lemon Zester (7 rows) | 12 | + + Figure 9.2: Scooter product ID codes + +4. Insert the results of this query into a new table called `product_names`: + + ```javascript + SELECT model, product_id INTO product_names FROM products WHERE product_type='scooter'; + ``` + + Inspect the contents of the `product_names` table shown in the following figure: + + | model | product\_id | + | --- | --- | + | Lemon | 1 | + | Lemon Limited Edition | 2 | + | Lemon | 3 | + | Blade | 5 | + | Bat | 7 | + | Bat Limited Edition | 8 | + | Lemon Zester (7 rows) | 12 | + + Figure 9.3: Contents of the new product\_names table + +As described in the output, we can see that the Bat Scooter lies between the price points of some of the other scooters and that it was also manufactured a lot later in the year compared to the others. 
+ +By completing this very preliminary data collection step, we have the information required to collect sales data on the Bat Scooter as well as other scooter products for comparison. While this exercise involved using the simplest SQL commands, it has already yielded some useful information. + +This exercise has also demonstrated that even the simplest SQL commands can reveal useful information and that they should not be underestimated. In the next exercise, we will try to extract the sales information related to the reduction in sales of the Bat Scooter. + +Exercise 9.2: Extracting the Sales Information + +In this exercise, we will use a combination of simple `SELECT` statements, as well as aggregate and window functions, to examine the sales data. With the preliminary information at hand, we can use it to extract the Bat Scooter sales records and discover what is actually going on. We have a table, `product_names`, that contains both the model names and product IDs. We will need to combine this information with the sales records and extract only those for the Bat Scooter: + +1. Load the `sqlda` database: + + ```javascript + psql sqlda + ``` + +2. List the available fields in the `sqlda` database: + + ```javascript + \d + ``` + + The preceding query yields the following fields present in the database: + + | Column | Table "public.sales" Type | Collation | Nullable | Default | + | --- | --- | --- | --- | --- | + | customer\_id | bigint | | | | + | product\_id | bigint | | | | + | sales\_transaction\_date | timestamp without time zone | | | | + | sales\_amount | double precision | | | | + | channel | text | | | | + | dealership\_id | double precision | | | | + + Figure 9.4: Structure of the sales table + + We can see that we have references to customer and product IDs, as well as the transaction date, sales information, the sales channel, and the dealership ID. +3. 
Use an inner join on the `product_id` columns of both the `product_names` table and the `sales` table. From the result of the inner join, select the model, `customer_id`, `sales_transaction_date`, `sales_amount`, channel, and `dealership_id`, and store the values in a separate table called `products_sales`: + + ```javascript + SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id INTO products_sales FROM sales INNER JOIN product_names ON sales.product_id=product_names.product_id; + ``` + + The output of the preceding code can be seen in the next step. + + Note + + Throughout this lesson, we will be storing the results of queries and calculations in separate tables as this will allow you to look at the results of the individual steps in the analysis at any time. In a commercial/production setting, we would typically only store the end result in a separate table, depending upon the context of the problem being solved. + +4. Look at the first five rows of this new table by using the following query: + + ```javascript + SELECT * FROM products_sales LIMIT 5; + ``` + + The following table lists the first five rows of the new table. It shows the sale amount and the transaction details, such as the date and time: + + | model | customer\_id | sales\_transaction\_date | sales\_amount | channel | dealership\_id | + | --- | --- | --- | --- | --- | --- | + | Lemon | 41604 | 2012-03-30 22:45:29 | 399.99 | internet | | + | Lemon | 41531 | 2010-09-07 22:53:16 | 399.99 | internet | | + | Lemon | 41443 | 2011-05-24 02:19:11 | 399.99 | internet | | + | Lemon | 41291 | 2010-08-08 14:12:52 | 319.992 | internet | | + | Lemon | 41084 | 2012-01-09 03:34:52 | 319.992 | internet | | + | (5 rows) | + + Figure 9.5: The combined product sales table + +5. Select all the information from the `products_sales` table that is available for the Bat Scooter and order the sales information by `sales_transaction_date` in ascending order. 
By selecting the data in this way, we can look at the first few days of the sales records in detail: + + ```javascript + SELECT * FROM products_sales WHERE model='Bat' ORDER BY sales_transaction_date; + ``` + + The preceding query generates the following output: + + | model | customer\_id | sales\_transaction\_date | sales\_amount | channel | dealership\_id | + | --- | --- | --- | --- | --- | --- | + | Bat | 4319 | 2016-10-10 00:41:57 | 599.99 | Internet | | + | Bat | 40250 | 2016-10-10 02:47:28 | 599.99 | dealership | 4 | + | Bat | 35497 | 2016-10-10 04:21:08 | 599.99 | dealership | 2 | + | Bat | 4553 | 2016-10-10 07:42:59 | 599.99 | dealership | 11 | + | Bat | 11678 | 2016-10-10 09:21:08 | 599.99 | internet | | + | Bat | 45868 | 2016-10-10 10:29:29 | 599.99 | internet | + | Bat | 24125 | 2016-10-10 18:57:25 | 599.99 | dealership | 1 | + | Bat | 31307 | 2016-10-10 21:22:38 | 599.99 | internet | | + | Bat | 42213 | 2016-10-10 21:27:36 | 599.99 | internet | | + | Bat | 47790 | 2016-10-11 01:28:58 | 599.99 | dealership | 20 | + | Bat | 6342 | 2016-10-11 03:04:57 | 599.99 | internet | | + | Bat | 45880 | 2016-10-11 04:09:19 | 599.99 | dealership | 7 | + | Bat | 43477 | 2016-10-11 05:24:50 | 599.99 | internet | | + | Bat | 6322 | 2016-10-11 08:48:07 | 599.99 | internet | + | Bat | 46653 | 2016-10-11 15:47:01 | 599.99 | dealership | 6 | + | Bat | 9045 | 2016-10-12 00:15:20 | 599.99 | dealership | 19 | + | Bat | 23679 | 2016-10-12 00:17:53 | 539.991 | internet | | + | Bat | 49856 | 2016-10-12 00:26:15 | 599.99 | dealership | 10 | + | Bat | 45256 | 2016-10-12 02:08:01 | 539.991 | dealership | 7 | + | Bat | 48809 | 2016-10-12 05:08:43 | 599.99 | internet | | + | Bat | 42625 | 2016-10-12 06:17:55 | 599.99 | internet | | + | Bat | 39653 | 2016-10-12 06:28:25 | 599.99 | dealership | 7 | + | Bat | 49226 | 2016-10-12 10:26:13 | 539.991 | internet | | + | Bat | 18602 | 2016-10-12 15:09:53 | 599.99 | internet | | + + Figure 9.6: Ordered sales records + +6. 
Count the number of records available by using the following query: + + ```javascript + SELECT COUNT(model) FROM products_sales WHERE model='Bat'; + ``` + + The model count for the `'Bat'` model is as shown here: + + ```javascript + count --------- 7328 (1 row) + ``` + + **Figure 9.7: Count of the number of sales records** + + So, we have **7328** sales, beginning October 10, 2016. Check the date of the final sales record by performing the next step. +7. Determine the last sale date for the Bat Scooter by selecting the maximum (using the `MAX` function) for `sales_transaction_date`: + + ```javascript + SELECT MAX(sales_transaction_date) FROM products_sales WHERE model='Bat'; + ``` + + The last sale date is shown here: + + ```javascript + Max ------------------ 2019-05-31 22:15:30 (1 row) + ``` + + Figure 9.8: Last sale date + + The last sale in the database occurred on May 31, 2019. +8. Collect the daily sales volume for the Bat Scooter and place it in a new table called `bat_sales` to confirm the information provided by the sales team stating that sales dropped by 20% after the first 2 weeks: + + ```javascript + SELECT * INTO bat_sales FROM products_sales WHERE model='Bat' ORDER BY sales_transaction_date; + ``` + +9. Remove the time information to allow tracking of sales by date, since, at this stage, we are not interested in the time at which each sale occurred. To do so, run the following query: + + ```javascript + UPDATE bat_sales SET sales_transaction_date=DATE(sales_transaction_date); + ``` + +10. 
Display the first five records of `bat_sales` ordered by `sales_transaction_date`: + + ```javascript + SELECT * FROM bat_sales ORDER BY sales_transaction_date LIMIT 5; + ``` + + The following is the output of the preceding code: + + | model | customer\_id | sales\_transaction\_date | sales\_amount | channel | dealership\_id | + | --- | --- | --- | --- | --- | --- | + | Bat | 4553 | 2016-10-10 00:00:00 | 599.99 | dealership | 11 | + | Bat | 35497 | 2016-10-10 00:00:00 | 599.99 | dealership | 2 | + | Bat | 40250 | 2016-10-10 00:00:00 | 599.99 | dealership | 4 | + | Bat | 4319 | 2016-10-10 00:00:00 | 599.99 | internet | | + | Bat | 11678 | 2016-10-10 00:00:00 | 599.99 | internet | | + | (5 rows) | + + Figure 9.9: First five records of Bat Scooter sales + + Create a new table (`bat_sales_daily`) containing the sales transaction dates and a daily count of total sales: + + ```javascript + SELECT sales_transaction_date, COUNT(sales_transaction_date) INTO bat_sales_daily FROM bat_sales GROUP BY sales_transaction_date ORDER BY sales_transaction_date; + ``` + +11. 
Examine the first `22` records (a little over 3 weeks), as sales were reported to have dropped after approximately the first 2 weeks: + + ```javascript + SELECT * FROM bat_sales_daily LIMIT 22; + ``` + + This will display the following output: + + | sales\_transaction\_date | count | + | --- | --- | + | 2016-10-10 00:00:00 | 9 | + | 2016-10-11 00:00:00 | 6 | + | 2016-10-12 00:00:00 | 10 | + | 2016-10-13 00:00:00 | 10 | + | 2016-10-14 00:00:00 | 5 | + | 2016-10-15 00:00:00 | 10 | + | 2016-10-16 00:00:00 | 14 | + | 2016-10-17 00:00:00 | 9 | + | 2016-10-18 00:00:00 | 11 | + | 2016-10-19 00:00:00 | 12 | + | 2016-10-20 00:00:00 | 10 | + | 2016-10-21 00:00:00 | 6 | + | 2016-10-22 00:00:00 | 2 | + | 2016-10-23 00:00:00 | 5 | + | 2016-10-24 00:00:00 | 6 | + | 2016-10-25 00:00:00 | 9 | + | 2016-10-26 00:00:00 | 2 | + | 2016-10-27 00:00:00 | 4 | + | 2016-10-28 00:00:00 | 7 | + | 2016-10-29 00:00:00 | 5 | + | 2016-10-30 00:00:00 | 5 | + | 2016-10-31 00:00:00 | 3 | + | (22 rows) | + + Figure 9.10: First 3 weeks of sales + +We can see a drop in sales after October 20, as there are 7 days in the first 11 rows that record double-digit sales, and none over the next 11 days. + +At this stage, we can confirm that there has been a drop off in sales, although we are yet to quantify precisely the extent of the reduction or the reason for the drop off in sales. + +Activity 9.1: Quantifying the Sales Drop + +In this activity, we will use our knowledge of the windowing methods that we learned in _Lesson 5_, _Window Functions for Data Analysis_. In the previous exercise, we identified the occurrence of the sales drop as being approximately 10 days after launch. Here, we will try to quantify the drop off in sales for the Bat Scooter. + +Perform the following steps to complete the activity: + +1. Load the `sqlda` database from the accompanying source code located at this [link](https://github.com/TrainingByPackt/SQL-for-Data-Analytics/tree/master/Datasets). +2. 
Using the `OVER` and `ORDER BY` statements, compute the daily cumulative sum of sales. This provides us with a discrete count of sales over time on a daily basis. Insert the results into a new table called `bat_sales_growth`. +3. Compute a 7-day `lag` of the `sum` column, and then insert all the columns of `bat_sales_growth` and the new `lag` column into a new table, `bat_sales_daily_delay`. This `lag` column indicates what sales were like 1 week prior to the given record, allowing us to compare sales with the previous week. +4. Inspect the first 15 rows of `bat_sales_daily_delay`. +5. Compute the sales growth as a percentage, comparing the current sales volume to that of 1 week prior. Insert the resulting table into a new table called `bat_sales_delay_vol`. +6. Compare the first 22 values of the `bat_sales_delay_vol` table to ascertain a sales drop. + +**Solution** + +1. Load the sqlda database: + + ```javascript + psql sqlda + ``` + +2. Compute the daily cumulative sum of sales using the OVER and ORDER BY statements. 
Insert the results into a new table called bat\_sales\_growth: + + ```javascript + SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) INTO bat_sales_growth FROM bat_sales_daily; + ``` + + The following table shows the daily cumulative sum of sales: + + | sales\_transaction\_date | count | sum | + | --- | --- | --- | + | 2016-10-10 00:00:00 | 9 | 9 | + | 2016-10-11 00:00:00 | 6 | 15 | + | 2016-10-12 00:00:00 | 10 | 25 | + | 2016-10-13 00:00:00 | 10 | 35 | + | 2016-10-14 00:00:00 | 5 | 40 | + | 2016-10-15 00:00:00 | 10 | 50 | + | 2016-10-16 00:00:00 | 14 | 64 | + | 2016-10-17 00:00:00 | 9 | 73 | + | 2016-10-18 00:00:00 | 11 | 84 | + | 2016-10-19 00:00:00 | 12 | 96 | + | 2016-10-20 00:00:00 | 10 | 106 | + | 2016-10-21 00:00:00 | 6 | 112 | + | 2016-10-22 00:00:00 | 2 | 114 | + | 2016-10-23 00:00:00 | 5 | 119 | + | 2016-10-24 00:00:00 | 6 | 125 | + | 2016-10-25 00:00:00 | 9 | 134 | + | 2016-10-26 00:00:00 | 2 | 136 | + | 2016-10-27 00:00:00 | 4 | 140 | + | 2016-10-28 00:00:00 | 7 | 147 | + | 2016-10-29 00:00:00 | 5 | 152 | + | 2016-10-30 00:00:00 | 5 | 157 | + | 2016-10-31 00:00:00 | 3 | 160 | + + Figure A: Daily sales count + +3. Compute a 7-day lag function of the sum column and insert all the columns of bat\_sales\_daily and the new lag column into a new table, bat\_sales\_daily\_delay. This lag column indicates what the sales were like 1 week before the given record: + + ```javascript + SELECT *, lag(sum, 7) OVER (ORDER BY sales_transaction_date) INTO bat_sales_daily_delay FROM bat_sales_growth; + ``` + +4. 
Inspect the first 15 rows of bat\_sales\_daily\_delay: + + ```javascript + SELECT * FROM bat_sales_daily_delay LIMIT 15; + ``` + + The following is the output of the preceding code: + + | sales\_transaction\_date | count | sum | lag | + | --- | --- | --- | --- | + | 2016-10-10 00:00:00 | 9 | 9 | | + | 2016-10-11 00:00:00 | 6 | 15 | | + | 2016-10-12 00:00:00 | 10 | 25 | | + | 2016-10-13 00:00:00 | 10 | 35 | | + | 2016-10-14 00:00:00 | 5 | 40 | | + | 2016-10-15 00:00:00 | 10 | 50 | | + | 2016-10-16 00:00:00 | 14 | 64 | | + | 2016-10-17 00:00:00 | 9 | 73 | 9 | + | 2016-10-18 00:00:00 | 11 | 84 | 15 | + | 2016-10-19 00:00:00 | 12 | 96 | 25 | + | 2016-10-20 00:00:00 | 10 | 106 | 35 | + | 2016-10-21 00:00:00 | 6 | 112 | 40 | + | 2016-10-22 00:00:00 | 2 | 114 | 50 | + | 2016-10-23 00:00:00 | 5 | 119 | 64 | + | 2016-10-24 00:00:00 | 6 | 125 | 73 | + | (15 rows) | + + Figure B: Daily sales delay with lag + +5. Compute the sales growth as a percentage, comparing the current sales volume to that of 1 week prior. Insert the resulting table into a new table called bat\_sales\_delay\_vol: + + ```javascript + SELECT *, (sum-lag)/lag AS volume INTO bat_sales_delay_vol FROM bat_sales_daily_delay ; + ``` + + Note + + The percentage sales volume can be calculated via the following equation: + + ```javascript + (new_volume – old_volume) / old_volume + ``` + +6. 
Compare the first 22 values of the bat\_sales\_delay\_vol table: + + ```javascript + SELECT * FROM bat_sales_delay_vol LIMIT 22; + ``` + + The delay volume for the first 22 entries can be seen in the following: + + | sales\_transaction\_date | count | sum | lag | volume | + | --- | --- | --- | --- | --- | + | 2016-10-10 00:00:00 | 9 | 9 | | | + | 2016-10-11 00:00:00 | 6 | 15 | | | + | 2016-10-12 00:00:00 | 10 | 25 | | | + | 2016-10-13 00:00:00 | 10 | 35 | | | + | 2016-10-14 00:00:00 | 5 | 40 | | | + | 2016-10-15 00:00:00 | 10 | 50 | | | + | 2016-10-16 00:00:00 | 14 | 64 | | | + | 2016-10-17 00:00:00 | 9 | 73 | 9 | 7.1111111111111111 | + | 2016-10-18 00:00:00 | 11 | 84 | 15 | 4.6000000000000000 | + | 2016-10-19 00:00:00 | 12 | 96 | 25 | 2.8400000000000000 | + | 2016-10-20 00:00:00 | 10 | 106 | 35 | 2.0285714285714286 | + | 2016-10-21 00:00:00 | 6 | 112 | 40 | 1.8000000000000000 | + | 2016-10-22 00:00:00 | 2 | 114 | 50 | 1.2800000000000000 | + | 2016-10-23 00:00:00 | 5 | 119 | 64 | 0.85937500000000000000 | + | 2016-10-24 00:00:00 | 6 | 125 | 73 | 0.71232876712328767123 | + | 2016-10-25 00:00:00 | 9 | 134 | 84 | 0.59523809523809523810 | + | 2016-10-26 00:00:00 | 2 | 136 | 96 | 0.41666666666666666667 | + | 2016-10-27 00:00:00 | 4 | 140 | 106 | 0.32075471698113207547 | + | 2016-10-28 00:00:00 | 7 | 147 | 112 | 0.31250000000000000000 | + | 2016-10-29 00:00:00 | 5 | 152 | 114 | 0.33333333333333333333 | + | 2016-10-30 00:00:00 | 5 | 157 | 119 | 0.31932773109243697479 | + | 2016-10-31 00:00:00 | 3 | 160 | 125 | 0.28000000000000000000 | + | (22 rows) | | | | | + + Figure C: Relative sales volume of the scooter over 3 weeks + +Looking at the output table, we can see four sets of information: the daily sales count, the cumulative sum of the daily sales count, the cumulative sum offset by 1 week (the lag), and the relative daily sales volume. 
+ +**Expected Output:** + +| sales\_transaction\_date | count | sum | lag | volume | +| --- | --- | --- | --- | --- | +| 2016-10-10 00:00:00 | 9 | 9 | | | +| 2016-10-11 00:00:00 | 6 | 15 | | | +| 2016-10-12 00:00:00 | 10 | 25 | | | +| 2016-10-13 00:00:00 | 10 | 35 | | | +| 2016-10-14 00:00:00 | 5 | 40 | | | +| 2016-10-15 00:00:00 | 10 | 50 | | | +| 2016-10-16 00:00:00 | 14 | 64 | | | +| 2016-10-17 00:00:00 | 9 | 73 | 9 | 7.1111111111111111 | +| 2016-10-18 00:00:00 | 11 | 84 | 15 | 4.6000000000000000 | +| 2016-10-19 00:00:00 | 12 | 96 | 25 | 2.8400000000000000 | +| 2016-10-20 00:00:00 | 10 | 106 | 35 | 2.0285714285714286 | +| 2016-10-21 00:00:00 | 6 | 112 | 40 | 1.8000000000000000 | +| 2016-10-22 00:00:00 | 2 | 114 | 50 | 1.2800000000000000 | +| 2016-10-23 00:00:00 | 5 | 119 | 64 | 0.85937500000000000000 | +| 2016-10-24 00:00:00 | 6 | 125 | 73 | 0.71232876712328767123 | +| 2016-10-25 00:00:00 | 9 | 134 | 84 | 0.59523809523809523810 | +| 2016-10-26 00:00:00 | 2 | 136 | 96 | 0.41666666666666666667 | +| 2016-10-27 00:00:00 | 4 | 140 | 106 | 0.32075471698113207547 | +| 2016-10-28 00:00:00 | 7 | 147 | 112 | 0.31250000000000000000 | +| 2016-10-29 00:00:00 | 5 | 152 | 114 | 0.33333333333333333333 | +| 2016-10-30 00:00:00 | 5 | 157 | 119 | 0.31932773109243697479 | +| 2016-10-31 00:00:00 | 3 | 160 | 125 | 0.28000000000000000000 | +| (22 rows) | + +Figure 9.11: Relative sales volume of the Bat Scooter over 3 weeks + +While the count and cumulative `sum` columns are reasonably straightforward, why do we need the `lag` and `volume` columns? This is because we are looking for drops in sales growth over the first couple of weeks, hence, we compare the daily sum of sales to the same values 7 days earlier (the lag). By subtracting the sum and lag values and dividing by the lag, we obtain the volume value and can determine sales growth compared to the previous week. + +Notice that the sales volume on October 17 is 700% above that of the launch date of October 10. 
By October 22, the volume is over double that of the week prior. As time passes, this relative difference begins to decrease dramatically. By the end of October, the volume is 28% higher than the week prior. At this stage, we have observed and confirmed the presence of a reduction in sales growth after the first 2 weeks. The next stage is to attempt to explain the causes of the reduction. + +Exercise 9.3: Launch Timing Analysis + +In this exercise, we will try to identify the causes of a sales drop. Now that we have confirmed the presence of the sales growth drop, we will try to explain the cause of the event. We will test the hypothesis that the timing of the scooter launch contributed to the reduction in sales. Remember, in _Exercise 9.1, Preliminary Data Collection Using SQL Techniques_, that the ZoomZoom Bat Scooter launched on October 10, 2016. Observe the following steps to complete the exercise: + +1. Load the `sqlda` database: + + ```javascript + psql sqlda + ``` + +2. Examine the other products in the database. In order to determine whether the launch date contributed to the sales drop, we need to compare the ZoomZoom Bat Scooter to other scooter products according to the launch date. 
Execute the following query to check the launch dates: + + ```javascript + SELECT * FROM products; + ``` + + The following figure shows the launch dates for all the products: + + | product\_id | model | year | product\_type | base\_msrp | production\_start\_date | production\_end\_date | + | --- | --- | --- | --- | --- | --- | --- | + | 1 | Lemon | 2010 | scooter | 399.99 | 2010-03-03 00:00:00 | 2012-06-08 00:00:00 | + | 2 | Lemon Limited Edition | 2011 | scooter | 799.99 | 2011-01-03 00:00:00 | 2011-03-30 00:00:00 | + | 3 | Lemon | 2013 | scooter | 499.99 | 2013-05-01 00:00:00 | 2018-12-28 00:00:00 | + | 4 | Model Chi | 2014 | automobile | 115,000.00 | 2014-06-23 00:00:00 | 2018-12-28 00:00:00 | + | 5 | Blade | 2014 | scooter | 699.99 | 2014-06-23 00:00:00 | 2015-01-27 00:00:00 | + | 6 | Model Sigma | 2015 | automobile | 65,500.00 | 2015-04-15 00:00:00 | 2018-10-01 00:00:00 | + | 7 | Bat | 2016 | scooter | 599.99 | 2016-10-10 00:00:00 | | + | 8 | Bat Limited Edition | 2017 | scooter | 699.99 | 2017-02-15 00:00:00 | | + | 9 | Model Epsilon | 2017 | automobile | 35,000.00 | 2017-02-15 00:00:00 | | + | 10 | Model Gamma | 2017 | automobile | 85,750.00 | 2017-02-15 00:00:00 | | + | 11 | Model Chi | 2019 | automobile | 95,000.00 | 2019-02-04 00:00:00 | | + | 12 | Lemon Zester | 2019 | scooter | 349.99 | 2019-02-04 00:00:00 | | + | (12 rows) | + + Figure 9.12: Products with launch dates + + All the other products launched before July, compared to the Bat Scooter, which launched in October. +3. 
List all scooters from the `products` table, as we are only interested in comparing scooters: + + ```javascript + SELECT * FROM products WHERE product_type='scooter'; + ``` + + The following table shows all the information for products with the product type of `scooter`: + + | product\_id | model | year | product\_type | base\_msrp | production\_start\_date | production\_end\_date | + | --- | --- | --- | --- | --- | --- | --- | + | 1 | Lemon | 2010 | scooter | 399.99 | 2010-03-03 00:00:00 | 2012-06-08 00:00:00 | + | 2 | Lemon Limited Edition | 2011 | scooter | 799.99 | 2011-01-03 00:00:00 | 2011-03-30 00:00:00 | + | 3 | Lemon | 2013 | scooter | 499.99 | 2013-05-01 00:00:00 | 2018-12-28 00:00:00 | + | 5 | Blade | 2014 | scooter | 699.99 | 2014-06-23 00:00:00 | 2015-01-27 00:00:00 | + | 7 | Bat | 2016 | scooter | 599.99 | 2016-10-10 00:00:00 | | + | 8 | Bat Limited Edition | 2017 | scooter | 699.99 | 2017-02-15 00:00:00 | | + | 12 | Lemon Zester | 2019 | scooter | 349.99 | 2019-02-04 00:00:00 | | + | (7 rows) | + + Figure 9.13: Scooter product launch dates + + To test the hypothesis that the time of year had an impact on sales performance, we require a scooter model to use as the control or reference group. In an ideal world, we could launch the ZoomZoom Bat Scooter in a different location or region, for example, but just at a different time, and then compare the two. However, we cannot do this here. Instead, we will choose a similar scooter launched at a different time. There are several different options in the product database, each with its own similarities and differences to the experimental group (ZoomZoom Bat Scooter). In our opinion, the Bat Limited Edition Scooter is suitable for comparison (the control group). It is slightly more expensive, but it was launched only 4 months after the Bat Scooter. Looking at its name, the Bat Limited Edition Scooter seems to share most of the same features, with a number of extras given that it's a "limited edition." +4. 
Select the first five rows of the `sales` table: + + ```javascript + SELECT * FROM sales LIMIT 5; + ``` + + The sales information for the first five customers is as follows: + + | customer\_id | product\_id | sales\_transaction\_date | sales\_amount | channel | dealership\_id | + | --- | --- | --- | --- | --- | --- | + | 1 | 7 | 2017-07-19 08:38:41 | 479.992 | internet | | + | 22 | 7 | 2017-08-14 09:59:02 | 599.99 | dealership | 20 | + | 145 | 7 | 2019-01-20 10:40:11 | 479.992 | internet | | + | 289 | 7 | 2017-05-09 14:20:04 | 539.991 | dealership | 7 | + | 331 | 7 | 2019-05-21 20:03:21 | 539.991 | dealership | 4 | + | (5 rows) | + + Figure 9.14: First five rows of sales data + +5. Select the `model` and `sales_transaction_date` columns from both the products and sales tables for the Bat Limited Edition Scooter. Store the results in a table, `bat_ltd_sales`, ordered by the `sales_transaction_date` column, from the earliest date to the latest: + + ```javascript + SELECT products.model, sales.sales_transaction_date INTO bat_ltd_sales FROM sales INNER JOIN products ON sales.product_id=products.product_id WHERE sales.product_id=8 ORDER BY sales.sales_transaction_date; + ``` + +6. Select the first five lines of `bat_ltd_sales`, using the following query: + + ```javascript + SELECT * FROM bat_ltd_sales LIMIT 5; + ``` + + The following table shows the transaction details for the first five entries of `Bat Limited Edition`: + + | model | sales\_transaction\_date | + | --- | --- | + | Bat Limited Edition | 2017-02-15 01:49:02 | + | Bat Limited Edition | 2017-02-15 09:42:37 | + | Bat Limited Edition | 2017-02-15 10:48:31 | + | Bat Limited Edition | 2017-02-15 12:22:41 | + | Bat Limited Edition | 2017-02-15 13:51:34 | + | (5 rows) | + + Figure 9.15: First five sales of the Bat Limited Edition Scooter + +7. Calculate the total number of sales for `Bat Limited Edition`. 
We can check this by using the `COUNT` function: + + ```javascript + SELECT COUNT(model) FROM bat_ltd_sales; + ``` + + The total sales count can be seen in the following figure: + + ```javascript + count ----------- 5803 (1 row) + ``` + + Figure 9.16: Count of Bat Limited Edition sales + + This is compared to the original Bat Scooter, which sold 7,328 items. +8. Check the transaction details of the last Bat Limited Edition sale. We can check this by using the `MAX` function: + + ```javascript + SELECT MAX(sales_transaction_date) FROM bat_ltd_sales; + ``` + + The transaction details of the last `Bat Limited Edition` product are as follows: + + ```javascript + max ------------------- 2019-05-31 15:08:03 (1 row) + ``` + + Figure 9.17: Last date (MAX) of the Bat Limited Edition sale + +9. Adjust the table to cast the transaction date column as a date, discarding the time information. As with the original Bat Scooter, we are only interested in the date of the sale, not the date and time of the sale. Write the following query: + + ```javascript + ALTER TABLE bat_ltd_sales ALTER COLUMN sales_transaction_date TYPE date; + ``` + +10. Again, select the first five records of `bat_ltd_sales`: + + ```javascript + SELECT * FROM bat_ltd_sales LIMIT 5; + ``` + + The following table shows the first five records of `bat_ltd_sales`: + + | model | sales\_transaction\_date | + | --- | --- | + | Bat Limited Edition | 2017-02-15 | + | Bat Limited Edition | 2017-02-15 | + | Bat Limited Edition | 2017-02-15 | + | Bat Limited Edition | 2017-02-15 | + | Bat Limited Edition | 2017-02-15 | + | (5 rows) | + + Figure 9.18: Select the first five Bat Limited Edition sales by date + +11. In a similar manner to the standard Bat Scooter, create a count of sales on a daily basis. 
Insert the results into the `bat_ltd_sales_count` table by using the following query: + + ```javascript + SELECT sales_transaction_date, count(sales_transaction_date) INTO bat_ltd_sales_count FROM bat_ltd_sales GROUP BY sales_transaction_date ORDER BY sales_transaction_date; + ``` + +12. List the sales count of all the `Bat Limited` products using the following query: + + ```javascript + SELECT * FROM bat_ltd_sales_count; + ``` + + + The sales count is shown in the following figure: + + | sales\_transaction\_date | count | + | --- | --- | + | 2017-02-15 | 6 | + | 2017-02-16 | 2 | + | 2017-02-17 | 1 | + | 2017-02-18 | 4 | + | 2017-02-19 | 5 | + | 2017-02-20 | 6 | + | 2017-02-21 | 5 | + | 2017-02-22 | 4 | + | 2017-02-23 | 6 | + | 2017-02-24 | 2 | + | 2017-02-25 | 2 | + | 2017-02-26 | 2 | + | 2017-02-27 | 4 | + | 2017-02-28 | 4 | + | 2017-03-01 | 5 | + | 2017-03-02 | 1 | + + Figure 9.19: Bat Limited Edition daily sales + +13. Compute the cumulative sum of the daily sales figures and insert the resulting table into `bat_ltd_sales_growth`: + + ```javascript + SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) INTO bat_ltd_sales_growth FROM bat_ltd_sales_count; + ``` + +14. 
Select the first 22 days of sales records from `bat_ltd_sales_growth`: + + ```javascript + SELECT * FROM bat_ltd_sales_growth LIMIT 22; + ``` + + + The following table displays the first 22 records of sales growth: + + | sales\_transaction\_date | count | sum | + | --- | --- | --- | + | 2017-02-15 | 6 | 6 | + | 2017-02-16 | 2 | 8 | + | 2017-02-17 | 1 | 9 | + | 2017-02-18 | 4 | 13 | + | 2017-02-19 | 5 | 18 | + | 2017-02-20 | 6 | 24 | + | 2017-02-21 | 5 | 29 | + | 2017-02-22 | 4 | 33 | + | 2017-02-23 | 6 | 39 | + | 2017-02-24 | 2 | 41 | + | 2017-02-25 | 2 | 43 | + | 2017-02-26 | 2 | 45 | + | 2017-02-27 | 4 | 49 | + | 2017-02-28 | 4 | 53 | + | 2017-03-01 | 5 | 58 | + | 2017-03-02 | 1 | 59 | + | 2017-03-03 | 3 | 62 | + | 2017-03-04 | 8 | 70 | + | 2017-03-05 | 4 | 74 | + | 2017-03-06 | 7 | 81 | + | 2017-03-07 | 7 | 88 | + | 2017-03-08 | 8 | 96 | + | (22 rows) | + + Figure 9.20: Bat Limited Edition sales – cumulative sum + +15. Compare this sales record with the one for the original Bat Scooter sales, as shown in the following code: + + ```javascript + SELECT * FROM bat_sales_growth LIMIT 22; + ``` + + + The following table shows the sales details for the first 22 records of the `bat_sales_growth` table: + + | sales\_transaction\_date | count | sum | + | --- | --- | --- | + | 2016-10-10 00:00:00 | 9 | 9 | + | 2016-10-11 00:00:00 | 6 | 15 | + | 2016-10-12 00:00:00 | 10 | 25 | + | 2016-10-13 00:00:00 | 10 | 35 | + | 2016-10-14 00:00:00 | 5 | 40 | + | 2016-10-15 00:00:00 | 10 | 50 | + | 2016-10-16 00:00:00 | 14 | 64 | + | 2016-10-17 00:00:00 | 9 | 73 | + | 2016-10-18 00:00:00 | 11 | 84 | + | 2016-10-19 00:00:00 | 12 | 96 | + | 2016-10-20 00:00:00 | 10 | 106 | + | 2016-10-21 00:00:00 | 6 | 112 | + | 2016-10-22 00:00:00 | 2 | 114 | + | 2016-10-23 00:00:00 | 5 | 119 | + | 2016-10-24 00:00:00 | 6 | 125 | + | 2016-10-25 00:00:00 | 9 | 134 | + | 2016-10-26 00:00:00 | 2 | 136 | + | 2016-10-27 00:00:00 | 4 | 140 | + | 2016-10-28 00:00:00 | 7 | 147 | + | 2016-10-29 00:00:00 | 5 | 
152 | + | 2016-10-30 00:00:00 | 5 | 157 | + | 2016-10-31 00:00:00 | 3 | 160 | + | (22 rows) | + + Figure 9.21: Bat Scooter cumulative sales for 22 rows + + Sales of the limited-edition scooter did not reach double digits during the first 22 days, nor did the daily volume of sales fluctuate as much. In keeping with the overall sales figure, the limited edition sold 64 fewer units over the first 22 days. +16. Compute the 7-day `lag` function for the `sum` column and insert the results into the `bat_ltd_sales_delay` table: + + ```javascript + SELECT *, lag(sum , 7) OVER (ORDER BY sales_transaction_date) INTO bat_ltd_sales_delay FROM bat_ltd_sales_growth; + ``` + +17. Compute the sales growth for `bat_ltd_sales_delay` in a similar manner to the exercise completed in _Activity 9.1_, _Quantifying the Sales Drop_. Label the column for the results of this calculation as `volume` and store the resulting table in `bat_ltd_sales_vol`: + + ```javascript + SELECT *, (sum-lag)/lag AS volume INTO bat_ltd_sales_vol FROM bat_ltd_sales_delay; + ``` + +18. 
Look at the first 22 records of sales in `bat_ltd_sales_vol`: + + ```javascript + SELECT * FROM bat_ltd_sales_vol LIMIT 22; + ``` + + + The sales volume can be seen in the following figure: + + | sales\_transaction\_date | count | sum | lag | volume | + | --- | --- | --- | --- | --- | + | 2017-02-15 | 6 | 6 | | | + | 2017-02-16 | 2 | 8 | | | + | 2017-02-17 | 1 | 9 | | | + | 2017-02-18 | 4 | 13 | | | + | 2017-02-19 | 5 | 18 | | | + | 2017-02-20 | 6 | 24 | | | + | 2017-02-21 | 5 | 29 | | | + | 2017-02-22 | 4 | 33 | 6 | 4.5000000000000000 | + | 2017-02-23 | 6 | 39 | 8 | 3.8750000000000000 | + | 2017-02-24 | 2 | 41 | 9 | 3.5555555555555556 | + | 2017-02-25 | 2 | 43 | 13 | 2.3076923076923077 | + | 2017-02-26 | 2 | 45 | 18 | 1.5000000000000000 | + | 2017-02-27 | 4 | 49 | 24 | 1.0416666666666667 | + | 2017-02-28 | 4 | 53 | 29 | 0.82758620689655172414 | + | 2017-03-01 | 5 | 58 | 33 | 0.75757575757575757576 | + | 2017-03-02 | 1 | 59 | 39 | 0.51282051282051282051 | + | 2017-03-03 | 3 | 62 | 41 | 0.51219512195121951220 | + | 2017-03-04 | 8 | 70 | 43 | 0.62790697674418604651 | + | 2017-03-05 | 4 | 74 | 45 | 0.64444444444444444444 | + | 2017-03-06 | 7 | 81 | 49 | 0.65306122448979591837 | + | 2017-03-07 | 7 | 88 | 53 | 0.66037735849056603774 | + | 2017-03-08 | 8 | 96 | 58 | 0.65517241379310344828 | + | (22 rows) | + + Figure 9.22: Bat Scooter cumulative sales showing volume + + +Looking at the `volume` column in the preceding diagram, we can again see that the sales growth is more consistent than the original Bat Scooter. The growth within the first week is less than that of the original model, but it is sustained over a longer period. After 22 days of sales, the sales growth of the limited-edition scooter is 65% compared to the previous week, as compared with the 28% growth identified in the second activity of the lesson. + +At this stage, we have collected data from two similar products launched at different time periods and found some differences in the trajectory of the sales growth over the first 3 weeks of sales. 
In a professional setting, we may also consider employing more sophisticated statistical comparison methods, such as tests for differences of mean, variance, survival analysis, or other techniques. These methods lie outside the scope of this course and, as such, limited comparative methods will be used. + +While we have shown there to be a difference in sales between the two Bat Scooters, we also cannot rule out the fact that the sales differences can be attributed to the difference in the sales price of the two scooters, with the limited-edition scooter being $100 more expensive. In the next activity, we will compare the sales of the Bat Scooter to the 2013 Lemon, which is $100 cheaper, was launched 3 years prior, is no longer in production, and started production in the first half of the calendar year. + +Activity 9.2: Analyzing the Difference in the Sales Price Hypothesis + +In this activity, we are going to investigate the hypothesis that the reduction in sales growth can be attributed to the price point of the Bat Scooter. Previously, we considered the launch date. However, there could be another factor – the sales price included. If we consider the product list of scooters shown in _Figure 9.23_, and exclude the Bat model scooter, we can see that there are two price categories, $699.99 and above, or $499.99 and below. The Bat Scooter sits exactly between these two groups; perhaps the reduction in sales growth can be attributed to the different pricing model. 
In this activity, we will test this hypothesis by comparing Bat sales to the 2013 Lemon: + +| product\_id | model | year | product\_type | base\_msrp | production\_start\_date | production\_end\_date | +| --- | --- | --- | --- | --- | --- | --- | +| 12 | Lemon Zester | 2019 | scooter | 349.99 | 2019-02-04 00:00:00 | | +| 1 | Lemon | 2010 | scooter | 399.99 | 2010-03-03 00:00:00 | 2012-06-08 00:00:00 | +| 3 | Lemon | 2013 | scooter | 499.99 | 2013-05-01 00:00:00 | 2018-12-28 00:00:00 | +| 7 | Bat | 2016 | scooter | 599.99 | 2016-10-10 00:00:00 | | +| 5 | Blade | 2014 | scooter | 699.99 | 2014-06-23 00:00:00 | 2015-01-27 00:00:00 | +| 8 | Bat Limited Edition | 2017 | scooter | 699.99 | 2017-02-15 00:00:00 | | +| 2 | Lemon Limited Edition | 2011 | scooter | 799.99 | 2011-01-03 00:00:00 | 2011-03-30 00:00:00 | +| (7 rows) | + +Figure 9.23: List of scooter models + +The following are the steps to perform: + +1. Load the `sqlda` database from the accompanying source code located at this [link](https://github.com/TrainingByPackt/SQL-for-Data-Analytics/tree/master/Datasets). +2. Select the `sales_transaction_date` column from the year 2013 for `Lemon` model sales and insert the column into a table called `lemon_sales`. +3. Count the sales records available for 2013 for the `Lemon` model. +4. Display the latest `sales_transaction_date` column. +5. Convert the `sales_transaction_date` column to a date type. +6. Count the number of sales per day within the `lemon_sales` table and insert the data into a table called `lemon_sales_count`. +7. Calculate the cumulative sum of sales and insert the corresponding table into a new table labeled `lemon_sales_sum`. +8. Compute the 7-day `lag` function on the `sum` column and save the result to `lemon_sales_delay`. +9. Calculate the growth rate using the data from `lemon_sales_delay` and store the resulting table in `lemon_sales_growth`. +10. Inspect the first 22 records of the `lemon_sales_growth` table by examining the `volume` data. 
+ +**Solution** + +1. Load the sqlda database: + + ```javascript + psql sqlda + ``` + +2. Select the sales\_transaction\_date column from the 2013 Lemon sales and insert the column into a table called lemon\_sales: + + ```javascript + SELECT sales_transaction_date INTO lemon_sales FROM sales WHERE product_id=3; + ``` + +3. Count the sales records available for the 2013 Lemon by running the following query: + + ```javascript + SELECT count(sales_transaction_date) FROM lemon_sales; + ``` + + We can see that **16558** records are available: + + ```javascript + count -------------------- 16558 (1 row) + ``` + + Figure A: Sales records for the 2013 Lemon Scooter + +4. Use the max function to check the latest sales\_transaction\_date column: + + ```javascript + SELECT max(sales_transaction_date) FROM lemon_sales; + ``` + + The following figure displays the sales\_transaction\_date column: + + ```javascript + max ---------------- 2018-12-27 19:12:10 (1 row) + ``` + + Figure B: Production between May 2013 and December 2018 + +5. Convert the sales\_transaction\_date column to a date type using the following query: + + ```javascript + ALTER TABLE lemon_sales ALTER COLUMN sales_transaction_date TYPE DATE; + ``` + + We are converting the datatype from TIMESTAMP to DATE so as to remove the time information from the field. We are only interested in accumulating the number of sales per day, so we need just the date and not the time. Hence, it is easier just to remove the time information from the field. +6. Count the number of sales per day within the lemon\_sales table and insert this figure into a table called lemon\_sales\_count: + + ```javascript + SELECT sales_transaction_date, COUNT(sales_transaction_date) INTO lemon_sales_count FROM lemon_sales GROUP BY sales_transaction_date ORDER BY sales_transaction_date; + ``` + +7. 
Calculate the cumulative sum of sales and insert the corresponding table into a new table labeled lemon\_sales\_sum: + + ```javascript + SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) INTO lemon_sales_sum FROM lemon_sales_count; + ``` + +8. Compute the 7-day lag function on the sum column and save the result to lemon\_sales\_delay: + + ```javascript + SELECT *, lag(sum, 7) OVER (ORDER BY sales_transaction_date) INTO lemon_sales_delay FROM lemon_sales_sum; + ``` + +9. Calculate the growth rate using the data from lemon\_sales\_delay and store the resulting table in lemon\_sales\_growth. Label the growth rate column as volume: + + ```javascript + SELECT *, (sum-lag)/lag AS volume INTO lemon_sales_growth FROM lemon_sales_delay; + ``` + +10. Inspect the first 22 records of the lemon\_sales\_growth table by examining the volume data: + + ```javascript + SELECT * FROM lemon_sales_growth LIMIT 22; + ``` + + The following table shows the sales growth: + + | sales\_transaction\_date | count | sum | lag | volume | + | --- | --- | --- | --- | --- | + | 2013-05-01 | 6 | 6 | | | + | 2013-05-02 | 8 | 14 | | | + | 2013-05-03 | 4 | 18 | | | + | 2013-05-04 | 9 | 27 | | | + | 2013-05-05 | 9 | 36 | | | + | 2013-05-06 | 6 | 42 | | | + | 2013-05-07 | 8 | 50 | | | + | 2013-05-08 | 6 | 56 | 6 | 8.3333333333333333 | + | 2013-05-09 | 6 | 62 | 14 | 3.4285714285714286 | + | 2013-05-10 | 9 | 71 | 18 | 2.9444444444444444 | + | 2013-05-11 | 3 | 74 | 27 | 1.7407407407407407 | + | 2013-05-12 | 4 | 78 | 36 | 1.1666666666666667 | + | 2013-05-13 | 7 | 85 | 42 | 1.0238095238095238 | + | 2013-05-14 | 3 | 88 | 50 | 0.76000000000000000000 | + | 2013-05-15 | 3 | 91 | 56 | 0.62500000000000000000 | + | 2013-05-16 | 4 | 95 | 62 | 0.53225806451612903226 | + | 2013-05-17 | 6 | 101 | 71 | 0.42253521126760563380 | + | 2013-05-18 | 9 | 110 | 74 | 0.48648648648648648649 | + | 2013-05-19 | 6 | 116 | 78 | 0.48717948717948717949 | + | 2013-05-20 | 6 | 122 | 85 | 0.43529411764705882353 | + | 2013-05-21 | 
11 | 133 | 88 | 0.51136363636363636364 | + | 2013-05-22 | 8 | 141 | 91 | 0.54945054945054945055 | + | (22 rows) | + + Figure C: Sales growth of the Lemon Scooter + +Similar to the previous exercise, we have calculated the cumulative sum, lag, and relative sales growth of the Lemon Scooter. We can see that the initial sales volume is much larger than that of the other scooters, at over 800%, and again finishes higher at 55%. + +**Expected Output:** + +| sales\_transaction\_date | count | sum | lag | volume | +| --- | --- | --- | --- | --- | +| 2013-05-01 | 6 | 6 | | | +| 2013-05-02 | 8 | 14 | | | +| 2013-05-03 | 4 | 18 | | | +| 2013-05-04 | 9 | 27 | | | +| 2013-05-05 | 9 | 36 | | | +| 2013-05-06 | 6 | 42 | | | +| 2013-05-07 | 8 | 50 | | | +| 2013-05-08 | 6 | 56 | 6 | 8.3333333333333333 | +| 2013-05-09 | 6 | 62 | 14 | 3.4285714285714286 | +| 2013-05-10 | 9 | 71 | 18 | 2.9444444444444444 | +| 2013-05-11 | 3 | 74 | 27 | 1.7407407407407407 | +| 2013-05-12 | 4 | 78 | 36 | 1.1666666666666667 | +| 2013-05-13 | 7 | 85 | 42 | 1.0238095238095238 | +| 2013-05-14 | 3 | 88 | 50 | 0.76000000000000000000 | +| 2013-05-15 | 3 | 91 | 56 | 0.62500000000000000000 | +| 2013-05-16 | 4 | 95 | 62 | 0.53225806451612903226 | +| 2013-05-17 | 6 | 101 | 71 | 0.42253521126760563380 | +| 2013-05-18 | 9 | 110 | 74 | 0.48648648648648648649 | +| 2013-05-19 | 6 | 116 | 78 | 0.48717948717948717949 | +| 2013-05-20 | 6 | 122 | 85 | 0.43529411764705882353 | +| 2013-05-21 | 11 | 133 | 88 | 0.51136363636363636364 | +| 2013-05-22 | 8 | 141 | 91 | 0.54945054945054945055 | +| (22 rows) | + +Figure 9.54: Sales growth of the Lemon Scooter + +Now that we have collected data to test the two hypotheses of timing and cost, what observations can we make and what conclusions can we draw? The first observation that we can make is regarding the total volume of sales for the three different scooter products. 
The Lemon Scooter, over its production life cycle of roughly 5.5 years (May 2013 to December 2018), sold 16,558 units, while the two Bat Scooters, the Original and Limited Edition models, sold 7,328 and 5,803 units, respectively, and are still currently in production, with the Bat Scooter launching about 4 months earlier and with approximately 2.5 years of sales data available. Looking at the sales growth of the three different scooters, we can also make a few different observations: + +- The original Bat Scooter, which launched in October at a price of $599.99, experienced a 700% sales growth in its second week of production and finished the first 22 days with 28% growth and a sales figure of 160 units. +- The Bat Limited Edition Scooter, which launched in February at a price of $699.99, experienced 450% growth at the start of its second week of production and finished with 96 sales and 66% growth over the first 22 days. +- The 2013 Lemon Scooter, which launched in May at a price of $499.99, experienced 830% growth in the second week of production and ended its first 22 days with 141 sales and 55% growth. + +Based on this information, we can draw a number of different conclusions: + +- The initial growth rate starting in the second week of sales correlates with the cost of the scooter. As the cost increased to $699.99, the initial growth rate dropped from 830% to 450%. +- The number of units sold in the first 22 days does not directly correlate with the cost. The $599.99 Bat Scooter sold more than the 2013 Lemon Scooter in that first period despite the price difference. +- There is some evidence to suggest that the reduction in sales can be attributed to seasonal variations given the significant reduction in growth and the fact that the original Bat Scooter is the only one released in October. So far, the evidence suggests that the drop can be attributed to the difference in launch timing. 
+ +Before we draw the conclusion that the difference can be attributed to seasonal variations and launch timing, let's ensure that we have extensively tested a range of possibilities. Perhaps marketing work, such as email campaigns, that is, when the emails were sent, and the frequency with which the emails were opened, made a difference. + +Now that we have considered both the launch timing and the suggested retail price of the scooter as a possible cause of the reduction in sales, we will direct our efforts to other potential causes, such as the rate of opening of marketing emails. Does the marketing email opening rate have an effect on sales growth throughout the first 3 weeks? We will find this out in our next exercise. + +Exercise 9.4: Analyzing Sales Growth by Email Opening Rate + +In this exercise, we will analyze the sales growth using the email opening rate. To investigate the hypothesis that a decrease in the rate of opening emails impacted the Bat Scooter sales rate, we will again select the Bat and Lemon Scooters and will compare the email opening rate. + +Perform the following steps to complete the exercise: + +1. Load the `sqlda` database: + + ```javascript + psql sqlda + ``` + +2. Firstly, look at the `emails` table to see what information is available. 
Select the first five rows of the `emails` table: + + ```javascript + SELECT * FROM emails LIMIT 5; + ``` + + The following table displays the email information for the first five rows: + + | email\_id | customer\_id | email\_subject | opened | clicked | bounced | sent\_date | opened\_date | clicked\_date | + | --- | --- | --- | --- | --- | --- | --- | --- | --- | + | 1 | 18 | Introducing A Limited Edition | f | f | f | 2011-01-03 15:00:00 | | | + | 2 | 30 | Introducing A Limited Edition | f | f | f | 2011-01-03 15:00:00 | | | + | 3 | 41 | Introducing A Limited Edition | t | f | f | 2011-01-03 15:00:00 | 2011-01-04 10:41:11 | | + | 4 | 52 | Introducing A Limited Edition | f | f | f | 2011-01-03 15:00:00 | | | + | 5 | 59 | Introducing A Limited Edition | f | f | f | 2011-01-03 15:00:00 | | | + | (5 rows) | + + Figure 9.55: First five rows of the emails table + + To investigate our hypothesis, we need to know whether an email was opened, and when it was opened, as well as who the customer was who opened the email and whether that customer purchased a scooter. If the email marketing campaign was successful in maintaining the sales growth rate, we would expect a customer to open an email shortly before a scooter was purchased. + + The period in which the emails were sent, as well as the ID of customers who received and opened an email, can help us to determine whether a customer who made a purchase may have been encouraged to do so following the receipt of an email. + +3. To test the hypothesis, we need to collect the `customer_id` column from both the `emails` table and the `bat_sales` table for the Bat Scooter, the `opened`, `sent_date`, `opened_date`, and `email_subject` columns from the `emails` table, as well as the `sales_transaction_date` column from the `bat_sales` table. As we only want the email records of customers who purchased a Bat Scooter, we will join the two tables on the `customer_id` column. 
Then, insert the results into a new table – `bat_emails`: + + ```javascript + SELECT emails.email_subject, emails.customer_id, emails.opened, emails.sent_date, emails.opened_date, bat_sales.sales_transaction_date INTO bat_emails FROM emails INNER JOIN bat_sales ON bat_sales.customer_id=emails.customer_id ORDER BY bat_sales.sales_transaction_date; + ``` + +4. Select the first 10 rows of the `bat_emails` table, ordering the results by `sales_transaction_date`: + + ```javascript + SELECT * FROM bat_emails LIMIT 10; + ``` + + The following table shows the first 10 rows of the `bat_emails` table ordered by `sales_transaction_date`: + + | email\_subject | customer\_id | opened | sent\_date | opened\_date | sales\_transaction\_date | + | --- | --- | --- | --- | --- | --- | + | A New Year, And Some New EVs | 11678 | f | 2019-01-07 15:00:00 | | 2016-10-10 00:00:00 | + | A Brand New Scooter...and Car | 40250 | f | 2014-05-06 15:00:00 | | 2016-10-10 00:00:00 | + | We Really Outdid Ourselves this Year | 24125 | f | 2017-01-15 15:00:00 | | 2016-10-10 00:00:00 | + | Tis' the Season for Savings | 31307 | t | 2015-11-26 15:00:00 | 2015-11-27 04:55:07 | 2016-10-10 00:00:00 | + | 25% off all EVs. It's a Christmas Miracle! | 42213 | f | 2016-11-25 15:00:00 | | 2016-10-10 00:00:00 | + | Zoom zoom Black Friday Sale | 40250 | f | 2014-11-28 15:00:00 | | 2016-10-10 00:00:00 | + | Save the Planet with some Holiday Savings. | 4553 | f | 2016-11-23 15:00:00 | | 2016-10-10 00:00:00 | + | The 2013 Lemon Scooter is Here | 24125 | t | 2013-03-01 15:00:00 | 2013-03-02 14:43:34 | 2016-10-10 00:00:00 | + | The 2013 Lemon Scooter is Here | 40250 | f | 2013-03-01 15:00:00 | | 2016-10-10 00:00:00 | + | Save the Planet with some Holiday Savings. 
| 40250 | f | 2018-11-23 15:00:00 | | 2016-10-10 00:00:00 | + | (10 rows) | + + Figure 9.56: Email and sales information joined on customer\_id + + We can see here that there are several emails unopened, over a range of sent dates, and that some customers have received multiple emails. Looking at the subjects of the emails, some of them don't seem related to the Zoom scooters at all. +5. Select all rows where the `sent_date` email predates the `sales_transaction_date` column, order by `customer_id`, and limit the output to the first 22 rows. This will help us to know which emails were sent to each customer before they purchased their scooter. Write the following query to do so: + + ```javascript + SELECT * FROM bat_emails WHERE sent_date < sales_transaction_date ORDER BY customer_id LIMIT 22; + ``` + + The following table lists the emails sent to the customers before the `sales_transaction_date` column: + + ![The figure shows the output of the above query. ](https://s3.amazonaws.com/jigyaasa_content_static/sql-data-anal/C11861_09_27.jpg) + + Figure 9.57: Emails sent to customers before the sale transaction date + +6. Delete the rows of the `bat_emails` table where emails were sent more than 6 months prior to production. As we can see, there are some emails that were sent years before the transaction date. We can easily remove some of the unwanted emails by removing those sent before the Bat Scooter was in production. From the products table, the production start date for the Bat Scooter is October 10, 2016: + + ```javascript + DELETE FROM bat_emails WHERE sent_date < '2016-04-10'; + ``` + + Note + + In this exercise, we are removing information that we no longer require from an existing table. This differs from the previous exercises, where we created multiple tables each with slightly different information from other. 
The technique you apply will differ depending upon the requirements of the problem being solved; do you require a traceable record of analysis, or are efficiency and reduced storage key? + +7. Delete the rows where the sent date is after the purchase date, as they are not relevant to the sale: + + ```javascript + DELETE FROM bat_emails WHERE sent_date > sales_transaction_date; + ``` + +8. Delete those rows where the difference between the transaction date and the sent date exceeds 30 days, as we also only want those emails that were sent shortly before the scooter purchase. An email 1 year beforehand is unlikely to influence a purchasing decision, but one closer to the purchase date may have influenced the sales decision. We will set a limit of 1 month (30 days) before the purchase. Write the following query to do so: + + ```javascript + DELETE FROM bat_emails WHERE (sales_transaction_date-sent_date) > '30 days'; + ``` + +9. Examine the first 22 rows again ordered by `customer_id` by running the following query: + + ```javascript + SELECT * FROM bat_emails ORDER BY customer_id LIMIT 22; + ``` + + The following table shows the emails where the difference between the transaction date and the sent date is at most 30 days: + + ![The figure shows the output of the above query.](https://s3.amazonaws.com/jigyaasa_content_static/sql-data-anal/C11861_09_28.jpg) + + Figure 9.58: Emails sent close to the date of sale + + At this stage, we have reasonably filtered the available data based on the dates the emails were sent. Looking at the preceding `email_subject` column, it also appears that there are a few emails unrelated to the Bat Scooter, for example, **25% off all EVs. It's a Christmas Miracle!** and **Black Friday. Green Cars**. These emails seem more related to electric car production instead of scooters, and so we can remove them from our analysis. +10. 
Select the distinct values from the `email_subject` column to get a list of the different emails sent to the customers: + + ```javascript + SELECT DISTINCT(email_subject) FROM bat_emails; + ``` + + The following table shows a list of distinct email subjects: + + ```javascript + email_subject --------------------------------------- Black Friday. Green Cars. 25% off all EVs. It's a Christmas Miracle! A New Year, And Some New EVs Like a Bat out of Heaven Save the Planet with some Holiday Savings. We Really Outdid Ourselves this Year (6 rows) + ``` + + Figure 9.59: Unique email subjects sent to potential customers of the Bat Scooter + +11. Delete all records that have `Black Friday` in the email subject. These emails do not appear relevant to the sale of the Bat Scooter: + + ```javascript + DELETE FROM bat_emails WHERE position('Black Friday' in email_subject)>0; + ``` + + Note + + The `position` function in the preceding example returns the position (counting from 1) at which the `Black Friday` string first occurs in `email_subject`, or 0 if it does not occur at all. Thus, by keeping only results greater than 0, we are deleting any rows where `Black Friday` appears anywhere in the email subject. For more information on PostgreSQL, refer to the documentation regarding [string functions](https://www.postgresql.org/docs/current/functions-string.html). + +12. Delete all rows where **25% off all EVs. It's a Christmas Miracle!** or **A New Year, And Some New EVs** can be found in the `email_subject`: + + ```javascript + DELETE FROM bat_emails WHERE position('25% off all EV' in email_subject)>0; DELETE FROM bat_emails WHERE position('Some New EV' in email_subject)>0; + ``` + +13. At this stage, we have our final dataset of emails sent to customers. 
Count the number of rows that are left in the sample by writing the following query: + + ```javascript + SELECT count(sales_transaction_date) FROM bat_emails; + ``` + + + We can see that **401** rows are left in the sample: + + ```javascript + count ----------- 401 (1 row) + ``` + + Figure 9.60: Count of the final Bat Scooter email dataset + +14. We will now compute the percentage of emails that were opened relative to sales. Count the emails that were opened by writing the following query: + + ```javascript + SELECT count(opened) FROM bat_emails WHERE opened='t' + ``` + + + We can see that **98** emails were opened: + + ```javascript + count ------------ 98 (1 row) + ``` + + Figure 9.61: Count of opened Bat Scooter campaign emails + +15. Count the customers who received emails and made a purchase. We will determine this by counting the number of unique (or distinct) customers that are in the `bat_emails` table: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM bat_emails; + ``` + + + We can see that **396** customers who received an email made a purchase: + + ```javascript + count ----------- 396 (1 row) + ``` + + Figure 9.62: Count of unique customers who received a Bat Scooter campaign email + +16. Count the unique (or distinct) customers who made a purchase by writing the following query: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM bat_sales; + ``` + + + Following is the output of the preceding code: + + ```javascript + count ---------- 6659 (1 row) + ``` + + Figure 9.63: Count of unique customers + +17. 
Calculate the percentage of customers who purchased a Bat Scooter after receiving an email: + + ```javascript + SELECT 396.0/6659.0 AS email_rate; + ``` + + + The output of the preceding query is displayed as follows: + + ```javascript + email_rate ---------------------- 0.05946838864694398558 (1 row) + ``` + + Figure 9.64: Percentage of customers who received an email + + Note + + In the preceding calculation, you can see that we included a decimal place in the figures, for example, 396.0 instead of a simple integer value (396). This is because the resulting value is a fraction smaller than 1. If we excluded these decimal places, PostgreSQL would have completed the division operation as integers and the result would be 0. + + Just under 6% of customers who made a purchase received an email regarding the Bat Scooter. Since 18% of customers who received an email made a purchase, there is a strong argument to be made that actively increasing the size of the customer base who receive marketing emails could increase Bat Scooter sales. +18. Limit the scope of our data to be all sales prior to November 1, 2016 and put the data in a new table called `bat_emails_threewks`. So far, we have examined the email opening rate throughout all available data for the Bat Scooter. Now check the rate for the first 3 weeks, where we saw a reduction in sales: + + ```javascript + SELECT * INTO bat_emails_threewks FROM bat_emails WHERE sales_transaction_date < '2016-11-01'; + ``` + +19. Now, count the number of emails sent during this period: + + ```javascript + SELECT COUNT(opened) FROM bat_emails_threewks; + ``` + + + We can see that we have sent **82** emails during this period: + + ```javascript + count ---------------------- 82 (1 row) + ``` + + Figure 9.65: Count of emails sent in the first 3 weeks + +20. 
Now, count the number of emails opened in the first 3 weeks: + + ```javascript + SELECT COUNT(opened) FROM bat_emails_threewks WHERE opened='t'; + ``` + + + The following is the output of the preceding code: + + ```javascript + count ---------------------- 15 (1 row) + ``` + + Figure 9.66: Count of emails opened + + + We can see that **15** emails were opened in the first 3 weeks. +21. Count the number of customers who received emails during the first 3 weeks of sales and who then made a purchase by using the following query: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM bat_emails_threewks; + ``` + + + We can see that **82** customers received emails during the first 3 weeks: + + ```javascript + count ---------------------- 82 (1 row) + ``` + + Figure 9.67: Customers who made a purchase in the first 3 weeks + +22. Calculate the percentage of customers who opened emails pertaining to the Bat Scooter and then made a purchase in the first 3 weeks by using the following query: + + ```javascript + SELECT 15.0/82.0 AS sale_rate; + ``` + + + The following table shows the calculated percentage: + + ```javascript + sale_rate 0.18292682926829268293 (1 row) + ``` + + Figure 9.68: Percentage of customers in the first 3 weeks who opened emails + + + Approximately 18% of customers who received an email about the Bat Scooter made a purchase in the first 3 weeks. This is consistent with the rate for all available data for the Bat Scooter. +23. Calculate how many unique customers we have in total throughout the first 3 weeks. This information is useful context when considering the percentages, we just calculated. 3 sales out of 4 equate to 75% but, in this situation, we would prefer a lower rate of the opening but for a much larger customer base. Information on larger customer bases is generally more useful as it is typically more representative of the entire customer base, rather than a small sample of it. 
We already know that 82 customers received emails: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM bat_sales WHERE sales_transaction_date < '2016-11-01'; + ``` + + + The following output reflects **160** customers where the transaction took place before November 1, 2016: + + ```javascript + count ------------ 160 (1 row) + ``` + + Figure 9.69: Number of distinct customers from bat\_sales + + +There were 160 customers in the first 3 weeks, 82 of whom received emails, which is slightly over 50% of customers. This is much more than 6% of customers over the entire period of availability of the scooter. + +Now that we have examined the performance of the email marketing campaign for the Bat Scooter, we need a control or comparison group to establish whether the results were consistent with that of other products. Without a group to compare against, we simply do not know whether the email campaign of the Bat Scooter was good, bad, or neither. We will perform the next exercise to investigate performance. + +Exercise 9.5: Analyzing the Performance of the Email Marketing Campaign + +In this exercise, we will investigate the performance of the email marketing campaign for the Lemon Scooter to allow for a comparison with the Bat Scooter. Our hypothesis is that if the email marketing campaign performance of the Bat Scooter is consistent with another, such as the 2013 Lemon, then the reduction in sales cannot be attributed to differences in the email campaigns. + +Perform the following steps to complete the exercise: + +1. Load the `sqlda` database: + + ```javascript + psql sqlda + ``` + +2. Drop the existing `lemon_sales` table: + + ```javascript + DROP TABLE lemon_sales; + ``` + +3. The 2013 Lemon Scooter is `product_id = 3`. Select `customer_id` and `sales_transaction_date` from the sales table for the 2013 Lemon Scooter. 
Insert the information into a table called `lemon_sales`: + + ```javascript + SELECT customer_id, sales_transaction_date INTO lemon_sales FROM sales WHERE product_id=3; + ``` + +4. Select all information from the `emails` table for customers who purchased a 2013 Lemon Scooter. Place the information in a new table called `lemon_emails`: + + ```javascript + SELECT emails.customer_id, emails.email_subject, emails.opened, emails.sent_date, emails.opened_date, lemon_sales.sales_transaction_date INTO lemon_emails FROM emails INNER JOIN lemon_sales ON emails.customer_id=lemon_sales.customer_id; + ``` + +5. Remove all emails sent before the start of production of the 2013 Lemon Scooter. For this, we first require the date when production started: + + ```javascript + SELECT production_start_date FROM products WHERE product_id=3; + ``` + + The following table shows the `production_start_date` column: + + ```javascript + production_start_date --------------------------------- 2013-05-01 00:00:00 (1 row) + ``` + + Figure 9.70: Production start date of the Lemon Scooter + + Now, delete the emails that were sent before the start of production of the 2013 Lemon Scooter: + + ```javascript + DELETE FROM lemon_emails WHERE sent_date < '2013-05-01'; + ``` + +6. Remove all rows where the sent date occurred after the `sales_transaction_date` column: + + ```javascript + DELETE FROM lemon_emails WHERE sent_date > sales_transaction_date; + ``` + +7. Remove all rows where the sent date occurred more than 30 days before the `sales_transaction_date` column: + + ```javascript + DELETE FROM lemon_emails WHERE (sales_transaction_date - sent_date) > '30 days'; + ``` + +8. Remove all rows from `lemon_emails` where the email subject is not related to a Lemon Scooter. 
Before doing this, we will search for all distinct email subjects: + + ```javascript + SELECT DISTINCT(email_subject) FROM lemon_emails; + ``` + + The following table shows the distinct email subjects: + + ```javascript + email_subject ---------------------------------------------------------- Tis' the Season for Savings 25% off all EVs. It's a Christmas Miracle! A Brand New Scooter...and Car Like a Bat out of Heaven Save the Planet with some Holiday Savings. Shocking Holiday Savings on Electric Scooters An Electric Car for a New Age We cut you a deal: 20% off a Blade Black Friday. Green Cars. Zoom Zoom Black Friday Sale (11 rows) + ``` + + Figure 9.71: Lemon Scooter campaign emails sent + + Now, delete the rows whose email subject is not related to the Lemon Scooter using the `DELETE` command: + + ```javascript + DELETE FROM lemon_emails WHERE POSITION('25% off all EVs.' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('Like a Bat out of Heaven' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('Save the Planet' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('An Electric Car' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('We cut you a deal' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('Black Friday. Green Cars.' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('Zoom' in email_subject)>0; + ``` + +9. Now, check how many emails sent to Lemon Scooter customers were opened: + + ```javascript + SELECT COUNT(opened) FROM lemon_emails WHERE opened='t'; + ``` + + We can see that **128** emails were opened: + + ```javascript + count --------- 128 (1 row) + ``` + + Figure 9.72: Lemon Scooter campaign emails opened + +10. 
List the number of customers who received emails and made a purchase: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM lemon_emails; + ``` + + The following figure shows that **506** customers made a purchase after receiving emails: + + ```javascript + count --------- 506 (1 row) + ``` + + Figure 9.73: Unique customers who purchased a Lemon Scooter + +11. Calculate the percentage of customers who opened the received emails and made a purchase: + + ```javascript + SELECT 128.0/506.0 AS email_rate; + ``` + + We can see that 25% of customers opened the emails and made a purchase: + + ```javascript + email_rate ------------------------------- 0.25296442687747035573 (1 row) + ``` + + Figure 9.74: Lemon Scooter customer email rate + +12. Calculate the number of unique customers who made a purchase: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM lemon_sales; + ``` + + + We can see that **13854** customers made a purchase: + + ```javascript + count ------------------------------- 13854 (1 row) + ``` + + Figure 9.75: Count of unique Lemon Scooter customers + +13. Calculate the percentage of customers who made a purchase having received an email. This will enable a comparison with the corresponding figure for the Bat Scooter: + + ```javascript + SELECT 506.0/13854.0 AS email_sales; + ``` + + + The preceding calculation gives a result of just over 3.6%: + + ```javascript + email_sales ------------------------- 0.03652374765410711708 (1 row) + ``` + + Figure 9.76: Lemon Scooter customers who received an email + +14. Select all records from `lemon_emails` where a sale occurred within the first 3 weeks of the start of production. Store the results in a new table – `lemon_emails_threewks`: + + ```javascript + SELECT * INTO lemon_emails_threewks FROM lemon_emails WHERE sales_transaction_date < '2013-06-01'; + ``` + +15. 
Count the number of emails that were made for Lemon Scooters in the first 3 weeks: + + ```javascript + SELECT COUNT(sales_transaction_date) FROM lemon_emails_threewks; + ``` + + + The following is the output of the preceding code: + + ```javascript + count ----------- 0 (1 row) + ``` + + Figure 9.77: Unique sales of the Lemon Scooter in the first 3 weeks + + +There is a lot of interesting information here. We can see that 25% of customers who opened an email made a purchase, which is a lot higher than the 18% figure for the Bat Scooter. We have also calculated that just over 3.6% of customers who purchased a Lemon Scooter were sent an email, which is much lower than the almost 6% of Bat Scooter customers. The final interesting piece of information we can see is that none of the Lemon Scooter customers received an email during the first 3 weeks of product launch compared with the 82 Bat Scooter customers, which is approximately 50% of all customers in the first 3 weeks! + +In this exercise, we investigated the performance of an email marketing campaign for the Lemon Scooter to allow for a comparison with the Bat Scooter using various SQL techniques. diff --git a/Lesson02/with.pgsql b/Lesson02/with.pgsql new file mode 100644 index 0000000..f7503fb --- /dev/null +++ b/Lesson02/with.pgsql @@ -0,0 +1,8 @@ +WITH d as ( +SELECT * FROM dealerships + WHERE dealerships.state = 'CA' + ) +SELECT * +FROM salespeople +INNER JOIN d ON d.dealership_id = salespeople.dealership_id +ORDER BY 1; \ No newline at end of file diff --git a/W2_case_study.sql b/W2_case_study.sql new file mode 100644 index 0000000..b831de2 --- /dev/null +++ b/W2_case_study.sql @@ -0,0 +1,281 @@ +/*markdown +# Exercise 9.1: Preliminary Data Collection Using SQL Techniques +This exercise collects preliminary data. We will load the database, list scooter product details, extract product IDs, and store the results in a new table. 
+ + +*/ + +/*markdown + +## Step 1: Load the sqlda database + + +*/ + +/*markdown + +psql sqlda + + +*/ + +/*markdown + +## Step 2: List the model, base_msrp, and production_start_date for scooter products + + +*/ + +SELECT model, base_msrp, production_start_date +FROM products +WHERE product_type = 'scooter'; + + + +/*markdown +## Step 3: Extract the model name and product IDs for scooter products + + +*/ + +SELECT model, product_id +FROM products +WHERE product_type = 'scooter'; + + + +/*markdown +## Step 4: Insert the above results into a new table called product_names + + +*/ + +SELECT model, product_id +INTO product_names +FROM products +WHERE product_type = 'scooter'; + + + +/*markdown + +# Exercise 9.2: Extracting the Sales Information +In this exercise we join sales data with the product names and then isolate Bat Scooter sales. + + +*/ + +/*markdown + +## Step 1: Load the sqlda database + + +*/ + +/*markdown + +psql sqlda + + +*/ + +/*markdown + +## Step 2: List the available fields in the database + + +*/ + +/*markdown + +## Step 3: Create a new table (products_sales) by joining sales and product_names on product_id + + +*/ + +SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id +INTO products_sales +FROM sales +INNER JOIN product_names + ON sales.product_id = product_names.product_id; + + + +/*markdown +## Step 4: Display the first five rows of products_sales + + +*/ + +SELECT * +FROM products_sales +LIMIT 5; + + + +/*markdown +## Step 5: Retrieve Bat Scooter sales ordered by sales_transaction_date + + +*/ + +SELECT * +FROM products_sales +WHERE model = 'Bat' +ORDER BY sales_transaction_date; + + + +/*markdown +## Step 6: Count the number of Bat Scooter sales records + + +*/ + +SELECT COUNT(model) +FROM products_sales +WHERE model = 'Bat'; + + + +/*markdown +## Step 7: Determine the last sale date for the Bat Scooter + + +*/ + +SELECT MAX(sales_transaction_date) +FROM products_sales +WHERE model = 'Bat'; + + + 
+/*markdown +## Step 8: Insert Bat Scooter sales records into a new table (bat_sales) ordered by date + + +*/ + +SELECT * +INTO bat_sales +FROM products_sales +WHERE model = 'Bat' +ORDER BY sales_transaction_date; + + + +/*markdown +## Step 9: Remove the time information in bat_sales (convert to date) + + +*/ + +UPDATE bat_sales +SET sales_transaction_date = DATE(sales_transaction_date); + + + +/*markdown +## Step 10: Display the first five records of bat_sales ordered by date + + +*/ + +SELECT * +FROM bat_sales +ORDER BY sales_transaction_date +LIMIT 5; + + + +/*markdown +## Step 11: Create bat_sales_daily table with daily sales count + + +*/ + +SELECT sales_transaction_date, COUNT(sales_transaction_date) +INTO bat_sales_daily +FROM bat_sales +GROUP BY sales_transaction_date +ORDER BY sales_transaction_date; + + + +/*markdown + +# Activity 9.1: Quantifying the Sales Drop +Here we compute a cumulative sum of daily sales, apply a 7-day lag, and calculate the growth rate (volume). + + +*/ + +/*markdown + +## Step 1: Load the sqlda database + +psql sqlda +*/ + +/*markdown + +## Step 2: Compute the daily cumulative sum of sales and insert into bat_sales_growth +*/ + +SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) AS cumulative_sum +INTO bat_sales_growth +FROM bat_sales_daily; + + + +/*markdown +## Step 3: Compute a 7-day lag of the cumulative sum and insert into bat_sales_daily_delay + + +*/ + +SELECT *, lag(cumulative_sum, 7) OVER (ORDER BY sales_transaction_date) AS lag_value +INTO bat_sales_daily_delay +FROM bat_sales_growth; + + + +/*markdown +## Step 4: Inspect the first 15 rows of bat_sales_daily_delay + + +*/ + +SELECT * +FROM bat_sales_daily_delay +LIMIT 15; + + + +/*markdown +## Step 5: Compute sales growth as a percentage and insert into bat_sales_delay_vol + + +*/ + +SELECT *, (cumulative_sum - lag_value) / lag_value AS volume +INTO bat_sales_delay_vol +FROM bat_sales_daily_delay; + + + +/*markdown +## Step 6: Display the first 22 records of 
bat_sales_delay_vol + + +*/ + +SELECT * +FROM bat_sales_delay_vol +LIMIT 22; + + + +/*markdown +#### ---------------------------------------------------------------------------------- +*/ \ No newline at end of file diff --git a/case_study_exercises_with_js_blocks.sql b/case_study_exercises_with_js_blocks.sql new file mode 100644 index 0000000..5e23072 --- /dev/null +++ b/case_study_exercises_with_js_blocks.sql @@ -0,0 +1,175 @@ +/*markdown +-- Exercise 9.1: Preliminary Data Collection Using SQL Techniques +*/ + +/*markdown +1. Load +the `sqlda` +database: +*/ + +psql sqlda + +/*markdown + +2. List the model, `base_msrp`, and `production_start_date` for +scooters: +*/ + +SELECT model, base_msrp, production_start_date +FROM products +WHERE product_type='scooter'; + + +3. Extract model names and product IDs for +scooters: + +SELECT model, product_id +FROM products +WHERE product_type='scooter'; + + +4. +Insert results +into `product_names`: + +SELECT model, product_id +INTO product_names +FROM products +WHERE product_type='scooter'; + + +-- Exercise 9.2: Extracting the Sales Information + +1. Inner join `product_names` and `sales` +tables: + +SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id +INTO products_sales +FROM sales INNER JOIN product_names ON sales.product_id=product_names.product_id; + + +2. View first five rows of `products_sales`: + +SELECT * +FROM products_sales LIMIT +5; + + +3. +Select Bat Scooter +sales ordered by +date: + +SELECT * +FROM products_sales +WHERE model='Bat' +ORDER BY sales_transaction_date; + + +4. Count Bat Scooter sales +records: + +SELECT COUNT(model) +FROM products_sales +WHERE model='Bat'; + + +5. Determine last sale date for Bat +Scooter: + +SELECT MAX(sales_transaction_date) +FROM products_sales +WHERE model='Bat'; + + +-- Activity 9.1: Quantifying the Sales Drop + +1. 
Compute daily cumulative sum of +sales: + +SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) +INTO bat_sales_growth +FROM bat_sales_daily; + + +2. Compute 7-day lag of cumulative +sum: + +SELECT *, lag(sum, 7) OVER (ORDER BY sales_transaction_date) +INTO bat_sales_daily_delay +FROM bat_sales_growth; + + +3. Compute sales growth +percentage: + +SELECT *, (sum-lag)/lag AS volume +INTO bat_sales_delay_vol +FROM bat_sales_daily_delay; + + +-- Exercise 9.3: Launch Timing Analysis + +1. Examine scooter launch +dates: + +SELECT * +FROM products +WHERE product_type='scooter'; + + +2. +Select Bat Limited +Edition +sales: + +SELECT products.model, sales.sales_transaction_date +INTO bat_ltd_sales +FROM sales INNER JOIN products ON sales.product_id=products.product_id +WHERE sales.product_id=8 +ORDER BY sales.sales_transaction_date; + + +-- Activity 9.2: Analyzing the Difference in the Sales Price Hypothesis + +1. +Select 2013 Lemon +sales: + +SELECT sales_transaction_date +INTO lemon_sales +FROM sales +WHERE product_id=3; + + +2. Compute cumulative sum and lag for Lemon +sales: + +SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) +INTO lemon_sales_sum +FROM lemon_sales_count; +SELECT *, lag(sum, 7) OVER (ORDER BY sales_transaction_date) +INTO lemon_sales_delay +FROM lemon_sales_sum; + + +-- Exercise 9.4: Analyzing Sales Growth by Email Opening Rate + +1. Join email and sales data for Bat +Scooter: + +SELECT emails.email_subject, emails.customer_id, emails.opened, emails.sent_date, emails.opened_date, bat_sales.sales_transaction_date +INTO bat_emails +FROM emails INNER JOIN bat_sales ON bat_sales.customer_id=emails.customer_id +ORDER BY bat_sales.sales_transaction_date; + + +-- Exercise 9.5: Analyzing the Performance of the Email Marketing Campaign + +1. 
Join email and sales data for Lemon +Scooter: + +SELECT emails.customer_id, emails.email_subject, emails.opened, emails.sent_date, emails.opened_date, lemon_sales.sales_transaction_date +INTO lemon_emails +FROM emails INNER JOIN lemon_sales ON emails.customer_id=lemon_sales.customer_id; diff --git a/w2.ipynb b/w2.ipynb new file mode 100644 index 0000000..bada86f --- /dev/null +++ b/w2.ipynb @@ -0,0 +1,229 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "language": "markdown" + }, + "source": [ + "# Exercise 9.1: Preliminary Data Collection Using SQL Techniques\n", + "1. Load the `sqlda` database from the accompanying source code located [here](https://github.com/TrainingByPackt/SQL-for-Data-Analytics/tree/master/Datasets)." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "language": "markdown" + }, + "source": [ + "2. List the model, `base_msrp` (MSRP: manufacturer's suggested retail price) and `production_start_date` fields within the product table for product types matching `scooter`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "language": "sql", + "vscode": { + "languageId": "sql" + } + }, + "outputs": [], + "source": [ + "-- Active: 1742829589420@@127.0.0.1@54321@postgres\n", + "SELECT model, base_msrp, production_start_date FROM products WHERE product_type='scooter';" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da1c5c26", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "language": "markdown" + }, + "source": [ + "3. Extract the model name and product IDs for the scooters available within the database. We will need this information to reconcile the product information with the available sales information." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "language": "sql", + "vscode": { + "languageId": "sql" + } + }, + "outputs": [], + "source": [ + "SELECT model, product_id FROM products WHERE product_type='scooter';" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "language": "markdown" + }, + "source": [ + "4. Insert the results of this query into a new table called `product_names`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "language": "sql", + "vscode": { + "languageId": "sql" + } + }, + "outputs": [], + "source": [ + "SELECT model, product_id INTO product_names FROM products WHERE product_type='scooter';" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "language": "markdown" + }, + "source": [ + "# Exercise 9.2: Extracting the Sales Information\n", + "1. Use an inner join on the `product_id` columns of both the `product_names` table and the `sales` table. From the result of the inner join, select the `model`, `customer_id`, `sales_transaction_date`, `sales_amount`, `channel`, and `dealership_id`, and store the values in a separate table called `products_sales`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "language": "sql", + "vscode": { + "languageId": "sql" + } + }, + "outputs": [], + "source": [ + "SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id INTO products_sales FROM sales INNER JOIN product_names ON sales.product_id=product_names.product_id;" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "language": "markdown" + }, + "source": [ + "2. Look at the first five rows of this new table by using the following query." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "language": "sql", + "vscode": { + "languageId": "sql" + } + }, + "outputs": [], + "source": [ + "SELECT * FROM products_sales LIMIT 5;" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "language": "markdown" + }, + "source": [ + "3. Select all the information from the `products_sales` table that is available for the Bat Scooter and order the sales information by `sales_transaction_date` in ascending order." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "language": "sql", + "vscode": { + "languageId": "sql" + } + }, + "outputs": [], + "source": [ + "SELECT * FROM products_sales WHERE model='Bat' ORDER BY sales_transaction_date;" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "language": "markdown" + }, + "source": [ + "4. Count the number of records available by using the following query." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "language": "sql", + "vscode": { + "languageId": "sql" + } + }, + "outputs": [], + "source": [ + "SELECT MAX(sales_transaction_date) FROM products_sales WHERE model='Bat';" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/week2_case_study.sql b/week2_case_study.sql new file mode 100644 index 0000000..297e20f --- /dev/null +++ b/week2_case_study.sql @@ -0,0 +1,76 @@ +/*markdown + +/*markdown +# Exercise 9.1: Preliminary Data Collection Using SQL Techniques + +*/ + +/*markdown +-- Active: 1742829589420@@127.0.0.1@54321@sqlda +1. Load the `sqlda` database from the accompanying source code located [here](https://github.com/TrainingByPackt/SQL-for-Data-Analytics/tree/master/Datasets). +*/ +*/ + +/*markdown +-- Load the sqlda database +-- Command depends on your database setup +*/ + +/*markdown +2. List the model, `base_msrp` (MSRP: manufacturer's suggested retail price) and `production_start_date` fields within the product table for product types matching `scooter`. +*/ + +-- Active: 1738444549781@@127.0.0.1@54321@sqlda +SELECT model, base_msrp, production_start_date +FROM products +WHERE product_type = 'scooter'; + +/*markdown +3. Extract the model name and product IDs for the scooters available within the database. We will need this information to reconcile the product information with the available sales information. 
+*/ + +-- Active: 1738444549781@@127.0.0.1@54321@sqlda@public +SELECT model, product_id INTO product_names +FROM products +WHERE product_type = 'scooter'; +SELECT * FROM product_names; + +/*markdown +## Exercise 9.2: Extracting the Sales Information +*/ + +/*markdown + +#### 1. Use an inner join on the `product_id` columns of both the `product_names` table and the `sales` table. From the result of the inner join, select the `model`, `customer_id`, `sales_transaction_date`, `sales_amount`, `channel`, and `dealership_id`, and store the values in a separate table called `product_sales`. +*/ + +SELECT + model, + customer_id, + sales_transaction_date, + sales_amount, + channel, + dealership_id INTO product_sales +FROM sales + INNER JOIN product_names ON sales.product_id = product_names.product_id; + +/*markdown +2. View all rows of the `product_sales` table. +*/ + +SELECT * FROM product_sales; + +/*markdown +3. Select all the information from the `product_sales` table that is available for the Bat Scooter and order the sales information by `sales_transaction_date` in ascending order. By selecting the data in this way, we can look at the first few days of the sales records in detail. +*/ + +SELECT * FROM product_sales +WHERE model = 'Bat' +ORDER BY sales_transaction_date; + +/*markdown +4. Count the number of records available by the following query. +*/ + +-- Determine the last sale date for the Bat Scooter by selecting the maximum (using the MAX function) for sales_transaction_date +SELECT MAX(sales_transaction_date) FROM products_sales WHERE model='Bat'; From ca0c915af3ae6e8080ef59cf7b70b83c5c926305 Mon Sep 17 00:00:00 2001 From: Jay Feulner Date: Wed, 2 Apr 2025 10:33:26 -0500 Subject: [PATCH 2/2] Add preliminary data collection and sales extraction exercises using SQL techniques - Created Jupyter Notebook (w2_.ipynb) for data collection and analysis tasks. - Added steps to load the sqlda database, list scooter product details, and store results in a new table. 
- Implemented sales data extraction and analysis for Bat Scooter sales. - Developed SQL scripts for creating and manipulating tables related to product and sales data. - Included markdown documentation for clarity and guidance on each step of the exercises. - Added PostgreSQL scripts for data manipulation and analysis in separate files (w2_case_study.dib, w2_case_study.md, week2_case_study.sql, with.pgsql). --- .ipynb_checkpoints/Untitled-checkpoint.ipynb | 6 + .ipynb_checkpoints/Untitled1-checkpoint.ipynb | 6 + .../W2_case_study-checkpoint.ipynb | 1513 +++++++++++++++++ .../W2_case_study-checkpoint.py | 281 +++ .../W2_case_study_bk-checkpoint.sql | 83 +- .ipynb_checkpoints/sqlite-checkpoint.ipynb | 567 ++++++ .../w2-checkpoint.ipynb | 0 .../w2_case_study-copy-checkpoint.dib | 242 +++ .ipynb_checkpoints/w2_study-checkpoint.md | 137 ++ .../2025-03-30_sqlda.docx | Bin ...42\200\223 a Case Study en-uCertify.ipynb" | 1488 ++++++++++++++++ ... \342\200\223 a Case Study en-uCertify.md" | 0 ...\342\200\223 a Case Study en-uCertify.sql" | 0 DAT375_Week2/W2_case_study.ipynb | 1450 ++++++++++++++++ DAT375_Week2/W2_case_study.pdf | Bin 0 -> 43385 bytes DAT375_Week2/W2_case_study_bk.sql | 278 +++ .../case_study_exercises_with_js_blocks.sql | 6 +- DAT375_Week2/jup.ipynb | 167 ++ DAT375_Week2/pg.ipynb | 106 ++ DAT375_Week2/sqlite.ipynb | 567 ++++++ DAT375_Week2/w2_.ipynb | 712 ++++++++ DAT375_Week2/w2_case_study.dib | 244 +++ DAT375_Week2/w2_case_study.md | 150 ++ .../week2_case_study.sql | 35 +- {Lesson02 => DAT375_Week2}/with.pgsql | 5 +- 25 files changed, 7982 insertions(+), 61 deletions(-) create mode 100644 .ipynb_checkpoints/Untitled-checkpoint.ipynb create mode 100644 .ipynb_checkpoints/Untitled1-checkpoint.ipynb create mode 100644 .ipynb_checkpoints/W2_case_study-checkpoint.ipynb create mode 100644 .ipynb_checkpoints/W2_case_study-checkpoint.py rename W2_case_study.sql => .ipynb_checkpoints/W2_case_study_bk-checkpoint.sql (99%) create mode 100644 
.ipynb_checkpoints/sqlite-checkpoint.ipynb rename w2.ipynb => .ipynb_checkpoints/w2-checkpoint.ipynb (100%) create mode 100644 .ipynb_checkpoints/w2_case_study-copy-checkpoint.dib create mode 100644 .ipynb_checkpoints/w2_study-checkpoint.md rename 2025-03-30_sqlda.docx => DAT375_Week2/2025-03-30_sqlda.docx (100%) create mode 100644 "DAT375_Week2/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.ipynb" rename "Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.md" => "DAT375_Week2/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.md" (100%) rename "Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.sql" => "DAT375_Week2/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.sql" (100%) create mode 100644 DAT375_Week2/W2_case_study.ipynb create mode 100644 DAT375_Week2/W2_case_study.pdf create mode 100644 DAT375_Week2/W2_case_study_bk.sql rename case_study_exercises_with_js_blocks.sql => DAT375_Week2/case_study_exercises_with_js_blocks.sql (99%) create mode 100644 DAT375_Week2/jup.ipynb create mode 100644 DAT375_Week2/pg.ipynb create mode 100644 DAT375_Week2/sqlite.ipynb create mode 100644 DAT375_Week2/w2_.ipynb create mode 100644 DAT375_Week2/w2_case_study.dib create mode 100644 DAT375_Week2/w2_case_study.md rename week2_case_study.sql => DAT375_Week2/week2_case_study.sql (81%) rename {Lesson02 => DAT375_Week2}/with.pgsql (66%) diff --git a/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/.ipynb_checkpoints/Untitled-checkpoint.ipynb new file mode 100644 index 0000000..363fcab --- /dev/null +++ b/.ipynb_checkpoints/Untitled-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/.ipynb_checkpoints/Untitled1-checkpoint.ipynb b/.ipynb_checkpoints/Untitled1-checkpoint.ipynb new file mode 100644 index 0000000..363fcab --- /dev/null +++ b/.ipynb_checkpoints/Untitled1-checkpoint.ipynb @@ 
-0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/.ipynb_checkpoints/W2_case_study-checkpoint.ipynb b/.ipynb_checkpoints/W2_case_study-checkpoint.ipynb new file mode 100644 index 0000000..169a769 --- /dev/null +++ b/.ipynb_checkpoints/W2_case_study-checkpoint.ipynb @@ -0,0 +1,1513 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "00c797c0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6fe15880", + "metadata": { + "vscode": { + "languageId": "sql" + } + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c494da67", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "# Exercise 9.1: Preliminary Data Collection Using SQL Techniques\n", + "This exercise collects preliminary data. We will load the database, list scooter product details, extract product IDs, and store the results in a new table." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6eebf59d", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d425818", + "metadata": {}, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "id": "e84ea60d", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "# Step 1: Load the sqlda database" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f13d875", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e490fd73", + "metadata": {}, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d6bb15c", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "psql sqlda" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec5feb75", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40610ba5", + "metadata": {}, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "id": "24dbb526", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "# Step 2: List the model, base_msrp, and production_start_date for scooter products" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33e4d5fa", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d23ffa3e", + "metadata": { + "vscode": { + "languageId": "sql" + } + }, + "outputs": [], + "source": [ + "SELECT model, base_msrp, production_start_date \n", + "FROM products \n", + "WHERE product_type = 'scooter';" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11b08776", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "3b9621db", + 
"metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "\n", + "## Step 3: Extract the model name and product IDs for scooter products" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5cc90d0b", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d0fafc5", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT model, product_id \n", + "FROM products \n", + "WHERE product_type = 'scooter';" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "943872ba", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6fd2f7cb", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 4: Insert the above results into a new table called product_names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b21b3c21", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62d38961", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT model, product_id \n", + "INTO product_names \n", + "FROM products \n", + "WHERE product_type = 'scooter';" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ab8aff3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f288ce91", + "metadata": {}, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "610bfeae", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "# Exercise 9.2: Extracting the Sales Information\n", + "In this exercise we join sales data with the product names and then isolate Bat Scooter sales." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13d08097", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "245e8d09", + "metadata": {}, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "id": "f6278281", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "# Step 1: Load the sqlda database" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6859f35a", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "009996ce", + "metadata": {}, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75f9b534", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "psql sqlda" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3aa81367", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3cf725ec", + "metadata": {}, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "id": "adc91e33", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "# Step 2: List the available fields in the database" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8af02e9", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "996e6070", + "metadata": {}, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "id": "1f8388f7", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "# Step 3: Create a new table (products_sales) by joining sales and product_names on product_id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e8885e5", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "2a2cc65d", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id \n", + "INTO products_sales \n", + "FROM sales \n", + "INNER JOIN product_names \n", + " ON sales.product_id = product_names.product_id;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c9683fb", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e593559", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 4: Display the first five rows of products_sales" + ] + }, + { + "cell_type": "markdown", + "id": "08fa49c4", + "metadata": {}, + "source": [ + "# Exercise 9.1: Preliminary Data Collection Using SQL Techniques\n", + "This exercise collects preliminary data. We will load the database, list scooter product details, extract product IDs, and store the results in a new table." 
+ ] + }, + { + "cell_type": "markdown", + "id": "cd9e9cec", + "metadata": {}, + "source": [ + "## Step 1: Load the sqlda database" + ] + }, + { + "cell_type": "markdown", + "id": "5c904f26", + "metadata": {}, + "source": [ + "psql sqlda" + ] + }, + { + "cell_type": "markdown", + "id": "735c1e30", + "metadata": {}, + "source": [ + "## Step 2: List the model, base_msrp, and production_start_date for scooter products" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1f56bd4", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT model, base_msrp, production_start_date \n", + "FROM products \n", + "WHERE product_type = 'scooter';" + ] + }, + { + "cell_type": "markdown", + "id": "61f20e2e", + "metadata": {}, + "source": [ + "## Step 3: Extract the model name and product IDs for scooter products" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f1d9fe8", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT model, product_id \n", + "FROM products \n", + "WHERE product_type = 'scooter';" + ] + }, + { + "cell_type": "markdown", + "id": "f8d10e79", + "metadata": {}, + "source": [ + "## Step 4: Insert the above results into a new table called product_names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ba5016c", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT model, product_id \n", + "INTO product_names \n", + "FROM products \n", + "WHERE product_type = 'scooter';" + ] + }, + { + "cell_type": "markdown", + "id": "cb13f9f6", + "metadata": {}, + "source": [ + "# Exercise 9.2: Extracting the Sales Information\n", + "In this exercise we join sales data with the product names and then isolate Bat Scooter sales." 
+ ] + }, + { + "cell_type": "markdown", + "id": "320897ce", + "metadata": {}, + "source": [ + "## Step 1: Load the sqlda database" + ] + }, + { + "cell_type": "markdown", + "id": "024ebee9", + "metadata": {}, + "source": [ + "psql sqlda" + ] + }, + { + "cell_type": "markdown", + "id": "db8191c4", + "metadata": {}, + "source": [ + "## Step 2: List the available fields in the database" + ] + }, + { + "cell_type": "markdown", + "id": "7b450661", + "metadata": {}, + "source": [ + "## Step 3: Create a new table (products_sales) by joining sales and product_names on product_id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63c62885", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id \n", + "INTO products_sales \n", + "FROM sales \n", + "INNER JOIN product_names \n", + " ON sales.product_id = product_names.product_id;" + ] + }, + { + "cell_type": "markdown", + "id": "ac71ec20", + "metadata": {}, + "source": [ + "## Step 4: Display the first five rows of products_sales" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c86d6b99", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM products_sales \n", + "LIMIT 5;" + ] + }, + { + "cell_type": "markdown", + "id": "61ce0852", + "metadata": {}, + "source": [ + "## Step 5: Retrieve Bat Scooter sales ordered by sales_transaction_date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b697683", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM products_sales \n", + "WHERE model = 'Bat' \n", + "ORDER BY sales_transaction_date;" + ] + }, + { + "cell_type": "markdown", + "id": "2c413c11", + "metadata": {}, + "source": [ + "## Step 6: Count the number of Bat Scooter sales records" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f086b97", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT 
COUNT(model) \n", + "FROM products_sales \n", + "WHERE model = 'Bat';" + ] + }, + { + "cell_type": "markdown", + "id": "b44b3c2f", + "metadata": {}, + "source": [ + "## Step 7: Determine the last sale date for the Bat Scooter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89fc6fdf", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT MAX(sales_transaction_date) \n", + "FROM products_sales \n", + "WHERE model = 'Bat';" + ] + }, + { + "cell_type": "markdown", + "id": "7ad5e59f", + "metadata": {}, + "source": [ + "## Step 8: Insert Bat Scooter sales records into a new table (bat_sales) ordered by date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2f2fa8d", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "INTO bat_sales \n", + "FROM products_sales \n", + "WHERE model = 'Bat' \n", + "ORDER BY sales_transaction_date;" + ] + }, + { + "cell_type": "markdown", + "id": "8569edce", + "metadata": {}, + "source": [ + "## Step 9: Remove the time information in bat_sales (convert to date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6218139", + "metadata": {}, + "outputs": [], + "source": [ + "UPDATE bat_sales \n", + "SET sales_transaction_date = DATE(sales_transaction_date);" + ] + }, + { + "cell_type": "markdown", + "id": "98204ab0", + "metadata": {}, + "source": [ + "## Step 10: Display the first five records of bat_sales ordered by date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a78b197", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM bat_sales \n", + "ORDER BY sales_transaction_date \n", + "LIMIT 5;" + ] + }, + { + "cell_type": "markdown", + "id": "d7a25b21", + "metadata": {}, + "source": [ + "## Step 11: Create bat_sales_daily table with daily sales count" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abc86a3e", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT 
sales_transaction_date, COUNT(sales_transaction_date) \n", + "INTO bat_sales_daily \n", + "FROM bat_sales \n", + "GROUP BY sales_transaction_date \n", + "ORDER BY sales_transaction_date;" + ] + }, + { + "cell_type": "markdown", + "id": "b157c6cf", + "metadata": {}, + "source": [ + "# Activity 9.1: Quantifying the Sales Drop\n", + "Here we compute a cumulative sum of daily sales, apply a 7-day lag, and calculate the growth rate (volume)." + ] + }, + { + "cell_type": "markdown", + "id": "fccfcb9e", + "metadata": {}, + "source": [ + "## Step 1: Load the sqlda database\n", + "\n", + "psql sqlda" + ] + }, + { + "cell_type": "markdown", + "id": "f68a4539", + "metadata": {}, + "source": [ + "## Step 2: Compute the daily cumulative sum of sales and insert into bat_sales_growth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5c67a07", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) AS cumulative_sum\n", + "INTO bat_sales_growth\n", + "FROM bat_sales_daily;" + ] + }, + { + "cell_type": "markdown", + "id": "fd59ef82", + "metadata": {}, + "source": [ + "## Step 3: Compute a 7-day lag of the cumulative sum and insert into bat_sales_daily_delay" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41990786", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT *, lag(cumulative_sum, 7) OVER (ORDER BY sales_transaction_date) AS lag_value\n", + "INTO bat_sales_daily_delay\n", + "FROM bat_sales_growth;" + ] + }, + { + "cell_type": "markdown", + "id": "5cdec19c", + "metadata": {}, + "source": [ + "## Step 4: Inspect the first 15 rows of bat_sales_daily_delay" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac10527f", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM bat_sales_daily_delay \n", + "LIMIT 15;" + ] + }, + { + "cell_type": "markdown", + "id": "31a50be9", + "metadata": {}, + "source": [ + "## Step 5: Compute 
sales growth as a percentage and insert into bat_sales_delay_vol" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b955737e", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT *, (cumulative_sum - lag_value) / lag_value AS volume\n", + "INTO bat_sales_delay_vol\n", + "FROM bat_sales_daily_delay;" + ] + }, + { + "cell_type": "markdown", + "id": "c814eeff", + "metadata": {}, + "source": [ + "## Step 6: Display the first 22 records of bat_sales_delay_vol" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1978c91", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM bat_sales_delay_vol \n", + "LIMIT 22;" + ] + }, + { + "cell_type": "markdown", + "id": "5215b24c", + "metadata": {}, + "source": [ + "#### ----------------------------------------------------------------------------------" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2536ea0e", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85063dc4", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM products_sales \n", + "LIMIT 5;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b11baa93", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b528ba0c", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 5: Retrieve Bat Scooter sales ordered by sales_transaction_date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d21a7d4f", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7cf315e", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM products_sales \n", + "WHERE model = 'Bat' \n", + "ORDER BY sales_transaction_date;" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "d6dab14d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ee34b48", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 6: Count the number of Bat Scooter sales records" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b42a0fd8", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0317e49", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT COUNT(model) \n", + "FROM products_sales \n", + "WHERE model = 'Bat';" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d02892bb", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9618f8c", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 7: Determine the last sale date for the Bat Scooter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86fde4df", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b95e07be", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT MAX(sales_transaction_date) \n", + "FROM products_sales \n", + "WHERE model = 'Bat';" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "daba74d5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9cb5729b", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 8: Insert Bat Scooter sales records into a new table (bat_sales) ordered by date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a12eeba8", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "d5fb0a33", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "INTO bat_sales \n", + "FROM products_sales \n", + "WHERE model = 'Bat' \n", + "ORDER BY sales_transaction_date;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0503217", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06983b3e", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 9: Remove the time information in bat_sales (convert to date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1558401", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5122b580", + "metadata": {}, + "outputs": [], + "source": [ + "UPDATE bat_sales \n", + "SET sales_transaction_date = DATE(sales_transaction_date);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcaf6505", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c93d97e6", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 10: Display the first five records of bat_sales ordered by date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e93a144e", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8675e3f", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM bat_sales \n", + "ORDER BY sales_transaction_date \n", + "LIMIT 5;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eeeed0dd", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9137bcb", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 11: Create 
bat_sales_daily table with daily sales count" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4511890b", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a330ceb1", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT sales_transaction_date, COUNT(sales_transaction_date) \n", + "INTO bat_sales_daily \n", + "FROM bat_sales \n", + "GROUP BY sales_transaction_date \n", + "ORDER BY sales_transaction_date;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95d2dead", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc27c011", + "metadata": {}, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc6289cd", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "# Activity 9.1: Quantifying the Sales Drop\n", + "Here we compute a cumulative sum of daily sales, apply a 7-day lag, and calculate the growth rate (volume)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b505fffa", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7707919", + "metadata": {}, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "markdown", + "id": "e7ec4be0", + "metadata": {}, + "source": [ + "# Step 1: Load the sqlda database" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87450942", + "metadata": {}, + "outputs": [], + "source": [ + "psql sqlda\n", + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0053dc21", + "metadata": {}, + "outputs": [], + "source": [ + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e0f8343", + "metadata": {}, + "outputs": [], + "source": [ + "## Step 2: Compute the daily cumulative sum of sales and insert into bat_sales_growth\n", + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9037ec9f", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) AS cumulative_sum\n", + "INTO bat_sales_growth\n", + "FROM bat_sales_daily;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2172ebe3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c9d53f1", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 3: Compute a 7-day lag of the cumulative sum and insert into bat_sales_daily_delay" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2abdc9a", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee606e34", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT *, lag(cumulative_sum, 7) OVER (ORDER BY sales_transaction_date) AS lag_value\n", + "INTO bat_sales_daily_delay\n", + 
"FROM bat_sales_growth;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64ece351", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed0abead", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 4: Inspect the first 15 rows of bat_sales_daily_delay" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d76d2a03", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "035280c1", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM bat_sales_daily_delay \n", + "LIMIT 15;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5ed089a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "623662a0", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 5: Compute sales growth as a percentage and insert into bat_sales_delay_vol" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6b808a4", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "feb2cf00", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT *, (cumulative_sum - lag_value) / lag_value AS volume\n", + "INTO bat_sales_delay_vol\n", + "FROM bat_sales_daily_delay;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b3c5544", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "645dee5f", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 6: Display the first 22 records of bat_sales_delay_vol" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95cb1e91", + "metadata": {}, + "outputs": [], + 
"source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11254ede", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM bat_sales_delay_vol \n", + "LIMIT 22;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8df8de0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34dd48cd", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#### ----------------------------------------------------------------------------------\n", + "*/" + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "-all", + "main_language": "python", + "notebook_metadata_filter": "-all" + }, + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.13.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/.ipynb_checkpoints/W2_case_study-checkpoint.py b/.ipynb_checkpoints/W2_case_study-checkpoint.py new file mode 100644 index 0000000..db5be62 --- /dev/null +++ b/.ipynb_checkpoints/W2_case_study-checkpoint.py @@ -0,0 +1,281 @@ + +# Exercise 9.1: Preliminary Data Collection Using SQL Techniques +This exercise collects preliminary data. We will load the database, list scooter product details, extract product IDs, and store the results in a new table. 
+ + +*/ + + + +## Step 1: Load the sqlda database + + +*/ + + + +psql sqlda + + +*/ + + + +## Step 2: List the model, base_msrp, and production_start_date for scooter products + + +*/ + +SELECT model, base_msrp, production_start_date +FROM products +WHERE product_type = 'scooter'; + + + + +## Step 3: Extract the model name and product IDs for scooter products + + +*/ + +SELECT model, product_id +FROM products +WHERE product_type = 'scooter'; + + + + +## Step 4: Insert the above results into a new table called product_names + + +*/ + +SELECT model, product_id +INTO product_names +FROM products +WHERE product_type = 'scooter'; + + + + + +# Exercise 9.2: Extracting the Sales Information +In this exercise we join sales data with the product names and then isolate Bat Scooter sales. + + +*/ + + + +## Step 1: Load the sqlda database + + +*/ + + + +psql sqlda + + +*/ + + + +## Step 2: List the available fields in the database + + +*/ + + + +## Step 3: Create a new table (products_sales) by joining sales and product_names on product_id + + +*/ + +SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id +INTO products_sales +FROM sales +INNER JOIN product_names + ON sales.product_id = product_names.product_id; + + + + +## Step 4: Display the first five rows of products_sales + + +*/ + +SELECT * +FROM products_sales +LIMIT 5; + + + + +## Step 5: Retrieve Bat Scooter sales ordered by sales_transaction_date + + +*/ + +SELECT * +FROM products_sales +WHERE model = 'Bat' +ORDER BY sales_transaction_date; + + + + +## Step 6: Count the number of Bat Scooter sales records + + +*/ + +SELECT COUNT(model) +FROM products_sales +WHERE model = 'Bat'; + + + + +## Step 7: Determine the last sale date for the Bat Scooter + + +*/ + +SELECT MAX(sales_transaction_date) +FROM products_sales +WHERE model = 'Bat'; + + + + +## Step 8: Insert Bat Scooter sales records into a new table (bat_sales) ordered by date + + +*/ + +SELECT * +INTO bat_sales +FROM products_sales 
+WHERE model = 'Bat' +ORDER BY sales_transaction_date; + + + + +## Step 9: Remove the time information in bat_sales (convert to date) + + +*/ + +UPDATE bat_sales +SET sales_transaction_date = DATE(sales_transaction_date); + + + + +## Step 10: Display the first five records of bat_sales ordered by date + + +*/ + +SELECT * +FROM bat_sales +ORDER BY sales_transaction_date +LIMIT 5; + + + + +## Step 11: Create bat_sales_daily table with daily sales count + + +*/ + +SELECT sales_transaction_date, COUNT(sales_transaction_date) +INTO bat_sales_daily +FROM bat_sales +GROUP BY sales_transaction_date +ORDER BY sales_transaction_date; + + + + + +# Activity 9.1: Quantifying the Sales Drop +Here we compute a cumulative sum of daily sales, apply a 7-day lag, and calculate the growth rate (volume). + + +*/ + + + +## Step 1: Load the sqlda database + +psql sqlda +*/ + + + +## Step 2: Compute the daily cumulative sum of sales and insert into bat_sales_growth +*/ + +SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) AS cumulative_sum +INTO bat_sales_growth +FROM bat_sales_daily; + + + + +## Step 3: Compute a 7-day lag of the cumulative sum and insert into bat_sales_daily_delay + + +*/ + +SELECT *, lag(cumulative_sum, 7) OVER (ORDER BY sales_transaction_date) AS lag_value +INTO bat_sales_daily_delay +FROM bat_sales_growth; + + + + +## Step 4: Inspect the first 15 rows of bat_sales_daily_delay + + +*/ + +SELECT * +FROM bat_sales_daily_delay +LIMIT 15; + + + + +## Step 5: Compute sales growth as a percentage and insert into bat_sales_delay_vol + + +*/ + +SELECT *, (cumulative_sum - lag_value) / lag_value AS volume +INTO bat_sales_delay_vol +FROM bat_sales_daily_delay; + + + + +## Step 6: Display the first 22 records of bat_sales_delay_vol + + +*/ + +SELECT * +FROM bat_sales_delay_vol +LIMIT 22; + + + + +#### ---------------------------------------------------------------------------------- +*/ \ No newline at end of file diff --git a/W2_case_study.sql 
b/.ipynb_checkpoints/W2_case_study_bk-checkpoint.sql similarity index 99% rename from W2_case_study.sql rename to .ipynb_checkpoints/W2_case_study_bk-checkpoint.sql index b831de2..49efde4 100644 --- a/W2_case_study.sql +++ b/.ipynb_checkpoints/W2_case_study_bk-checkpoint.sql @@ -1,98 +1,78 @@ /*markdown # Exercise 9.1: Preliminary Data Collection Using SQL Techniques -This exercise collects preliminary data. We will load the database, list scooter product details, extract product IDs, and store the results in a new table. +This exercise collects preliminary data. We will load the database, list scooter product details, extract product IDs, and store the results in a new table. */ /*markdown - ## Step 1: Load the sqlda database - - */ /*markdown - -psql sqlda - - -*/ - -/*markdown - ## Step 2: List the model, base_msrp, and production_start_date for scooter products - - */ + SELECT model, base_msrp, production_start_date FROM products WHERE product_type = 'scooter'; - - /*markdown ## Step 3: Extract the model name and product IDs for scooter products - - */ + SELECT model, product_id FROM products WHERE product_type = 'scooter'; - - /*markdown -## Step 4: Insert the above results into a new table called product_names - +## Step 4: Insert the above results into a new table called product_names */ + SELECT model, product_id INTO product_names FROM products WHERE product_type = 'scooter'; - /*markdown - # Exercise 9.2: Extracting the Sales Information + + In this exercise we join sales data with the product names and then isolate Bat Scooter sales. 
+ + */ /*markdown ## Step 1: Load the sqlda database - - */ /*markdown psql sqlda - */ /*markdown ## Step 2: List the available fields in the database - */ /*markdown ## Step 3: Create a new table (products_sales) by joining sales and product_names on product_id - - */ + SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id INTO products_sales FROM sales @@ -102,23 +82,26 @@ INNER JOIN product_names /*markdown + ## Step 4: Display the first five rows of products_sales */ + SELECT * FROM products_sales LIMIT 5; - /*markdown + ## Step 5: Retrieve Bat Scooter sales ordered by sales_transaction_date */ + SELECT * FROM products_sales WHERE model = 'Bat' @@ -127,11 +110,13 @@ ORDER BY sales_transaction_date; /*markdown + ## Step 6: Count the number of Bat Scooter sales records */ + SELECT COUNT(model) FROM products_sales WHERE model = 'Bat'; @@ -139,11 +124,13 @@ WHERE model = 'Bat'; /*markdown + ## Step 7: Determine the last sale date for the Bat Scooter */ + SELECT MAX(sales_transaction_date) FROM products_sales WHERE model = 'Bat'; @@ -151,11 +138,13 @@ WHERE model = 'Bat'; /*markdown + ## Step 8: Insert Bat Scooter sales records into a new table (bat_sales) ordered by date */ + SELECT * INTO bat_sales FROM products_sales @@ -165,22 +154,26 @@ ORDER BY sales_transaction_date; /*markdown + ## Step 9: Remove the time information in bat_sales (convert to date) */ + UPDATE bat_sales SET sales_transaction_date = DATE(sales_transaction_date); /*markdown + ## Step 10: Display the first five records of bat_sales ordered by date */ + SELECT * FROM bat_sales ORDER BY sales_transaction_date @@ -189,11 +182,12 @@ LIMIT 5; /*markdown -## Step 11: Create bat_sales_daily table with daily sales count +## Step 11: Create bat_sales_daily table with daily sales count */ + SELECT sales_transaction_date, COUNT(sales_transaction_date) INTO bat_sales_daily FROM bat_sales @@ -201,20 +195,20 @@ GROUP BY sales_transaction_date ORDER BY sales_transaction_date; 
- /*markdown - # Activity 9.1: Quantifying the Sales Drop Here we compute a cumulative sum of daily sales, apply a 7-day lag, and calculate the growth rate (volume). - */ /*markdown ## Step 1: Load the sqlda database +*/ +/*markdown psql sqlda + */ /*markdown @@ -222,6 +216,7 @@ psql sqlda ## Step 2: Compute the daily cumulative sum of sales and insert into bat_sales_growth */ + SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) AS cumulative_sum INTO bat_sales_growth FROM bat_sales_daily; @@ -229,35 +224,35 @@ FROM bat_sales_daily; /*markdown + ## Step 3: Compute a 7-day lag of the cumulative sum and insert into bat_sales_daily_delay */ + SELECT *, lag(cumulative_sum, 7) OVER (ORDER BY sales_transaction_date) AS lag_value INTO bat_sales_daily_delay FROM bat_sales_growth; - - /*markdown -## Step 4: Inspect the first 15 rows of bat_sales_daily_delay - +## Step 4: Inspect the first 15 rows of bat_sales_daily_delay */ + SELECT * FROM bat_sales_daily_delay LIMIT 15; - /*markdown -## Step 5: Compute sales growth as a percentage and insert into bat_sales_delay_vol +## Step 5: Compute sales growth as a percentage and insert into bat_sales_delay_vol */ + SELECT *, (cumulative_sum - lag_value) / lag_value AS volume INTO bat_sales_delay_vol FROM bat_sales_daily_delay; @@ -265,11 +260,11 @@ FROM bat_sales_daily_delay; /*markdown -## Step 6: Display the first 22 records of bat_sales_delay_vol - +## Step 6: Display the first 22 records of bat_sales_delay_vol */ + SELECT * FROM bat_sales_delay_vol LIMIT 22; @@ -277,5 +272,7 @@ LIMIT 22; /*markdown + #### ---------------------------------------------------------------------------------- + */ \ No newline at end of file diff --git a/.ipynb_checkpoints/sqlite-checkpoint.ipynb b/.ipynb_checkpoints/sqlite-checkpoint.ipynb new file mode 100644 index 0000000..59f38de --- /dev/null +++ b/.ipynb_checkpoints/sqlite-checkpoint.ipynb @@ -0,0 +1,567 @@ +{ + "metadata": { + "kernelspec": { + "display_name": "SQLite", + "language": 
"sql", + "name": "SQLite" + }, + "language_info": { + "codemirror_mode": "sql", + "file_extension": "", + "mimetype": "", + "name": "sql", + "version": "3.32.3" + } + }, + "nbformat_minor": 4, + "nbformat": 4, + "cells": [ + { + "cell_type": "markdown", + "source": "# JupyterLite `xeus-sqlite` Kernel Demo\n\nThe [`jupyterlite/xeus-sqlite-kernel`](https://github.com/jupyterlite/xeus-sqlite-kernel) wraps the original [`jupyter-xeus/xeus-sqlite`](https://github.com/jupyter-xeus/xeus-sqlite/) kernel for use in JupyterLite.\n\nOriginal kernel docs can be found [here](https://xeus-sqlite.readthedocs.io/en/latest/).\n\nThe kernel provides cell magic for command line database operations, and native execution of SQL code against a connected database.", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "## Creating a Database\n\nLine magic is used to create an in-memory database:", + "metadata": {} + }, + { + "cell_type": "code", + "source": "%CREATE example_db.db", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 1 + }, + { + "cell_type": "markdown", + "source": "Currently, there is no ability to:\n\n- save the database to browser storage;\n- export the database;\n- load a database from browser storage;\n- load a database from a URL;\n- load a database from the desktop;\n- connect to a remote sqlite database file.", + "metadata": {} + }, + { + "cell_type": "markdown", + "source": "## Create and Populate Tables\n\nTables are created and populated using SQL:", + "metadata": {} + }, + { + "cell_type": "code", + "source": "CREATE TABLE players (Name STRING, Class STRING, Level INTEGER, Hitpoints INTEGER)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 2 + }, + { + "cell_type": "code", + "source": "INSERT INTO players (Name, Class, Level, Hitpoints) VALUES (\"Martin Splitskull\", \"Warrior\", 3, 40)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 3 + }, + { + "cell_type": 
"code", + "source": "SELECT COUNT(*) as rowcount FROM players", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
rowcount
1
" + ], + "text/plain": [ + "+----------+\n", + "| rowcount |\n", + "+----------+\n", + "| 1 |\n", + "+----------+" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 4 + }, + { + "cell_type": "markdown", + "source": "Only one command can be executed from within a single code cell:", + "metadata": {} + }, + { + "cell_type": "code", + "source": "INSERT INTO players (Name, Class, Level, Hitpoints) VALUES (\"Sir Wolf\", \"Cleric\", 2, 20);\n\n-- The following will not be inserted\nINSERT INTO players (Name, Class, Level, Hitpoints) VALUES (\"Sylvain, The Grey\", \"Wizard\", 1, 10);", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 5 + }, + { + "cell_type": "code", + "source": "SELECT Name, Level, Hitpoints FROM players;", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
NameLevelHitpoints
Martin Splitskull340
Sir Wolf220
" + ], + "text/plain": [ + "+-------------------+-------+-----------+\n", + "| Name | Level | Hitpoints |\n", + "+-------------------+-------+-----------+\n", + "| Martin Splitskull | 3 | 40 |\n", + "+-------------------+-------+-----------+\n", + "| Sir Wolf | 2 | 20 |\n", + "+-------------------+-------+-----------+" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 6 + }, + { + "cell_type": "code", + "source": "INSERT INTO players (Name, Class, Level, Hitpoints) VALUES (\"Sylvain, The Grey\", \"Wizard\", 1, 10);", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 7 + }, + { + "cell_type": "code", + "source": "SELECT Name, Level, Hitpoints FROM players;", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
NameLevelHitpoints
Martin Splitskull340
Sir Wolf220
Sylvain, The Grey110
" + ], + "text/plain": [ + "+-------------------+-------+-----------+\n", + "| Name | Level | Hitpoints |\n", + "+-------------------+-------+-----------+\n", + "| Martin Splitskull | 3 | 40 |\n", + "+-------------------+-------+-----------+\n", + "| Sir Wolf | 2 | 20 |\n", + "+-------------------+-------+-----------+\n", + "| Sylvain, The Grey | 1 | 10 |\n", + "+-------------------+-------+-----------+" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 8 + }, + { + "cell_type": "markdown", + "source": "## Querying Tables\n\nA full range of SQL query commands are supported, including aggregation operations:", + "metadata": {} + }, + { + "cell_type": "code", + "source": "SELECT SUM (Level) FROM players", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "source": "Grouping also works:", + "metadata": {} + }, + { + "cell_type": "code", + "source": "SELECT Level, SUM(Hitpoints) AS `Total Hitpoints`\nFROM players\nGROUP BY Level\nORDER BY `Total Hitpoints` DESC;", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
LevelTotal Hitpoints
340
220
110
" + ], + "text/plain": [ + "+-------+-----------------+\n", + "| Level | Total Hitpoints |\n", + "+-------+-----------------+\n", + "| 3 | 40 |\n", + "+-------+-----------------+\n", + "| 2 | 20 |\n", + "+-------+-----------------+\n", + "| 1 | 10 |\n", + "+-------+-----------------+" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 11 + }, + { + "cell_type": "markdown", + "source": "## Charting Using Vega\n\nThe `jupyter-xeus/xeus-sqlite` kernel also bundles Vega charting components.\n\nVega charts can be generated by piping the result of a SQL query into a Vega line magic command.", + "metadata": {} + }, + { + "cell_type": "code", + "source": "%XVEGA_PLOT\n X_FIELD Level\n Y_FIELD Hitpoints\n MARK circle\n WIDTH 100\n HEIGHT 200\n <>\n SELECT Level, Hitpoints FROM players", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
LevelHitpoints
340
220
110
" + ], + "text/plain": [ + "+-------+-----------+\n", + "| Level | Hitpoints |\n", + "+-------+-----------+\n", + "| 3 | 40 |\n", + "+-------+-----------+\n", + "| 2 | 20 |\n", + "+-------+-----------+\n", + "| 1 | 10 |\n", + "+-------+-----------+" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/vnd.vegalite.v3+json": { + "$schema": "https://vega.github.io/schema/vega-lite/v4.json", + "config": { + "axis": { + "grid": true + } + }, + "data": { + "values": [ + { + "Hitpoints": "name", + "Level": "name" + }, + { + "Hitpoints": "40", + "Level": "3" + }, + { + "Hitpoints": "20", + "Level": "2" + }, + { + "Hitpoints": "10", + "Level": "1" + } + ] + }, + "encoding": { + "x": { + "field": "Level", + "type": "quantitative" + }, + "y": { + "field": "Hitpoints", + "type": "quantitative" + } + }, + "height": 200, + "mark": { + "type": "circle" + }, + "width": 100 + }, + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAJMAAAD3CAYAAAAZgGZZAAAAAXNSR0IArs4c6QAAFBRJREFUeF7tnQlsFlUXhk9Ly77+gAUryCIgUIWwVQJKEbAGBEWpLFJZQqrmVwFBi1qgSItIoCAgSqSyCT8KyJZIoICyBatYoSxhKwpYbEGBFixr6Z9zzVe6fT132pnp943vTYhgz9w5896n79yZuTPHJycnJ4fQoIAJCvgAJhNURBdKAcAEEExTADCZJiU6AkxgwDQFAJNpUqIjy2G6c+cOXb58merWrZur9t9//02VKlUiX19fjICDFLAcpnHjxtGhQ4do69at9Oeff9KQIUPIz8+Pzpw5Q2+//TYNHz7cQXI681D2HUmlX/+4rA6ucf1a1Ll1YJEHailMGzdupM8++4zYnRim6dOn09WrVyk2NpbS0tKofv36xC5VuXJlZ46CA46KQUrYfzrfkfTq0KRIoCyD6fTp0/Tqq69SVFQUxcTEKJhGjRpFPXv2pEGDBhHfK+XTXEpKCjVp0sQBsjvzEFZuO0ynUv9xJVd7KLAWDekZVOiALYHpxo0b1L17d4qPj6crV65QdHS0gunFF19UfwYMGKASCQgIoMTERGrUqBHt2bOH9u7dmy9BnmeFhIQ4c5S85Kg2/5xK5/7Mys2WzyK2wsTghIaGUseOHSkjI4NOnDhBERERFBgYSNWrV6cxY8ZQdnY21apVS8HmbiL+0UcfUWRkZJnKzg7rCc5ZVnnkPc1lZWWpKYmtpzneaWpqqoLgwIEDFBcXR6tXr6akpCSaP3++cin+N///ffv2uYUFMN2Tpqxg4gxcE3C+Km/Xquj5EsdZcprLS8ePP/6o5k0M0PXr16l379509OhR9feEhAQKDg4GTBreW5YwudKTcrAcpqJ0OnfuHNWrV4/8/f2LlRHO5BnO5NEwafwiqhDABJh0WRHjABNgEiHRDQBMgEmXFTEOMAEmERLdAMAEmHRZEeMAE2ASIdENAEyASZcVMQ4wASYREt0AwASYdFkR4wATYBIh0Q0
ATIBJlxUxDjABJhES3QDABJh0WRHjABNgEiHRDQBMgEmXFTEOMAEmERLdAMAEmHRZEeMAE2ASIdENAEyASZcVMQ4wASYREt0AwASYdFkR4wATYBIh0Q0ATIBJlxUxDjABJhES3QDABJh0WRHjABNgEiHRDQBMgEmXFTEOMAEmERLdAMAEmHRZEeMAE2ASIdENAEyASZcVMQ4wASYREt0AwASYdFkR4wATYBIh0Q0ATIBJlxUxDjABJhES3QDABJh0WRHjABNgEiHRDQBMgEmXFTEOMAEmERLdAMAEmHIVuHXrlqo1V7CenG4lTMAEmJQCEyZMoB07dlCrVq1U5aYVK1aoeilGKmECJsCkqltyvTkuwsOta9eu9M4776gCPEYqYQImwJSrwOHDh2nx4sX05Zdf0rFjx1RNXiOVMAETYMpVIDk5mT799FN1ilu/fr2q12ukEiZ3FBYWpjtfR5wNChRXyNGSEmFcuHD//v307LPPqsObOHGiqhzOBZ5RCbNkIy7VeitZr8a2knKwBCaumPjwww/TL7/8ourKhYeHU7du3dTfUQnT2AC6oqWBLFmvxraScrAEJk7xgw8+oJkzZ1KNGjWoTZs2tHz5cqpYsSIqYRobv9xoaSBL2K2hzaQcLIOJs+RbAVxJnAs7522ohGloDFWwNJDGezS+hZSDpTAZTzf/Friaw9VcaRnK3R4wASbAZJoCgMk0KeFMgAkwmaYAYDJNSjgTYAJMpikAmEyTEs4EmACTaQoAJtOkhDMBJsBkmgKAyTQp4UyACTCZpgBgMk1KOBNgAkymKQCYTJMSzgSYAJNpCgAm06SEMwEmwGSaAoDJNCnhTIAJMJmmwL8AJv6yiZ+fnwWS5e8SzuRAmPhlytGjR9O3335LvXr1oiNHjhAP9GuvvWYpUIDJgTB17txZvbfFr3m/8cYb1K5dOzp58iRdunTJUocCTA6DiV+irFSpEm3cuJHmzJlD/GUT/o5Aw4YN6eDBg/Too49a5k6AyWEw8eG0aNGCWrZsSRs2bKCIiAj1AYopU6bQtWvXqEqVKoDJMgUcCNOyZcto2LBh6sjYjfjjXX379lWfyrGywZkcCBNfvWVmZqr5EX8SJykpSZ3erL6iA0wOgunmzZvq4xPt27en6Oho5Ubc+GqO3enMmTNq7mRVA0wOgmnGjBkUGRnplhX+dmXBL+maCRZgchBM3333HW3ZsoUWLFhAISEh6su5rhYcHEz9+/c3k51CfQEmB8HkOpRNmzapD3ZZeUorikrA5ECYdu/eTbNmzaJ9+/blG/OUlBSqWrWqZe4EmBwIU+vWrdU3vPnOd/ny5XOPcPv27ZgzWfarlL9j6attdqQh5SB+OS47O1vdApg8ebK6orOzwZkc6EwjRoxQ1QZWrVqV7/uUgYGB5OPjYxlfgMmBMAUEBNCFCxcKQZORkaFuYlrVAJMDYeIHvFzzpGDjeigVKlSwiiW1zKW4+1yW7ThPx9I8wY4ceB+ekIeUQ7FzJl5mUrt2bVWViedOBVvTpk3J19fXMj0Bk0OciWvFsetMmzZNLT3Bac6y3xmtjiVX0OqklEFSDm6diZ2InYHLVPD6JX50UrCNHz8+362CUuZaaHM4k0OcqeDI8sqBnTt3Kqi4zJfOMznehtc81axZM193qIRp/NdOcgXjPRrfQspBvM/Eu2SIeMVA3kk4n/p4Xbi7FhcXRwsXLiR+hsfLV9hleP6FSpj6g3jt+i3ac+gcXbySRdf/zqTuHVtSswf+o9+ByZGlhunu3bvUsWNHNWeKiopSjjRv3jz66aefVNkvBqRgc823XCsxY2JiKD09nfi+FCph6o/wsi3J9FtahtogKytLaf/f/h2odvVK+p2YGFlqmP766y+qU6cOzZ07V71MwG3btm3qLZUffvhBOU9RjcuEcQEeFqFHjx7KxXg7VMLUG90bt+7QjP/dexbqgqlfl+bU9qEAvU5Mjio1TDk5OarMFz+fmz17tvrtiI2Npa+
//prS0tKIb2i6a7wik++eBwUFUXx8PL388suohKk5wDdv36WlO1IKRXcLCqAWgdbdKJbSK3UlzI8//pjGjBmTbz98yps6darbffNDYJ4fsaMNHDhQxXENOlTClIbr3s8dd5pzHdrZs2dp3bp1ajL9/PPPK6dy11xuxqe1Tp065Ybx61KohKkPk+Mm4HzofM+JJ9z8qhNf1vfr148ef/xxt49STp06Rc2aNcunGr/dwsWfe/furZaz8NryhIQEt3Mu3hj3me5JKM1X9BEteaSUg9atAb4a47d587YuXbrQrl27SvQ4BZUwjQ+oNJDGezS+hZSDCBO7El+VNW7cmNasWaPq7E6aNImWLFlCJ06cKORAxlN0vwWcyYHOxPMjXmXJRZu5bd68WZ2u+AEwX+lZ1QCTA2HiCTdPvocPH66Ojl3pgQceoKFDh6p/Dx482JJvDgAmB8LkbnGc61D5nlNYWJjpBgWYHAjTjh07qEGDBupOODd+0/fnn39Wb/ryMhV+Q8Xf3x8wma6Ag2DiS/zjx4/TyJEj1R9+JZwbL5obO3Ys/fbbb/Tggw9aJiGcyUEwFfd6eLVq1Yifv5UrVw4wWaaAg2By50x8iHyF16hRI0tlhDM5CCbXoZw/f14tcNNZEGcmXYDJITDdvn1bXf7zA91FixapFQIFG14PN/NXp/i+pLvPdmQi5eD2DjgvuQ0NDVUTb34mx+uaCjb+oIWVbgVncogz8WHwBLu4xo9ZrGyAyUEwSa9+441eK3+V8vctnWLsyETKodgHveHh4cTrubnxXe7mzZtT27Ztc/Pmxyr8WWerGpzJQc6UFxJ2Kf5cM68YsKsBJsBkGmuAyUEw8dUav+rE7bnnnlMvA/C6blfr06ePpZ9vBkwOggkTcM/4+ggjJU1+TTsdFNORlEOxE3CeYBf19RPX/vjVJStWC7j6hzM5yJnsoL24fQAmwGQag4AJMAEm0xQATKZJCWcCTIDJNAUAk2lSwpkAE2AyTQHAZJqUcCbABJhMUwAwmSYlnAkwASbTFABMpkkJZwJMgMk0BQCTaVLCmQATYDJNAcBkmpRwJsAEmExTADCZJiWcCTABJtMUAEymSQlnAkyAyTQFAJNpUsKZAFM+mFw15/K+g4dKmMZ/36R31oz3aHwLKQexQoHxXf6zBRc7TE5OVkV7+HOG9913nyp26C2VME/+fomSUy7Q+bSL1Kzx/dT1kQZUtVL5kspR6u2kgSz1DjQ6kHKwDKa1a9fS3r17VY06roLJME2fPt0rKmH+lXmdPlm3X8nrKhrYqF4Nejn0UQ3JrQmRBtKavebvVcrBMphcafDpzQXTqFGjvKIS5oFT6bRx74l8MPE/3hncmSqW97Nj3ArtQxpIO5KScrAVJv7wBf8ZMGCAOnaufJCYmKi+2rtnzx7lZAWbFZUPJOGPp2bSzsPphcKGPdmUKvj7Sps7+uelroRZGnXyOpO3VMLEaa7oEfcoZ/KmSpiYgBcGyiNg4iu7unXrquqXqIRZMp+XBrJkvRrbSsrB8jlTUemiEqaxQeRoaSCN92h8CymHMoFJ9zBwB/yeUtJA6mpamjgpB8AkqCsJWJrBMbKtJ+Qh5QCYAJM204BJW6qSXQ6XsnvtzaWB1O6oFIFSDnAmOJM2XoBJWyo4kyQVYJIUgjNpKwSYtKWCM0lSASZJITiTtkKASVsqOJMkFWCSFIIzaSsEmLSlgjNJUgEmSSE4k7ZCgElbKjiTJBVgkhSCM2krBJi0pYIzSVIBJkkhOJO2QoBJWyo4kyQVYJIUgjNpKwSYtKWCM0lSASZJITiTtkKASVsqOJMkFWCSFIIzaSsEmLSlgjNJUgEmSSE4k7ZCgElbKjiTJBVgkhSCM2krBJi0pYIzSVIBJkkhOJO2QoBJWyo4kyQVYJIUgjNpKwSYtKWCM0lSASZJITiTtkKASVsqOJMkFWCSFIIzaSsEmLSlgjNJUgEmSSE4k7ZCgElbKjiTJBVgkhSCM2krBJi
0pYIzSVIBJkkhOJO2Qh4JEyphao9fbqA0kMZ7NL6FlIOtX9v1pkqYLqklAY0PScm28IQ8pBxshclbKmHmHW5JwJKhYXwrT8hDysFWmLylEiZgKtnFiK0wGa2E6e/vT7dv3zb+a4wtLFGAy7yNHDnSbd+2wuQtlTDzquUJlaU4H0/IQ8rBVpi8qRKmCyhJQEssoIhOPSEPKQdbYfLGSpiSgIDpngK2wuTarTdVwgRM92CRtCgTmHR/m7k8fdeuXXXDLYnzhBz4wDwhDykHj4bJEjrQqWUKACbLpP33dewVMN29e1eVsa9SpYrtI3Tnzh26desWVa5c2fZ9u3bIOVy7do1q1qxpew6XL1+mWrVqae3X42FavHgxzZkzhwIDA4lFXbFiBfHNM6tbdnY2HT58mOLj46lcuXI0e/Zsq3dZZP9xcXG0cOFCCg4OpszMTHW/qUWLFpbncuzYMXrppZeoadOmlJWVRUOHDqVBgwYVu1+Phonh4bvgV65coRo1atCbb75J9evXp3fffddyMdkJJk2aRPv376f27duXCUzsiBUqVFCuxK4cExND6enpNG/ePMuPnyFmrQcPHkzbtm2jcePG0cGDB70Xpl9//ZV69uxJKSkp6iBYxAMHDii3sKt98skndOrUqTKBiY/RdZphd+jRoweNHj1adAgztVmwYIFyxvDwcBo/frz3wpScnExhYWF0/PhxdRDLly+nnTt30qJFi8zUq9i+yhomTi4pKYlGjBhBQUFB6hepYsWKth0/n97Xrl2r5oxbt271Xph40s0HwRNwHx+fXHcYO3asbWKWNUzbt2+nIUOG0Ny5c2ngwIG2Hff69eupU6dOdP/996tpBk/CU1NT1b/dNY+eM3HSbdq0IbbaRx55hEJDQ2nKlCn01FNP2SZqWcKUk5Oj5oo8Z+GBtbNNmDBBzdcmT55MR48eVafY8+fPq4sRr4WJHw7zlQS3Pn360MqVK5VL2dUYJp6z8YTU7sZztWbNmuXb7bBhw2jJkiWWp8IARURE0MmTJ6l8+fI0bdo0NW8qrnm8M3HyPPnMyMhQVxdo9irwxx9/UEBAAPn6+oo79gqYxKNAgEcoAJg8YhickQRgcsY4esRRACaPGAZnJAGYnDGOHnEUgMkjhsEZSQAmg+PYr18/2rRpU+7DV4Oba4XzDcqbN2+KD1a1OrMxCDAZFNsF09WrV6lq1aoGt9YL51UKN27coCNHjuht4CFRgMngQBQH065du4jvmH///fcUEhKi/v7VV1/R0qVLaf78+eqRyIwZM2jNmjXqDy8x4UdFvEaL1yjxuq127dqpJS+AyeDAeGO4O5h4zVG1atWoS5cu9PTTT9PEiRPVs8SpU6cqiN577z2KjY1Vi83Y0XhtUPfu3dVzL35w/fnnn9PFixeJ7zg/8cQTgMkb4TCaszuYVq9eTa7X39lleIXo77//rlZHdujQgfz8/JQbtWrVSq0AeOGFF9Tq0ccee4x69eql3Gz37t20efNmev/99wGT0YHxxnh3MH344YfKfXi5SJMmTXIPLTIyUp3K+L+vv/66Ot2x+/DDY36Ni52MHcrVeJkJP8zFac4b6TCYswumqKgo9TSdG7sOz3P4tMYwvfLKKzRz5kz1/7/55hvil04bNmyoYvv27Uu8EuLSpUtUu3Ztat68OX3xxRe0atUqSkxMpA0bNtAzzzwDmAyOi1eGu2AqmDyvPeLF/rxU5cKFC2r+xAvMnnzySRXKa7ASEhLUqY5Pcdz4FkN0dLRaSclt1qxZ9NZbb6k5Fm4NeCUe5ibNb7WcPXuWGjRooJxJp3F8nTp1yvR1Kp08pRjcGpAUws+1FQBM2lIhUFIAMEkK4efaCgAmbakQKCkAmCSF8HNtBf4PeeHBOWN20+UAAAAASUVORK5CYII=" + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + 
], + "execution_count": 12 + }, + { + "cell_type": "markdown", + "source": "## Database Administration\n\nSeveral line magics are defined to support database administration", + "metadata": {} + }, + { + "cell_type": "code", + "source": "%TABLE_EXISTS players", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "The table players exists." + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 15 + }, + { + "cell_type": "code", + "source": "%TABLE_EXISTS npcs", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "The table npcs doesn't exist." + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 16 + }, + { + "cell_type": "code", + "source": "%GET_INFO", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Magic header string: SQLite format 3\n", + "Page size bytes: 4096\n", + "File format write version: 1\n", + "File format read version: 1\n", + "Reserved space bytes: 0\n", + "Max embedded payload fraction 64\n", + "Min embedded payload fraction: 32\n", + "Leaf payload fraction: 32\n", + "File change counter: 4\n", + "Database size pages: 2\n", + "First freelist trunk page: 0\n", + "Total freelist trunk pages: 0\n", + "Schema cookie: 1\n", + "Schema format number: 4\n", + "Default page cache size bytes: 0\n", + "Largest B tree page number: 0\n", + "Database text encoding: 1\n", + "User version: 0\n", + "Incremental vaccum mode: 0\n", + "Application ID: 0\n", + "Version valid for: 4\n", + "SQLite version: 3032003\n" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 17 + }, + { + "cell_type": "markdown", + "source": "## Connecting to a Different Database\n\nCreating a new database will connect the kernel to the new database instance.", + "metadata": {} + }, + { + 
"cell_type": "code", + "source": "%CREATE potato.db ", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 18 + }, + { + "cell_type": "code", + "source": "CREATE TABLE potaters(production INTEGER)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 19 + }, + { + "cell_type": "code", + "source": "INSERT INTO potaters (production) VALUES (7)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 20 + }, + { + "cell_type": "code", + "source": "SELECT * FROM potaters", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
production
7
" + ], + "text/plain": [ + "+------------+\n", + "| production |\n", + "+------------+\n", + "| 7 |\n", + "+------------+" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 21 + }, + { + "cell_type": "markdown", + "source": "The original database is lost:", + "metadata": {} + }, + { + "cell_type": "code", + "source": "SELECT Name, Level, Hitpoints FROM players;", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "ename": "Error", + "evalue": "no such table: players", + "output_type": "error", + "traceback": [ + "Error: no such table: players" + ] + } + ], + "execution_count": 23 + } + ] +} \ No newline at end of file diff --git a/w2.ipynb b/.ipynb_checkpoints/w2-checkpoint.ipynb similarity index 100% rename from w2.ipynb rename to .ipynb_checkpoints/w2-checkpoint.ipynb diff --git a/.ipynb_checkpoints/w2_case_study-copy-checkpoint.dib b/.ipynb_checkpoints/w2_case_study-copy-checkpoint.dib new file mode 100644 index 0000000..7b55a15 --- /dev/null +++ b/.ipynb_checkpoints/w2_case_study-copy-checkpoint.dib @@ -0,0 +1,242 @@ +#!meta + +{"kernelInfo":{"defaultKernelName":"csharp","items":[{"name":"csharp","languageName":"csharp"},{"name":"fsharp","languageName":"F#","aliases":["f#","fs"]},{"name":"html","languageName":"HTML"},{"name":"http","languageName":"HTTP"},{"name":"javascript","languageName":"JavaScript","aliases":["js"]},{"name":"mermaid","languageName":"Mermaid"},{"name":"pwsh","languageName":"PowerShell","aliases":["powershell"]},{"name":"sql-myPostgresKernel","languageName":"PostgreSQL"},{"name":"value"}]}} + +#!markdown + +# Exercise 9.1: Preliminary Data Collection Using SQL Techniques +This exercise collects preliminary data. We will load the database, list scooter product details, extract product IDs, and store the results in a new table. 
+ +#!markdown + +## Step 1: Load the sqlda database + +#!csharp + +#r "nuget:Microsoft.DotNet.Interactive.PostgreSql, *-*" + +#!javascript + +#!connect postgres --kernel-name myPostgresKernel --connection-string "Host=localhost;Port=54321;Username=postgres;Password=securepassword;Database=sqlda1" + +#!markdown + +## Step 2: List the model, base_msrp, and production_start_date for scooter products + +#!sql-myPostgresKernel + +SELECT model, base_msrp, production_start_date +FROM products +WHERE product_type = 'scooter'; + +#!markdown + +## Step 3: Extract the model name and product IDs for scooter products + +#!sql-myPostgresKernel + +SELECT model, product_id +FROM products +WHERE product_type = 'scooter'; + +#!markdown + +## Step 4: Insert the above results into a new table called product_names + +#!sql-myPostgresKernel + +Drop table if exists product_names; + +SELECT model, product_id +INTO product_names +FROM products +WHERE product_type = 'scooter'; + +#!markdown + +# Exercise 9.2: Extracting the Sales Information +In this exercise we join sales data with the product names and then isolate Bat Scooter sales. 
+ +#!markdown + +## Step 1: Load the sqlda database + +#!markdown + +psql sqlda + +#!markdown + +## Step 2: List the available fields in the database + +#!markdown + +## Step 3: Create a new table (products_sales) by joining sales and product_names on product_id + +#!sql-myPostgresKernel + +Drop table if exists products_sales; + +SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id +INTO products_sales +FROM sales +INNER JOIN product_names + ON sales.product_id = product_names.product_id; + +#!markdown + +## Step 4: Display the first five rows of products_sales + +#!sql-myPostgresKernel + +SELECT * +FROM products_sales +LIMIT 5; + +#!markdown + +## Step 5: Retrieve Bat Scooter sales ordered by sales_transaction_date + +#!sql-myPostgresKernel + +SELECT * +FROM products_sales +WHERE model = 'Bat' +ORDER BY sales_transaction_date; + +#!markdown + +## Step 6: Count the number of Bat Scooter sales records + +#!sql-myPostgresKernel + +SELECT COUNT(model) +FROM products_sales +WHERE model = 'Bat'; + +#!markdown + +## Step 7: Determine the last sale date for the Bat Scooter + +#!sql-myPostgresKernel + +SELECT MAX(sales_transaction_date) +FROM products_sales +WHERE model = 'Bat'; + +#!markdown + +## Step 8: Insert Bat Scooter sales records into a new table (bat_sales) ordered by date + +#!sql-myPostgresKernel + +Drop TABLE IF EXISTS bat_sales; + +SELECT * +INTO bat_sales +FROM products_sales +WHERE model = 'Bat' +ORDER BY sales_transaction_date; + +#!markdown + +## Step 9: Remove the time information in bat_sales (convert to date) + +#!sql-myPostgresKernel + +UPDATE bat_sales +SET sales_transaction_date = DATE(sales_transaction_date); + +#!markdown + +## Step 10: Display the first five records of bat_sales ordered by date + +#!sql-myPostgresKernel + +SELECT * +FROM bat_sales +ORDER BY sales_transaction_date +LIMIT 5; + +#!markdown + +## Step 11: Create bat_sales_daily table with daily sales count + +#!sql-myPostgresKernel + +SELECT 
sales_transaction_date, COUNT(sales_transaction_date) +INTO bat_sales_daily +FROM bat_sales +GROUP BY sales_transaction_date +ORDER BY sales_transaction_date; + +#!markdown + +# Activity 9.1: Quantifying the Sales Drop +Here we compute a cumulative sum of daily sales, apply a 7-day lag, and calculate the growth rate (volume). + +#!markdown + +## Step 1: Load the sqlda database + +psql sqlda + +#!markdown + +## Step 2: Compute the daily cumulative sum of sales and insert into bat_sales_growth + +#!sql-myPostgresKernel + +DROP TABLE IF EXISTS bat_sales_growth; + +SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) AS cumulative_sum +INTO bat_sales_growth +FROM bat_sales_daily; + +#!markdown + +## Step 3: Compute a 7-day lag of the cumulative sum and insert into bat_sales_daily_delay + +#!sql-myPostgresKernel + +DROP TABLE IF EXISTS bat_sales_daily_delay; + +SELECT *, lag(cumulative_sum, 7) OVER (ORDER BY sales_transaction_date) AS lag_value +INTO bat_sales_daily_delay +FROM bat_sales_growth; + +#!markdown + +## Step 4: Inspect the first 15 rows of bat_sales_daily_delay + +#!sql-myPostgresKernel + +SELECT * +FROM bat_sales_daily_delay +LIMIT 15; + +#!markdown + +## Step 5: Compute sales growth as a percentage and insert into bat_sales_delay_vol + +#!sql-myPostgresKernel + +DROP TABLE IF EXISTS bat_sales_delay_vol; + +SELECT *, (cumulative_sum - lag_value) / lag_value AS volume +INTO bat_sales_delay_vol +FROM bat_sales_daily_delay; + +#!markdown + +## Step 6: Display the first 22 records of bat_sales_delay_vol + +#!sql-myPostgresKernel + +SELECT * +FROM bat_sales_delay_vol +LIMIT 22; + +#!markdown + +#### ---------------------------------------------------------------------------------- diff --git a/.ipynb_checkpoints/w2_study-checkpoint.md b/.ipynb_checkpoints/w2_study-checkpoint.md new file mode 100644 index 0000000..eafc30e --- /dev/null +++ b/.ipynb_checkpoints/w2_study-checkpoint.md @@ -0,0 +1,137 @@ +# Exercise 9.1: Preliminary Data Collection Using SQL 
Techniques + +This exercise collects preliminary data. We will load the database, list scooter product details, extract product IDs, and store the results in a new table. + +## Step 1: Load the sqlda database + +psql sqlda + +## Step 2: List the model, base_msrp, and production_start_date for scooter products + +SELECT model, base_msrp, production_start_date +FROM products +WHERE product_type = 'scooter'; + +## Step 3: Extract the model name and product IDs for scooter products + +SELECT model, product_id +FROM products +WHERE product_type = 'scooter'; + +## Step 4: Insert the above results into a new table called product_names + +SELECT model, product_id +INTO product_names +FROM products +WHERE product_type = 'scooter'; + +# Exercise 9.2: Extracting the Sales Information + +In this exercise we join sales data with the product names and then isolate Bat Scooter sales. + +## Step 1: Load the sqlda database + +psql sqlda + +## Step 2: List the available fields in the database + +## Step 3: Create a new table (products_sales) by joining sales and product_names on product_id + +SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id +INTO products_sales +FROM sales +INNER JOIN product_names + ON sales.product_id = product_names.product_id; + +## Step 4: Display the first five rows of products_sales + +SELECT * +FROM products_sales +LIMIT 5; + +## Step 5: Retrieve Bat Scooter sales ordered by sales_transaction_date + +SELECT * +FROM products_sales +WHERE model = 'Bat' +ORDER BY sales_transaction_date; + +## Step 6: Count the number of Bat Scooter sales records + +SELECT COUNT(model) +FROM products_sales +WHERE model = 'Bat'; + +## Step 7: Determine the last sale date for the Bat Scooter + +SELECT MAX(sales_transaction_date) +FROM products_sales +WHERE model = 'Bat'; + +## Step 8: Insert Bat Scooter sales records into a new table (bat_sales) ordered by date + +SELECT * +INTO bat_sales +FROM products_sales +WHERE model = 'Bat' +ORDER BY 
sales_transaction_date; + +## Step 9: Remove the time information in bat_sales (convert to date) + +UPDATE bat_sales +SET sales_transaction_date = DATE(sales_transaction_date); + +## Step 10: Display the first five records of bat_sales ordered by date + +SELECT * +FROM bat_sales +ORDER BY sales_transaction_date +LIMIT 5; + +## Step 11: Create bat_sales_daily table with daily sales count + +SELECT sales_transaction_date, COUNT(sales_transaction_date) +INTO bat_sales_daily +FROM bat_sales +GROUP BY sales_transaction_date +ORDER BY sales_transaction_date; + +# Activity 9.1: Quantifying the Sales Drop + +Here we compute a cumulative sum of daily sales, apply a 7-day lag, and calculate the growth rate (volume). + +## Step 1: Load the sqlda database + +psql sqlda + +## Step 2: Compute the daily cumulative sum of sales and insert into bat_sales_growth + +SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) AS cumulative_sum +INTO bat_sales_growth +FROM bat_sales_daily; + +## Step 3: Compute a 7-day lag of the cumulative sum and insert into bat_sales_daily_delay + +SELECT *, lag(cumulative_sum, 7) OVER (ORDER BY sales_transaction_date) AS lag_value +INTO bat_sales_daily_delay +FROM bat_sales_growth; + +## Step 4: Inspect the first 15 rows of bat_sales_daily_delay + +SELECT * +FROM bat_sales_daily_delay +LIMIT 15; + +## Step 5: Compute sales growth as a percentage and insert into bat_sales_delay_vol + +SELECT *, (cumulative_sum - lag_value) / lag_value AS volume +INTO bat_sales_delay_vol +FROM bat_sales_daily_delay; + +## Step 6: Display the first 22 records of bat_sales_delay_vol + +SELECT * +FROM bat_sales_delay_vol +LIMIT 22; + +#### ---------------------------------------------------------------------------------- diff --git a/2025-03-30_sqlda.docx b/DAT375_Week2/2025-03-30_sqlda.docx similarity index 100% rename from 2025-03-30_sqlda.docx rename to DAT375_Week2/2025-03-30_sqlda.docx diff --git "a/DAT375_Week2/Lesson Using SQL to Uncover the Truth \342\200\223 a 
Case Study en-uCertify.ipynb" "b/DAT375_Week2/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.ipynb" new file mode 100644 index 0000000..3f46eff --- /dev/null +++ "b/DAT375_Week2/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.ipynb" @@ -0,0 +1,1488 @@ + +In this case study, we will be following the scientific method to help solve our problem, which, at its heart, is about testing guesses (or hypotheses) using objectively collected data. We can decompose the scientific method into the following key steps: + +1. Define the question to answer: what caused the drop in sales of the Bat Scooter after approximately 2 weeks? +2. Complete background research to gather sufficient information to propose an initial hypothesis for the event or phenomenon. +3. Construct a hypothesis to explain the event or answer the question. +4. Define and execute an objective experiment to test the hypothesis. In an ideal scenario, all aspects of the experiment should be controlled and fixed, except for the phenomenon that is being tested under the hypothesis. +5. Analyze the data collected during the experiment. +6. Report the result of the analysis, which will hopefully explain why there was a drop in the sales of Bat Scooters. + +It is to be noted that in this lesson, we are completing a post-hoc analysis of the data, that is, the event has happened, and all available data has been collected. Post-hoc data analysis is particularly useful when events have been recorded that cannot be repeated or when certain external factors cannot be controlled. It is with this data that we are able to perform our analysis, and, as such, we will extract information to support or refute our hypothesis. We will, however, be unable to definitively confirm or reject the hypothesis without practical experimentation.
The question that will be the subject of this lesson and that we need to answer is this: why did the sales of the ZoomZoom Bat Scooter drop by approximately 20% after about 2 weeks? + +So, let's start with the absolute basics. + +Exercise 9.1: Preliminary Data Collection Using SQL Techniques + +In this exercise, we will collect preliminary data using SQL techniques. We have been told that the pre-orders for the ZoomZoom Bat Scooter were good, but the orders suddenly dropped by 20%. So, when was production started on the scooter, and how much was it selling for? How does the Bat Scooter compare with other types of scooters in terms of price? The goal of this exercise is to answer these questions: + +1. Load the `sqlda` database from the accompanying source code, which is located [here](https://github.com/TrainingByPackt/SQL-for-Data-Analytics/tree/master/Datasets): + + ```javascript + psql sqlda + ``` + +2. List the model, `base_msrp` (MSRP: manufacturer's suggested retail price) and `production_start_date` fields within the `products` table for product types matching `scooter`: + + ```javascript + SELECT model, base_msrp, production_start_date FROM products WHERE product_type='scooter'; + ``` + + The following table shows the details of all the products for the `scooter` product type: + + | model | base\_msrp | production\_start\_date | + | --- | --- | --- | + | Lemon | 399.99 | 2010-03-03 00:00:00 | + | Lemon Limited Edition | 799.99 | 2011-01-03 00:00:00 | + | Lemon | 499.99 | 2013-05-01 00:00:00 | + | Blade | 699.99 | 2014-06-23 00:00:00 | + | Bat | 599.99 | 2016-10-10 00:00:00 | + | Bat Limited Edition | 699.99 | 2017-02-15 00:00:00 | + | Lemon Zester (7 rows) | 349.99 | 2019-02-04 00:00:00 | + + Figure 9.1: Basic list of scooters with the manufacturer's suggested retail price and production start date + + Looking at the results from the search, we can see that we have two scooter products with **Bat** in the name: **Bat** and **Bat Limited Edition**.
The **Bat** Scooter, which started production on October 10, 2016, with a suggested retail price of $599.99; and the **Bat Limited Edition** Scooter, which started production approximately 4 months later, on February 15, 2017, at a price of $699.99. + + Looking at the product information supplied, we can see that the Bat Scooter is somewhat unique from a price perspective, being the only scooter with a suggested retail price of $599.99. There are two others at $699.99 and one at $499.99. + + Similarly, if we consider the production start date in isolation, the original Bat Scooter is again unique in that it is the only scooter starting production in the last quarter or even half of the year (date format: _YYYY-MM-DD_). All other scooters start production in the first half of the year, with only the Blade scooter starting production in June. + + In order to use the sales information in conjunction with the product information available, we also need to get the product ID for each of the scooters. + +3. Extract the model name and product IDs for the scooters available within the database. We will need this information to reconcile the product information with the available sales information: + + ```javascript + SELECT model, product_id FROM products WHERE product_type='scooter'; + ``` + + The query yields the product IDs shown in the following table: + + | model | product\_id | + | --- | --- | + | Lemon | 1 | + | Lemon Limited Edition | 2 | + | Lemon | 3 | + | Blade | 5 | + | Bat | 7 | + | Bat Limited Edition | 8 | + | Lemon Zester (7 rows) | 12 | + + Figure 9.2: Scooter product ID codes + +4. 
Insert the results of this query into a new table called `product_names`: + + ```javascript + SELECT model, product_id INTO product_names FROM products WHERE product_type='scooter'; + ``` + + Inspect the contents of the `product_names` table shown in the following figure: + + | model | product\_id | + | --- | --- | + | Lemon | 1 | + | Lemon Limited Edition | 2 | + | Lemon | 3 | + | Blade | 5 | + | Bat | 7 | + | Bat Limited Edition | 8 | + | Lemon Zester (7 rows) | 12 | + + Figure 9.3: Contents of the new product\_names table + +As described in the output, we can see that the Bat Scooter lies between the price points of some of the other scooters and that it was also manufactured a lot later in the year compared to the others. + +By completing this very preliminary data collection step, we have the information required to collect sales data on the Bat Scooter as well as other scooter products for comparison. While this exercise involved using the simplest SQL commands, it has already yielded some useful information. + +This exercise has also demonstrated that even the simplest SQL commands can reveal useful information and that they should not be underestimated. In the next exercise, we will try to extract the sales information related to the reduction in sales of the Bat Scooter. + +Exercise 9.2: Extracting the Sales Information + +In this exercise, we will use a combination of simple `SELECT` statements, as well as aggregate and window functions, to examine the sales data. With the preliminary information at hand, we can use it to extract the Bat Scooter sales records and discover what is actually going on. We have a table, `product_names`, that contains both the model names and product IDs. We will need to combine this information with the sales records and extract only those for the Bat Scooter: + +1. Load the `sqlda` database: + + ```javascript + psql sqlda + ``` + +2. 
List the available fields of the `sales` table in the `sqlda` database: + + ```javascript + \d sales + ``` + + The preceding command yields the following fields present in the `sales` table: + + | Column | Table "public.sales" Type | Collation | Nullable | Default | + | --- | --- | --- | --- | --- | + | customer\_id | bigint | | | | + | product\_id | bigint | | | | + | sales\_transaction\_date | timestamp without time zone | | | | + | sales\_amount | double precision | | | | + | channel | text | | | | + | dealership\_id | double precision | | | | + + Figure 9.4: Structure of the sales table + + We can see that we have references to customer and product IDs, as well as the transaction date, sales information, the sales channel, and the dealership ID. +3. Use an inner join on the `product_id` columns of both the `product_names` table and the sales table. From the result of the inner join, select the model, `customer_id`, `sales_transaction_date`, `sales_amount`, channel, and `dealership_id`, and store the values in a separate table called `products_sales`: + + ```javascript + SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id INTO products_sales FROM sales INNER JOIN product_names ON sales.product_id=product_names.product_id; + ``` + + The output of the preceding code can be seen in the next step. + + Note + + Throughout this lesson, we will be storing the results of queries and calculations in separate tables as this will allow you to look at the results of the individual steps in the analysis at any time. In a commercial/production setting, we would typically only store the end result in a separate table, depending upon the context of the problem being solved. + +4. Look at the first five rows of this new table by using the following query: + + ```javascript + SELECT * FROM products_sales LIMIT 5; + ``` + + The following table lists the first five sales records returned by the query.
It shows the sale amount and the transaction details, such as the date and time: + + | model | customer\_id | sales\_transaction\_date | sales\_amount | channel | dealership\_id | + | --- | --- | --- | --- | --- | --- | + | Lemon | 41604 | 2012-03-30 22:45:29 | 399.99 | internet | | + | Lemon | 41531 | 2010-09-07 22:53:16 | 399.99 | internet | | + | Lemon | 41443 | 2011-05-24 02:19:11 | 399.99 | internet | | + | Lemon | 41291 | 2010-08-08 14:12:52 | 319.992 | internet | | + | Lemon | 41084 | 2012-01-09 03:34:52 | 319.992 | internet | | + | (5 rows) | + + Figure 9.5: The combined product sales table + +5. Select all the information from the `products_sales` table that is available for the Bat Scooter and order the sales information by `sales_transaction_date` in ascending order. By selecting the data in this way, we can look at the first few days of the sales records in detail: + + ```javascript + SELECT * FROM products_sales WHERE model='Bat' ORDER BY sales_transaction_date; + ``` + + The preceding query generates the following output: + + | model | customer\_id | sales\_transaction\_date | sales\_amount | channel | dealership\_id | + | --- | --- | --- | --- | --- | --- | + | Bat | 4319 | 2016-10-10 00:41:57 | 599.99 | Internet | | + | Bat | 40250 | 2016-10-10 02:47:28 | 599.99 | dealership | 4 | + | Bat | 35497 | 2016-10-10 04:21:08 | 599.99 | dealership | 2 | + | Bat | 4553 | 2016-10-10 07:42:59 | 599.99 | dealership | 11 | + | Bat | 11678 | 2016-10-10 09:21:08 | 599.99 | internet | | + | Bat | 45868 | 2016-10-10 10:29:29 | 599.99 | internet | + | Bat | 24125 | 2016-10-10 18:57:25 | 599.99 | dealership | 1 | + | Bat | 31307 | 2016-10-10 21:22:38 | 599.99 | internet | | + | Bat | 42213 | 2016-10-10 21:27:36 | 599.99 | internet | | + | Bat | 47790 | 2016-10-11 01:28:58 | 599.99 | dealership | 20 | + | Bat | 6342 | 2016-10-11 03:04:57 | 599.99 | internet | | + | Bat | 45880 | 2016-10-11 04:09:19 | 599.99 | dealership | 7 | + | Bat | 43477 | 2016-10-11 05:24:50 |
599.99 | internet | | + | Bat | 6322 | 2016-10-11 08:48:07 | 599.99 | internet | + | Bat | 46653 | 2016-10-11 15:47:01 | 599.99 | dealership | 6 | + | Bat | 9045 | 2016-10-12 00:15:20 | 599.99 | dealership | 19 | + | Bat | 23679 | 2016-10-12 00:17:53 | 539.991 | internet | | + | Bat | 49856 | 2016-10-12 00:26:15 | 599.99 | dealership | 10 | + | Bat | 45256 | 2016-10-12 02:08:01 | 539.991 | dealership | 7 | + | Bat | 48809 | 2016-10-12 05:08:43 | 599.99 | internet | | + | Bat | 42625 | 2016-10-12 06:17:55 | 599.99 | internet | | + | Bat | 39653 | 2016-10-12 06:28:25 | 599.99 | dealership | 7 | + | Bat | 49226 | 2016-10-12 10:26:13 | 539.991 | internet | | + | Bat | 18602 | 2016-10-12 15:09:53 | 599.99 | internet | | + + Figure 9.6: Ordered sales records + +6. Count the number of records available by using the following query: + + ```javascript + SELECT COUNT(model) FROM products_sales WHERE model='Bat'; + ``` + + The model count for the `'Bat'` model is as shown here: + + ```javascript + count --------- 7328 (1 row) + ``` + + **Figure 9.7: Count of the number of sales records** + + So, we have **7328** sales, beginning October 10, 2016. Check the date of the final sales record by performing the next step. +7. Determine the last sale date for the Bat Scooter by selecting the maximum (using the `MAX` function) for `sales_transaction_date`: + + ```javascript + SELECT MAX(sales_transaction_date) FROM products_sales WHERE model='Bat'; + ``` + + The last sale date is shown here: + + ```javascript + Max ------------------ 2019-05-31 22:15:30 (1 row) + ``` + + Figure 9.8: Last sale date + + The last sale in the database occurred on May 31, 2019. +8. 
Collect the daily sales volume for the Bat Scooter and place it in a new table called `bat_sales` to confirm the information provided by the sales team stating that sales dropped by 20% after the first 2 weeks: + + ```javascript + SELECT * INTO bat_sales FROM products_sales WHERE model='Bat' ORDER BY sales_transaction_date; + ``` + +9. Remove the time information to allow tracking of sales by date, since, at this stage, we are not interested in the time at which each sale occurred. To do so, run the following query: + + ```javascript + UPDATE bat_sales SET sales_transaction_date=DATE(sales_transaction_date); + ``` + +10. Display the first five records of `bat_sales` ordered by `sales_transaction_date`: + + ```javascript + SELECT * FROM bat_sales ORDER BY sales_transaction_date LIMIT 5; + ``` + + The following is the output of the preceding code: + + | model | customer\_id | sales\_transaction\_date | sales\_amount | channel | dealership\_id | + | --- | --- | --- | --- | --- | --- | + | Bat | 4553 | 2016-10-10 00:00:00 | 599.99 | dealership | 11 | + | Bat | 35497 | 2016-10-10 00:00:00 | 599.99 | dealership | 2 | + | Bat | 40250 | 2016-10-10 00:00:00 | 599.99 | dealership | 4 | + | Bat | 4319 | 2016-10-10 00:00:00 | 599.99 | internet | | + | Bat | 11678 | 2016-10-10 00:00:00 | 599.99 | internet | | + | (5 rows) | + + Figure 9.9: First five records of Bat Scooter sales + + Create a new table (`bat_sales_daily`) containing the sales transaction dates and a daily count of total sales: + + ```javascript + SELECT sales_transaction_date, COUNT(sales_transaction_date) INTO bat_sales_daily FROM bat_sales GROUP BY sales_transaction_date ORDER BY sales_transaction_date; + ``` + +11. 
Examine the first `22` records (a little over 3 weeks), as sales were reported to have dropped after approximately the first 2 weeks: + + ```javascript + SELECT * FROM bat_sales_daily LIMIT 22; + ``` + + This will display the following output: + + | sales\_transaction\_date | count | + | --- | --- | + | 2016-10-10 00:00:00 | 9 | + | 2016-10-11 00:00:00 | 6 | + | 2016-10-12 00:00:00 | 10 | + | 2016-10-13 00:00:00 | 10 | + | 2016-10-14 00:00:00 | 5 | + | 2016-10-15 00:00:00 | 10 | + | 2016-10-16 00:00:00 | 14 | + | 2016-10-17 00:00:00 | 9 | + | 2016-10-18 00:00:00 | 11 | + | 2016-10-19 00:00:00 | 12 | + | 2016-10-20 00:00:00 | 10 | + | 2016-10-21 00:00:00 | 6 | + | 2016-10-22 00:00:00 | 2 | + | 2016-10-23 00:00:00 | 5 | + | 2016-10-24 00:00:00 | 6 | + | 2016-10-25 00:00:00 | 9 | + | 2016-10-26 00:00:00 | 2 | + | 2016-10-27 00:00:00 | 4 | + | 2016-10-28 00:00:00 | 7 | + | 2016-10-29 00:00:00 | 5 | + | 2016-10-30 00:00:00 | 5 | + | 2016-10-31 00:00:00 | 3 | + | (22 rows) | + + Figure 9.10: First 3 weeks of sales + +We can see a drop in sales after October 20, as there are 7 days in the first 11 rows that record double-digit sales, and none over the next 11 days. + +At this stage, we can confirm that there has been a drop off in sales, although we are yet to quantify precisely the extent of the reduction or the reason for the drop off in sales. + +Activity 9.1: Quantifying the Sales Drop + +In this activity, we will use our knowledge of the windowing methods that we learned in _Lesson 5_, _Window Functions for Data Analysis_. In the previous exercise, we identified the occurrence of the sales drop as being approximately 10 days after launch. Here, we will try to quantify the drop off in sales for the Bat Scooter. + +Perform the following steps to complete the activity: + +1. Load the `sqlda` database from the accompanying source code located at this [link](https://github.com/TrainingByPackt/SQL-for-Data-Analytics/tree/master/Datasets). +2. 
Using the `OVER` and `ORDER BY` statements, compute the daily cumulative sum of sales. This provides us with a discrete count of sales over time on a daily basis. Insert the results into a new table called `bat_sales_growth`. +3. Compute a 7-day `lag` of the `sum` column, and then insert all the columns of `bat_sales_daily` and the new `lag` column into a new table, `bat_sales_daily_delay`. This `lag` column indicates what sales were like 1 week prior to the given record, allowing us to compare sales with the previous week. +4. Inspect the first 15 rows of `bat_sales_daily_delay`. +5. Compute the sales growth as a percentage, comparing the current sales volume to that of 1 week prior. Insert the resulting table into a new table called `bat_sales_delay_vol`. +6. Compare the first 22 values of the `bat_sales_delay_vol` table to ascertain a sales drop. + +**Solution** + +1. Load the sqlda database: + + ```javascript + psql sqlda + ``` + +2. Compute the daily cumulative sum of sales using the OVER and ORDER BY statements. 
Insert the results into a new table called bat\_sales\_growth: + + ```javascript + SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) INTO bat_sales_growth FROM bat_sales_daily; + ``` + + The following table shows the daily cumulative sum of sales: + + | sales\_transaction\_date | count | sum | + | --- | --- | --- | + | 2016-10-10 00:00:00 | 9 | 9 | + | 2016-10-11 00:00:00 | 6 | 15 | + | 2016-10-12 00:00:00 | 10 | 25 | + | 2016-10-13 00:00:00 | 10 | 35 | + | 2016-10-14 00:00:00 | 5 | 40 | + | 2016-10-15 00:00:00 | 10 | 50 | + | 2016-10-16 00:00:00 | 14 | 64 | + | 2016-10-17 00:00:00 | 9 | 73 | + | 2016-10-18 00:00:00 | 11 | 84 | + | 2016-10-19 00:00:00 | 12 | 96 | + | 2016-10-20 00:00:00 | 10 | 106 | + | 2016-10-21 00:00:00 | 6 | 112 | + | 2016-10-22 00:00:00 | 2 | 114 | + | 2016-10-23 00:00:00 | 5 | 119 | + | 2016-10-24 00:00:00 | 6 | 125 | + | 2016-10-25 00:00:00 | 9 | 134 | + | 2016-10-26 00:00:00 | 2 | 136 | + | 2016-10-27 00:00:00 | 4 | 140 | + | 2016-10-28 00:00:00 | 7 | 147 | + | 2016-10-29 00:00:00 | 5 | 152 | + | 2016-10-30 00:00:00 | 5 | 157 | + | 2016-10-31 00:00:00 | 3 | 160 | + + Figure A: Daily sales count + +3. Compute a 7-day lag function of the sum column and insert all the columns of bat\_sales\_daily and the new lag column into a new table, bat\_sales\_daily\_delay. This lag column indicates what the sales were like 1 week before the given record: + + ```javascript + SELECT *, lag(sum, 7) OVER (ORDER BY sales_transaction_date) INTO bat_sales_daily_delay FROM bat_sales_growth; + ``` + +4. 
Inspect the first 15 rows of bat\_sales\_daily\_delay: + + ```javascript + SELECT * FROM bat_sales_daily_delay LIMIT 15; + ``` + + The following is the output of the preceding code: + + | sales\_transaction\_date | count | sum | lag | + | --- | --- | --- | --- | + | 2016-10-10 00:00:00 | 9 | 9 | | + | 2016-10-11 00:00:00 | 6 | 15 | | + | 2016-10-12 00:00:00 | 10 | 25 | | + | 2016-10-13 00:00:00 | 10 | 35 | | + | 2016-10-14 00:00:00 | 5 | 40 | | + | 2016-10-15 00:00:00 | 10 | 50 | | + | 2016-10-16 00:00:00 | 14 | 64 | | + | 2016-10-17 00:00:00 | 9 | 73 | 9 | + | 2016-10-18 00:00:00 | 11 | 84 | 15 | + | 2016-10-19 00:00:00 | 12 | 96 | 25 | + | 2016-10-20 00:00:00 | 10 | 106 | 35 | + | 2016-10-21 00:00:00 | 6 | 112 | 40 | + | 2016-10-22 00:00:00 | 2 | 114 | 50 | + | 2016-10-23 00:00:00 | 5 | 119 | 64 | + | 2016-10-24 00:00:00 | 6 | 125 | 73 | + | (15 rows) | + + Figure B: Daily sales delay with lag + +5. Compute the sales growth as a percentage, comparing the current sales volume to that of 1 week prior. Insert the resulting table into a new table called bat\_sales\_delay\_vol: + + ```javascript + SELECT *, (sum-lag)/lag AS volume INTO bat_sales_delay_vol FROM bat_sales_daily_delay ; + ``` + + Note + + The percentage sales volume can be calculated via the following equation: + + ```javascript + (new_volume – old_volume) / old_volume + ``` + +6. 
Compare the first 22 values of the bat\_sales\_delay\_vol table: + + ```javascript + SELECT * FROM bat_sales_delay_vol LIMIT 22; + ``` + + The delay volume for the first 22 entries can be seen in the following table: + + | sales\_transaction\_date | count | sum | lag | volume | + | --- | --- | --- | --- | --- | + | 2016-10-10 00:00:00 | 9 | 9 | | | + | 2016-10-11 00:00:00 | 6 | 15 | | | + | 2016-10-12 00:00:00 | 10 | 25 | | | + | 2016-10-13 00:00:00 | 10 | 35 | | | + | 2016-10-14 00:00:00 | 5 | 40 | | | + | 2016-10-15 00:00:00 | 10 | 50 | | | + | 2016-10-16 00:00:00 | 14 | 64 | | | + | 2016-10-17 00:00:00 | 9 | 73 | 9 | 7.1111111111111111 | + | 2016-10-18 00:00:00 | 11 | 84 | 15 | 4.6000000000000000 | + | 2016-10-19 00:00:00 | 12 | 96 | 25 | 2.8400000000000000 | + | 2016-10-20 00:00:00 | 10 | 106 | 35 | 2.0285714285714286 | + | 2016-10-21 00:00:00 | 6 | 112 | 40 | 1.8000000000000000 | + | 2016-10-22 00:00:00 | 2 | 114 | 50 | 1.2800000000000000 | + | 2016-10-23 00:00:00 | 5 | 119 | 64 | 0.85937500000000000000 | + | 2016-10-24 00:00:00 | 6 | 125 | 73 | 0.71232876712328767123 | + | 2016-10-25 00:00:00 | 9 | 134 | 84 | 0.59523809523809523810 | + | 2016-10-26 00:00:00 | 2 | 136 | 96 | 0.41666666666666666667 | + | 2016-10-27 00:00:00 | 4 | 140 | 106 | 0.32075471698113207547 | + | 2016-10-28 00:00:00 | 7 | 147 | 112 | 0.31250000000000000000 | + | 2016-10-29 00:00:00 | 5 | 152 | 114 | 0.33333333333333333333 | + | 2016-10-30 00:00:00 | 5 | 157 | 119 | 0.31932773109243697479 | + | 2016-10-31 00:00:00 | 3 | 160 | 125 | 0.28000000000000000000 | + | (22 rows) | | | | | + + Figure C: Relative sales volume of the scooter over 3 weeks + +Looking at the output table, we can see four sets of information: the daily sales count, the cumulative sum of the daily sales count, the cumulative sum offset by 1 week (the lag), and the relative daily sales volume. 
+ +**Expected Output:** + +| sales\_transaction\_date | count | sum | lag | volume | +| --- | --- | --- | --- | --- | +| 2016-10-10 00:00:00 | 9 | 9 | | | +| 2016-10-11 00:00:00 | 6 | 15 | | | +| 2016-10-12 00:00:00 | 10 | 25 | | | +| 2016-10-13 00:00:00 | 10 | 35 | | | +| 2016-10-14 00:00:00 | 5 | 40 | | | +| 2016-10-15 00:00:00 | 10 | 50 | | | +| 2016-10-16 00:00:00 | 14 | 64 | | | +| 2016-10-17 00:00:00 | 9 | 73 | 9 | 7.1111111111111111 | +| 2016-10-18 00:00:00 | 11 | 84 | 15 | 4.6000000000000000 | +| 2016-10-19 00:00:00 | 12 | 96 | 25 | 2.8400000000000000 | +| 2016-10-20 00:00:00 | 10 | 106 | 35 | 2.0285714285714286 | +| 2016-10-21 00:00:00 | 6 | 112 | 40 | 1.8000000000000000 | +| 2016-10-22 00:00:00 | 2 | 114 | 50 | 1.2800000000000000 | +| 2016-10-23 00:00:00 | 5 | 119 | 64 | 0.85937500000000000000 | +| 2016-10-24 00:00:00 | 6 | 125 | 73 | 0.71232876712328767123 | +| 2016-10-25 00:00:00 | 9 | 134 | 84 | 0.59523809523809523810 | +| 2016-10-26 00:00:00 | 2 | 136 | 96 | 0.41666666666666666667 | +| 2016-10-27 00:00:00 | 4 | 140 | 106 | 0.32075471698113207547 | +| 2016-10-28 00:00:00 | 7 | 147 | 112 | 0.31250000000000000000 | +| 2016-10-29 00:00:00 | 5 | 152 | 114 | 0.33333333333333333333 | +| 2016-10-30 00:00:00 | 5 | 157 | 119 | 0.31932773109243697479 | +| 2016-10-31 00:00:00 | 3 | 160 | 125 | 0.28000000000000000000 | +| (22 rows) | + +Figure 9.11: Relative sales volume of the Bat Scooter over 3 weeks + +While the count and cumulative `sum` columns are reasonably straightforward, why do we need the `lag` and `volume` columns? This is because we are looking for drops in sales growth over the first couple of weeks, hence, we compare the daily sum of sales to the same values 7 days earlier (the lag). By subtracting the sum and lag values and dividing by the lag, we obtain the volume value and can determine sales growth compared to the previous week. + +Notice that the sales volume on October 17 is 700% above that of the launch date of October 10. 
By October 22, the volume is over double that of the week prior. As time passes, this relative difference begins to decrease dramatically. By the end of October, the volume is 28% higher than the week prior. At this stage, we have observed and confirmed the presence of a reduction in sales growth after the first 2 weeks. The next stage is to attempt to explain the causes of the reduction. + +Exercise 9.3: Launch Timing Analysis + +In this exercise, we will try to identify the causes of a sales drop. Now that we have confirmed the presence of the sales growth drop, we will try to explain the cause of the event. We will test the hypothesis that the timing of the scooter launch contributed to the reduction in sales. Remember, in _Exercise 9.1, Preliminary Data Collection Using SQL Techniques_, that the ZoomZoom Bat Scooter launched on October 10, 2016. Observe the following steps to complete the exercise: + +1. Load the `sqlda` database: + + ```javascript + psql sqlda + ``` + +2. Examine the other products in the database. In order to determine whether the launch date contributed to the sales drop, we need to compare the ZoomZoom Bat Scooter to other scooter products according to the launch date. 
Execute the following query to check the launch dates: + + ```javascript + SELECT * FROM products; + ``` + + The following figure shows the launch dates for all the products: + + | product\_id | model | year | product\_type | base\_msrp | production\_start\_date | production\_end\_date | + | --- | --- | --- | --- | --- | --- | --- | + | 1 | Lemon | 2010 | scooter | 399.99 | 2010-03-03 00:00:00 | 2012-06-08 00:00:00 | + | 2 | Lemon Limited Edition | 2011 | scooter | 799.99 | 2011-01-03 00:00:00 | 2011-03-30 00:00:00 | + | 3 | Lemon | 2013 | scooter | 499.99 | 2013-05-01 00:00:00 | 2018-12-28 00:00:00 | + | 4 | Model Chi | 2014 | automobile | 115,000.00 | 2014-06-23 00:00:00 | 2018-12-28 00:00:00 | + | 5 | Blade | 2014 | scooter | 699.99 | 2014-06-23 00:00:00 | 2015-01-27 00:00:00 | + | 6 | Model Sigma | 2015 | automobile | 65,500.00 | 2015-04-15 00:00:00 | 2018-10-01 00:00:00 | + | 7 | Bat | 2016 | scooter | 599.99 | 2016-10-10 00:00:00 | | + | 8 | Bat Limited Edition | 2017 | scooter | 699.99 | 2017-02-15 00:00:00 | | + | 9 | Model Epsilon | 2017 | automobile | 35,000.00 | 2017-02-15 00:00:00 | | + | 10 | Model Gamma | 2017 | automobile | 85,750.00 | 2017-02-15 00:00:00 | | + | 11 | Model Chi | 2019 | automobile | 95,000.00 | 2019-02-04 00:00:00 | | + | 12 | Lemon Zester | 2019 | scooter | 349.99 | 2019-02-04 00:00:00 | | + | (12 rows) | + + Figure 9.12: Products with launch dates + + All the other products launched before July, compared to the Bat Scooter, which launched in October. +3. 
List all scooters from the `products` table, as we are only interested in comparing scooters: + + ```javascript + SELECT * FROM products WHERE product_type='scooter'; + ``` + + The following table shows all the information for products with the product type of `scooter`: + + | product\_id | model | year | product\_type | base\_msrp | production\_start\_date | production\_end\_date | + | --- | --- | --- | --- | --- | --- | --- | + | 1 | Lemon | 2010 | scooter | 399.99 | 2010-03-03 00:00:00 | 2012-06-08 00:00:00 | + | 2 | Lemon Limited Edition | 2011 | scooter | 799.99 | 2011-01-03 00:00:00 | 2011-03-30 00:00:00 | + | 3 | Lemon | 2013 | scooter | 499.99 | 2013-05-01 00:00:00 | 2018-12-28 00:00:00 | + | 5 | Blade | 2014 | scooter | 699.99 | 2014-06-23 00:00:00 | 2015-01-27 00:00:00 | + | 7 | Bat | 2016 | scooter | 599.99 | 2016-10-10 00:00:00 | | + | 8 | Bat Limited Edition | 2017 | scooter | 699.99 | 2017-02-15 00:00:00 | | + | 12 | Lemon Zester | 2019 | scooter | 349.99 | 2019-02-04 00:00:00 | | + | (7 rows) | + + Figure 9.13: Scooter product launch dates + + To test the hypothesis that the time of year had an impact on sales performance, we require a scooter model to use as the control or reference group. In an ideal world, we could launch the ZoomZoom Bat Scooter in a different location or region, for example, but just at a different time, and then compare the two. However, we cannot do this here. Instead, we will choose a similar scooter launched at a different time. There are several different options in the product database, each with its own similarities and differences to the experimental group (ZoomZoom Bat Scooter). In our opinion, the Bat Limited Edition Scooter is suitable for comparison (the control group). It is slightly more expensive, but it was launched only 4 months after the Bat Scooter. Looking at its name, the Bat Limited Edition Scooter seems to share most of the same features, with a number of extras given that it's a "limited edition." +4. 
Select the first five rows of the `sales` table: + + ```javascript + SELECT * FROM sales LIMIT 5; + ``` + + The sales information for the first five customers is as follows: + + | customer\_id | product\_id | sales\_transaction\_date | sales\_amount | channel | dealership\_id | + | --- | --- | --- | --- | --- | --- | + | 1 | 7 | 2017-07-19 08:38:41 | 479.992 | internet | | + | 22 | 7 | 2017-08-14 09:59:02 | 599.99 | dealership | 20 | + | 145 | 7 | 2019-01-20 10:40:11 | 479.992 | internet | | + | 289 | 7 | 2017-05-09 14:20:04 | 539.991 | dealership | 7 | + | 331 | 7 | 2019-05-21 20:03:21 | 539.991 | dealership | 4 | + | (5 rows) | + + Figure 9.14: First five rows of sales data + +5. Select the `model` and `sales_transaction_date` columns from both the products and sales tables for the Bat Limited Edition Scooter. Store the results in a table, `bat_ltd_sales`, ordered by the `sales_transaction_date` column, from the earliest date to the latest: + + ```javascript + SELECT products.model, sales.sales_transaction_date INTO bat_ltd_sales FROM sales INNER JOIN products ON sales.product_id=products.product_id WHERE sales.product_id=8 ORDER BY sales.sales_transaction_date; + ``` + +6. Select the first five lines of `bat_ltd_sales`, using the following query: + + ```javascript + SELECT * FROM bat_ltd_sales LIMIT 5; + ``` + + The following table shows the transaction details for the first five entries of `Bat Limited Edition`: + + | model | sales\_transaction\_date | + | --- | --- | + | Bat Limited Edition | 2017-02-15 01:49:02 | + | Bat Limited Edition | 2017-02-15 09:42:37 | + | Bat Limited Edition | 2017-02-15 10:48:31 | + | Bat Limited Edition | 2017-02-15 12:22:41 | + | Bat Limited Edition | 2017-02-15 13:51:34 | + | (5 rows) | + + Figure 9.15: First five sales of the Bat Limited Edition Scooter + +7. Calculate the total number of sales for `Bat Limited Edition`. 
We can check this by using the `COUNT` function: + + ```javascript + SELECT COUNT(model) FROM bat_ltd_sales; + ``` + + The total sales count can be seen in the following figure: + + ```javascript + count ----------- 5803 (1 row) + ``` + + Figure 9.16: Count of Bat Limited Edition sales + + This is compared to the original Bat Scooter, which sold 7,328 items. +8. Check the transaction details of the last Bat Limited Edition sale. We can check this by using the `MAX` function: + + ```javascript + SELECT MAX(sales_transaction_date) FROM bat_ltd_sales; + ``` + + The transaction details of the last `Bat Limited Edition` product are as follows: + + ```javascript + max ------------------- 2019-05-31 15:08:03 (1 row) + ``` + + Figure 9.17: Last date (MAX) of the Bat Limited Edition sale + +9. Adjust the table to cast the transaction date column as a date, discarding the time information. As with the original Bat Scooter, we are only interested in the date of the sale, not the date and time of the sale. Write the following query: + + ```javascript + ALTER TABLE bat_ltd_sales ALTER COLUMN sales_transaction_date TYPE date; + ``` + +10. Again, select the first five records of `bat_ltd_sales`: + + ```javascript + SELECT * FROM bat_ltd_sales LIMIT 5; + ``` + + The following table shows the first five records of `bat_ltd_sales`: + + | model | sales\_transaction\_date | + | --- | --- | + | Bat Limited Edition | 2017-02-15 | + | Bat Limited Edition | 2017-02-15 | + | Bat Limited Edition | 2017-02-15 | + | Bat Limited Edition | 2017-02-15 | + | Bat Limited Edition | 2017-02-15 | + | (5 rows) | + + Figure 9.18: Select the first five Bat Limited Edition sales by date + +11. In a similar manner to the standard Bat Scooter, create a count of sales on a daily basis. 
Insert the results into the `bat_ltd_sales_count` table by using the following query: + + ```javascript + SELECT sales_transaction_date, count(sales_transaction_date) INTO bat_ltd_sales_count FROM bat_ltd_sales GROUP BY sales_transaction_date ORDER BY sales_transaction_date; + ``` + +12. List the sales count of all the `Bat Limited` products using the following query: + + ```javascript + SELECT * FROM bat_ltd_sales_count; + ``` + + + The sales count is shown in the following figure: + + | sales\_transaction\_date | count | + | --- | --- | + | 2017-02-15 | 6 | + | 2017-02-16 | 2 | + | 2017-02-17 | 1 | + | 2017-02-18 | 4 | + | 2017-02-19 | 5 | + | 2017-02-20 | 6 | + | 2017-02-21 | 5 | + | 2017-02-22 | 4 | + | 2017-02-23 | 6 | + | 2017-02-24 | 2 | + | 2017-02-25 | 2 | + | 2017-02-26 | 2 | + | 2017-02-27 | 4 | + | 2017-02-28 | 4 | + | 2017-03-01 | 5 | + | 2017-03-02 | 1 | + + Figure 9.19: Bat Limited Edition daily sales + +13. Compute the cumulative sum of the daily sales figures and insert the resulting table into `bat_ltd_sales_growth`: + + ```javascript + SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) INTO bat_ltd_sales_growth FROM bat_ltd_sales_count; + ``` + +14. 
Select the first 22 days of sales records from `bat_ltd_sales_growth`: + + ```javascript + SELECT * FROM bat_ltd_sales_growth LIMIT 22; + ``` + + + The following table displays the first 22 records of sales growth: + + | sales\_transaction\_date | count | sum | + | --- | --- | --- | + | 2017-02-15 | 6 | 6 | + | 2017-02-16 | 2 | 8 | + | 2017-02-17 | 1 | 9 | + | 2017-02-18 | 4 | 13 | + | 2017-02-19 | 5 | 18 | + | 2017-02-20 | 6 | 24 | + | 2017-02-21 | 5 | 29 | + | 2017-02-22 | 4 | 33 | + | 2017-02-23 | 6 | 39 | + | 2017-02-24 | 2 | 41 | + | 2017-02-25 | 2 | 43 | + | 2017-02-26 | 2 | 45 | + | 2017-02-27 | 4 | 49 | + | 2017-02-28 | 4 | 53 | + | 2017-03-01 | 5 | 58 | + | 2017-03-02 | 1 | 59 | + | 2017-03-03 | 3 | 62 | + | 2017-03-04 | 8 | 70 | + | 2017-03-05 | 4 | 74 | + | 2017-03-06 | 7 | 81 | + | 2017-03-07 | 7 | 88 | + | 2017-03-08 | 8 | 96 | + | (22 rows) | + + Figure 9.20: Bat Limited Edition sales – cumulative sum + +15. Compare this sales record with the one for the original Bat Scooter sales, as shown in the following code: + + ```javascript + SELECT * FROM bat_sales_growth LIMIT 22; + ``` + + + The following table shows the sales details for the first 22 records of the `bat_sales_growth` table: + + | sales\_transaction\_date | count | sum | + | --- | --- | --- | + | 2016-10-10 00:00:00 | 9 | 9 | + | 2016-10-11 00:00:00 | 6 | 15 | + | 2016-10-12 00:00:00 | 10 | 25 | + | 2016-10-13 00:00:00 | 10 | 35 | + | 2016-10-14 00:00:00 | 5 | 40 | + | 2016-10-15 00:00:00 | 10 | 50 | + | 2016-10-16 00:00:00 | 14 | 64 | + | 2016-10-17 00:00:00 | 9 | 73 | + | 2016-10-18 00:00:00 | 11 | 84 | + | 2016-10-19 00:00:00 | 12 | 96 | + | 2016-10-20 00:00:00 | 10 | 106 | + | 2016-10-21 00:00:00 | 6 | 112 | + | 2016-10-22 00:00:00 | 2 | 114 | + | 2016-10-23 00:00:00 | 5 | 119 | + | 2016-10-24 00:00:00 | 6 | 125 | + | 2016-10-25 00:00:00 | 9 | 134 | + | 2016-10-26 00:00:00 | 2 | 136 | + | 2016-10-27 00:00:00 | 4 | 140 | + | 2016-10-28 00:00:00 | 7 | 147 | + | 2016-10-29 00:00:00 | 5 | 
152 | + | 2016-10-30 00:00:00 | 5 | 157 | + | 2016-10-31 00:00:00 | 3 | 160 | + | (22 rows) | + + Figure 9.21: Bat Scooter cumulative sales for 22 rows + + Sales of the limited-edition scooter did not reach double digits during the first 22 days, nor did the daily volume of sales fluctuate as much. In keeping with the overall sales figure, the limited edition sold 64 fewer units over the first 22 days. +16. Compute the 7-day `lag` function for the `sum` column and insert the results into the `bat_ltd_sales_delay` table: + + ```javascript + SELECT *, lag(sum , 7) OVER (ORDER BY sales_transaction_date) INTO bat_ltd_sales_delay FROM bat_ltd_sales_growth; + ``` + +17. Compute the sales growth for `bat_ltd_sales_delay` in a similar manner to the exercise completed in _Activity 9.1_, _Quantifying the Sales Drop_. Label the column for the results of this calculation as `volume` and store the resulting table in `bat_ltd_sales_vol`: + + ```javascript + SELECT *, (sum-lag)/lag AS volume INTO bat_ltd_sales_vol FROM bat_ltd_sales_delay; + ``` + +18. 
Look at the first 22 records of sales in `bat_ltd_sales_vol`: + + ```javascript + SELECT * FROM bat_ltd_sales_vol LIMIT 22; + ``` + + + The sales volume can be seen in the following figure: + + | sales\_transaction\_date | count | sum | lag | volume | + | --- | --- | --- | --- | --- | + | 2017-02-15 | 6 | 6 | | | + | 2017-02-16 | 2 | 8 | | | + | 2017-02-17 | 1 | 9 | | | + | 2017-02-18 | 4 | 13 | | | + | 2017-02-19 | 5 | 18 | | | + | 2017-02-20 | 6 | 24 | | | + | 2017-02-21 | 5 | 29 | | | + | 2017-02-22 | 4 | 33 | 6 | 4.5000000000000000 | + | 2017-02-23 | 6 | 39 | 8 | 3.8750000000000000 | + | 2017-02-24 | 2 | 41 | 9 | 3.5555555555555556 | + | 2017-02-25 | 2 | 43 | 13 | 2.3076923076923077 | + | 2017-02-26 | 2 | 45 | 18 | 1.5000000000000000 | + | 2017-02-27 | 4 | 49 | 24 | 1.0416666666666667 | + | 2017-02-28 | 4 | 53 | 29 | 0.82758620689655172414 | + | 2017-03-01 | 5 | 58 | 33 | 0.75757575757575757576 | + | 2017-03-02 | 1 | 59 | 39 | 0.51282051282051282051 | + | 2017-03-03 | 3 | 62 | 41 | 0.51219512195121951220 | + | 2017-03-04 | 8 | 70 | 43 | 0.62790697674418604651 | + | 2017-03-05 | 4 | 74 | 45 | 0.64444444444444444444 | + | 2017-03-06 | 7 | 81 | 49 | 0.65306122448979591837 | + | 2017-03-07 | 7 | 88 | 53 | 0.66037735849056603774 | + | 2017-03-08 | 8 | 96 | 58 | 0.65517241379310344828 | + | (22 rows) | + + Figure 9.22: Bat Scooter cumulative sales showing volume + + +Looking at the `volume` column in the preceding diagram, we can again see that the sales growth is more consistent than that of the original Bat Scooter. The growth within the first week is less than that of the original model, but it is sustained over a longer period. After 22 days of sales, the sales growth of the limited-edition scooter is 65% compared to the previous week, as compared with the 28% growth identified in the second activity of the lesson. + +At this stage, we have collected data from two similar products launched at different time periods and found some differences in the trajectory of the sales growth over the first 3 weeks of sales. 
In a professional setting, we may also consider employing more sophisticated statistical comparison methods, such as tests for differences of mean, variance, survival analysis, or other techniques. These methods lie outside the scope of this course and, as such, limited comparative methods will be used. + +While we have shown there to be a difference in sales between the two Bat Scooters, we also cannot rule out the fact that the sales differences can be attributed to the difference in the sales price of the two scooters, with the limited-edition scooter being $100 more expensive. In the next activity, we will compare the sales of the Bat Scooter to the 2013 Lemon, which is $100 cheaper, was launched 3 years prior, is no longer in production, and started production in the first half of the calendar year. + +Activity 9.2: Analyzing the Difference in the Sales Price Hypothesis + +In this activity, we are going to investigate the hypothesis that the reduction in sales growth can be attributed to the price point of the Bat Scooter. Previously, we considered the launch date. However, there could be another factor – the sales price included. If we consider the product list of scooters shown in _Figure 9.23_, and exclude the Bat model scooter, we can see that there are two price categories, $699.99 and above, or $499.99 and below. The Bat Scooter sits exactly between these two groups; perhaps the reduction in sales growth can be attributed to the different pricing model. 
In this activity, we will test this hypothesis by comparing Bat sales to the 2013 Lemon: + +| product\_id | model | year | product\_type | base\_msrp | production\_start\_date | production\_end\_date | +| --- | --- | --- | --- | --- | --- | --- | +| 12 | Lemon Zester | 2019 | scooter | 349.99 | 2019-02-04 00:00:00 | | +| 1 | Lemon | 2010 | scooter | 399.99 | 2010-03-03 00:00:00 | 2012-06-08 00:00:00 | +| 3 | Lemon | 2013 | scooter | 499.99 | 2013-05-01 00:00:00 | 2018-12-28 00:00:00 | +| 7 | Bat | 2016 | scooter | 599.99 | 2016-10-10 00:00:00 | | +| 5 | Blade | 2014 | scooter | 699.99 | 2014-06-23 00:00:00 | 2015-01-27 00:00:00 | +| 8 | Bat Limited Edition | 2017 | scooter | 699.99 | 2017-02-15 00:00:00 | | +| 2 | Lemon Limited Edition | 2011 | scooter | 799.99 | 2011-01-03 00:00:00 | 2011-03-30 00:00:00 | +| (7 rows) | + +Figure 9.23: List of scooter models + +The following are the steps to perform: + +1. Load the `sqlda` database from the accompanying source code located at this [link](https://github.com/TrainingByPackt/SQL-for-Data-Analytics/tree/master/Datasets). +2. Select the `sales_transaction_date` column from the year 2013 for `Lemon` model sales and insert the column into a table called `lemon_sales`. +3. Count the sales records available for 2013 for the `Lemon` model. +4. Display the latest `sales_transaction_date` column. +5. Convert the `sales_transaction_date` column to a date type. +6. Count the number of sales per day within the `lemon_sales` table and insert the data into a table called `lemon_sales_count`. +7. Calculate the cumulative sum of sales and insert the corresponding table into a new table labeled `lemon_sales_sum`. +8. Compute the 7-day `lag` function on the `sum` column and save the result to `lemon_sales_delay`. +9. Calculate the growth rate using the data from `lemon_sales_delay` and store the resulting table in `lemon_sales_growth`. +10. Inspect the first 22 records of the `lemon_sales_growth` table by examining the `volume` data. 
+ +**Solution** + +1. Load the sqlda database: + + ```javascript + psql sqlda + ``` + +2. Select the sales\_transaction\_date column from the 2013 Lemon sales and insert the column into a table called lemon\_sales: + + ```javascript + SELECT sales_transaction_date INTO lemon_sales FROM sales WHERE product_id=3; + ``` + +3. Count the sales records available for the 2013 Lemon by running the following query: + + ```javascript + SELECT count(sales_transaction_date) FROM lemon_sales; + ``` + + We can see that **16558** records are available: + + ```javascript + count -------------------- 16558 (1 row) + ``` + + Figure A: Sales records for the 2013 Lemon Scooter + +4. Use the max function to check the latest sales\_transaction\_date column: + + ```javascript + SELECT max(sales_transaction_date) FROM lemon_sales; + ``` + + The following figure displays the sales\_transaction\_date column: + + ```javascript + max ---------------- 2018-12-27 19:12:10 (1 row) + ``` + + Figure B: Production between May 2013 and December 2018 + +5. Convert the sales\_transaction\_date column to a date type using the following query: + + ```javascript + ALTER TABLE lemon_sales ALTER COLUMN sales_transaction_date TYPE DATE; + ``` + + We are converting the datatype from TIMESTAMP to DATE so as to remove the time information from the field. We are only interested in accumulating daily counts, so we need just the date and not the time. Hence, it is easier just to remove the time information from the field. +6. Count the number of sales per day within the lemon\_sales table and insert this figure into a table called lemon\_sales\_count: + + ```javascript + SELECT sales_transaction_date, COUNT(sales_transaction_date) INTO lemon_sales_count FROM lemon_sales GROUP BY sales_transaction_date ORDER BY sales_transaction_date; + ``` + +7. 
Calculate the cumulative sum of sales and insert the corresponding table into a new table labeled lemon\_sales\_sum: + + ```javascript + SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) INTO lemon_sales_sum FROM lemon_sales_count; + ``` + +8. Compute the 7-day lag function on the sum column and save the result to lemon\_sales\_delay: + + ```javascript + SELECT *, lag(sum, 7) OVER (ORDER BY sales_transaction_date) INTO lemon_sales_delay FROM lemon_sales_sum; + ``` + +9. Calculate the growth rate using the data from lemon\_sales\_delay and store the resulting table in lemon\_sales\_growth. Label the growth rate column as volume: + + ```javascript + SELECT *, (sum-lag)/lag AS volume INTO lemon_sales_growth FROM lemon_sales_delay; + ``` + +10. Inspect the first 22 records of the lemon\_sales\_growth table by examining the volume data: + + ```javascript + SELECT * FROM lemon_sales_growth LIMIT 22; + ``` + + The following table shows the sales growth: + + | sales\_transaction\_date | count | sum | lag | volume | + | --- | --- | --- | --- | --- | + | 2013-05-01 | 6 | 6 | | | + | 2013-05-02 | 8 | 14 | | | + | 2013-05-03 | 4 | 18 | | | + | 2013-05-04 | 9 | 27 | | | + | 2013-05-05 | 9 | 36 | | | + | 2013-05-06 | 6 | 42 | | | + | 2013-05-07 | 8 | 50 | | | + | 2013-05-08 | 6 | 56 | 6 | 8.3333333333333333 | + | 2013-05-09 | 6 | 62 | 14 | 3.4285714285714286 | + | 2013-05-10 | 9 | 71 | 18 | 2.9444444444444444 | + | 2013-05-11 | 3 | 74 | 27 | 1.7407407407407407 | + | 2013-05-12 | 4 | 78 | 36 | 1.1666666666666667 | + | 2013-05-13 | 7 | 85 | 42 | 1.0238095238095238 | + | 2013-05-14 | 3 | 88 | 50 | 0.76000000000000000000 | + | 2013-05-15 | 3 | 91 | 56 | 0.62500000000000000000 | + | 2013-05-16 | 4 | 95 | 62 | 0.53225806451612903226 | + | 2013-05-17 | 6 | 101 | 71 | 0.42253521126760563380 | + | 2013-05-18 | 9 | 110 | 74 | 0.48648648648648648649 | + | 2013-05-19 | 6 | 116 | 78 | 0.48717948717948717949 | + | 2013-05-20 | 6 | 122 | 85 | 0.43529411764705882353 | + | 2013-05-21 | 
11 | 133 | 88 | 0.51136363636363636364 | + | 2013-05-22 | 8 | 141 | 91 | 0.54945054945054945055 | + | (22 rows) | + + Figure C: Sales growth of the Lemon Scooter + +Similar to the previous exercise, we have calculated the cumulative sum, lag, and relative sales growth of the Lemon Scooter. We can see that the initial sales volume is much larger than that of the other scooters, at over 800%, and again finishes higher at 55%. + +**Expected Output:** + +| sales\_transaction\_date | count | sum | lag | volume | +| --- | --- | --- | --- | --- | +| 2013-05-01 | 6 | 6 | | | +| 2013-05-02 | 8 | 14 | | | +| 2013-05-03 | 4 | 18 | | | +| 2013-05-04 | 9 | 27 | | | +| 2013-05-05 | 9 | 36 | | | +| 2013-05-06 | 6 | 42 | | | +| 2013-05-07 | 8 | 50 | | | +| 2013-05-08 | 6 | 56 | 6 | 8.3333333333333333 | +| 2013-05-09 | 6 | 62 | 14 | 3.4285714285714286 | +| 2013-05-10 | 9 | 71 | 18 | 2.9444444444444444 | +| 2013-05-11 | 3 | 74 | 27 | 1.7407407407407407 | +| 2013-05-12 | 4 | 78 | 36 | 1.1666666666666667 | +| 2013-05-13 | 7 | 85 | 42 | 1.0238095238095238 | +| 2013-05-14 | 3 | 88 | 50 | 0.76000000000000000000 | +| 2013-05-15 | 3 | 91 | 56 | 0.62500000000000000000 | +| 2013-05-16 | 4 | 95 | 62 | 0.53225806451612903226 | +| 2013-05-17 | 6 | 101 | 71 | 0.42253521126760563380 | +| 2013-05-18 | 9 | 110 | 74 | 0.48648648648648648649 | +| 2013-05-19 | 6 | 116 | 78 | 0.48717948717948717949 | +| 2013-05-20 | 6 | 122 | 85 | 0.43529411764705882353 | +| 2013-05-21 | 11 | 133 | 88 | 0.51136363636363636364 | +| 2013-05-22 | 8 | 141 | 91 | 0.54945054945054945055 | +| (22 rows) | + +Figure 9.54: Sales growth of the Lemon Scooter + +Now that we have collected data to test the two hypotheses of timing and cost, what observations can we make and what conclusions can we draw? The first observation that we can make is regarding the total volume of sales for the three different scooter products. 
The Lemon Scooter, over its production life cycle of 4.5 years, sold 16,558 units, while the two Bat Scooters, the Original and Limited Edition models, sold 7,328 and 5,803 units, respectively, and are still currently in production, with the Bat Scooter launching about 4 months earlier and with approximately 2.5 years of sales data available. Looking at the sales growth of the three different scooters, we can also make a few different observations: + +- The original Bat Scooter, which launched in October at a price of $599.99, experienced a 700% sales growth in its second week of production and finished the first 22 days with 28% growth and a sales figure of 160 units. +- The Bat Limited Edition Scooter, which launched in February at a price of $699.99, experienced 450% growth at the start of its second week of production and finished with 96 sales and 66% growth over the first 22 days. +- The 2013 Lemon Scooter, which launched in May at a price of $499.99, experienced 830% growth in the second week of production and ended its first 22 days with 141 sales and 55% growth. + +Based on this information, we can make a number of different conclusions: + +- The initial growth rate starting in the second week of sales correlates to the cost of the scooter. As the cost increased to $699.99, the initial growth rate dropped from 830% to 450%. +- The number of units sold in the first 22 days does not directly correlate to the cost. The $599.99 Bat Scooter sold more than the 2013 Lemon Scooter in that first period despite the price difference. +- There is some evidence to suggest that the reduction in sales can be attributed to seasonal variations given the significant reduction in growth and the fact that the original Bat Scooter is the only one released in October. So far, the evidence suggests that the drop can be attributed to the difference in launch timing. 
+ +Before we draw the conclusion that the difference can be attributed to seasonal variations and launch timing, let's ensure that we have extensively tested a range of possibilities. Perhaps marketing work, such as email campaigns, that is, when the emails were sent, and the frequency with which the emails were opened, made a difference. + +Now that we have considered both the launch timing and the suggested retail price of the scooter as a possible cause of the reduction in sales, we will direct our efforts to other potential causes, such as the rate of opening of marketing emails. Does the marketing email opening rate have an effect on sales growth throughout the first 3 weeks? We will find this out in our next exercise. + +Exercise 9.4: Analyzing Sales Growth by Email Opening Rate + +In this exercise, we will analyze the sales growth using the email opening rate. To investigate the hypothesis that a decrease in the rate of opening emails impacted the Bat Scooter sales rate, we will again select the Bat and Lemon Scooters and will compare the email opening rate. + +Perform the following steps to complete the exercise: + +1. Load the `sqlda` database: + + ```javascript + psql sqlda + ``` + +2. Firstly, look at the `emails` table to see what information is available. 
Select the first five rows of the `emails` table: + + ```javascript + SELECT * FROM emails LIMIT 5; + ``` + + The following table displays the email information for the first five rows: + + | email\_id | customer\_id | email\_subject | opened | clicked | bounced | sent\_date | opened\_date | clicked\_date | + | --- | --- | --- | --- | --- | --- | --- | --- | --- | + | 1 | 18 | Introducing A Limited Edition | f | f | f | 2011-01-03 15:00:00 | | | + | 2 | 30 | Introducing A Limited Edition | f | f | f | 2011-01-03 15:00:00 | | | + | 3 | 41 | Introducing A Limited Edition | t | f | f | 2011-01-03 15:00:00 | 2011-01-04 10:41:11 | | + | 4 | 52 | Introducing A Limited Edition | f | f | f | 2011-01-03 15:00:00 | | | + | 5 | 59 | Introducing A Limited Edition | f | f | f | 2011-01-03 15:00:00 | | | + | (5 rows) | + + Figure 9.55: The first five rows of the emails table + + To investigate our hypothesis, we need to know whether an email was opened, and when it was opened, as well as who the customer was who opened the email and whether that customer purchased a scooter. If the email marketing campaign was successful in maintaining the sales growth rate, we would expect a customer to open an email shortly before a scooter was purchased. + + The period in which the emails were sent, as well as the ID of customers who received and opened an email, can help us to determine whether a customer who made a purchase may have been encouraged to do so following the receipt of an email. + +3. To test the hypothesis, we need to collect the `customer_id` column from both the `emails` table and the `bat_sales` table for the Bat Scooter, the `opened`, `sent_date`, `opened_date`, and `email_subject` columns from the `emails` table, as well as the `sales_transaction_date` column from the `bat_sales` table. As we only want the email records of customers who purchased a Bat Scooter, we will join the two tables on the `customer_id` column. 
Then, insert the results into a new table – `bat_emails`: + + ```javascript + SELECT emails.email_subject, emails.customer_id, emails.opened, emails.sent_date, emails.opened_date, bat_sales.sales_transaction_date INTO bat_emails FROM emails INNER JOIN bat_sales ON bat_sales.customer_id=emails.customer_id ORDER BY bat_sales.sales_transaction_date; + ``` + +4. Select the first 10 rows of the `bat_emails` table, ordering the results by `sales_transaction_date`: + + ```javascript + SELECT * FROM bat_emails LIMIT 10; + ``` + + The following table shows the first 10 rows of the `bat_emails` table ordered by `sales_transaction_date`: + + | email\_subject | customer\_id | opened | sent\_date | opened\_date | sales\_transaction\_date | + | --- | --- | --- | --- | --- | --- | + | A New Year, And Some New EVs | 11678 | f | 2019-01-07 15:00:00 | | 2016-10-10 00:00:00 | + | A Brand New Scooter...and Car | 40250 | f | 2014-05-06 15:00:00 | | 2016-10-10 00:00:00 | + | We Really Outdid Ourselves this Year | 24125 | f | 2017-01-15 15:00:00 | | 2016-10-10 00:00:00 | + | Tis' the Season for Savings | 31307 | t | 2015-11-26 15:00:00 | 2015-11-27 04:55:07 | 2016-10-10 00:00:00 | + | 25% off all EVs. It's a Christmas Miracle! | 42213 | f | 2016-11-25 15:00:00 | | 2016-10-10 00:00:00 | + | Zoom zoom Black Friday Sale | 40250 | f | 2014-11-28 15:00:00 | | 2016-10-10 00:00:00 | + | Save the Planet with some Holiday Savings. | 4553 | f | 2016-11-23 15:00:00 | | 2016-10-10 00:00:00 | + | The 2013 Lemon Scooter is Here | 24125 | t | 2013-03-01 15:00:00 | 2013-03-02 14:43:34 | 2016-10-10 00:00:00 | + | The 2013 Lemon Scooter is Here | 40250 | f | 2013-03-01 15:00:00 | | 2016-10-10 00:00:00 | + | Save the Planet with some Holiday Savings. 
| 40250 | f | 2018-11-23 15:00:00 | | 2016-10-10 00:00:00 | + | (10 rows) | + + Figure 9.56: Email and sales information joined on customer\_id + + We can see here that there are several emails unopened, over a range of sent dates, and that some customers have received multiple emails. Looking at the subjects of the emails, some of them don't seem related to the Zoom scooters at all. +5. Select all rows where the `sent_date` email predates the `sales_transaction_date` column, order by `customer_id`, and limit the output to the first 22 rows. This will help us to know which emails were sent to each customer before they purchased their scooter. Write the following query to do so: + + ```javascript + SELECT * FROM bat_emails WHERE sent_date < sales_transaction_date ORDER BY customer_id LIMIT 22; + ``` + + The following table lists the emails sent to the customers before the `sales_transaction_date` column: + + ![The figure shows the output of the above query. ](https://s3.amazonaws.com/jigyaasa_content_static/sql-data-anal/C11861_09_27.jpg) + + Figure 9.57: Emails sent to customers before the sale transaction date + +6. Delete the rows of the `bat_emails` table where emails were sent more than 6 months prior to production. As we can see, there are some emails that were sent years before the transaction date. We can easily remove some of the unwanted emails by removing those sent before the Bat Scooter was in production. From the products table, the production start date for the Bat Scooter is October 10, 2016: + + ```javascript + DELETE FROM bat_emails WHERE sent_date < '2016-04-10'; + ``` + + Note + + In this exercise, we are removing information that we no longer require from an existing table. This differs from the previous exercises, where we created multiple tables each with slightly different information from other. 
The technique you apply will differ depending upon the requirements of the problem being solved; do you require a traceable record of analysis, or are efficiency and reduced storage key? + +7. Delete the rows where the sent date is after the purchase date, as they are not relevant to the sale: + + ```javascript + DELETE FROM bat_emails WHERE sent_date > sales_transaction_date; + ``` + +8. Delete those rows where the difference between the transaction date and the sent date exceeds 30 days, as we also only want those emails that were sent shortly before the scooter purchase. An email sent 1 year beforehand is unlikely to influence a purchasing decision, but one closer to the purchase date may have influenced the sales decision. We will set a limit of 1 month (30 days) before the purchase. Write the following query to do so: + + ```javascript + DELETE FROM bat_emails WHERE (sales_transaction_date-sent_date) > '30 days'; + ``` + +9. Examine the first 22 rows again ordered by `customer_id` by running the following query: + + ```javascript + SELECT * FROM bat_emails ORDER BY customer_id LIMIT 22; + ``` + + The following table shows the emails where the difference between the transaction date and the sent date is no more than 30 days: + + ![The figure shows the output of the above query.](https://s3.amazonaws.com/jigyaasa_content_static/sql-data-anal/C11861_09_28.jpg) + + Figure 9.58: Emails sent close to the date of sale + + At this stage, we have reasonably filtered the available data based on when the emails were sent relative to the sale. Looking at the preceding `email_subject` column, it also appears that there are a few emails unrelated to the Bat Scooter, for example, **25% off all EVs. It's a Christmas Miracle!** and **Black Friday. Green Cars.** These emails seem more related to electric cars than to scooters, and so we can remove them from our analysis. +10. 
Select the distinct values from the `email_subject` column to get a list of the different emails sent to the customers: + + ```javascript + SELECT DISTINCT(email_subject) FROM bat_emails; + ``` + + The following table shows a list of distinct email subjects: + + ```javascript + email_subject --------------------------------------- Black Friday. Green Cars. 25% off all EVs. It's a Christmas Miracle! A New Year, And Some New EVs Like a Bat out of Heaven Save the Planet with some Holiday Savings. We Really Outdid Ourselves this Year (6 rows) + ``` + + Figure 9.59: Unique email subjects sent to potential customers of the Bat Scooter + +11. Delete all records that have `Black Friday` in the email subject. These emails do not appear relevant to the sale of the Bat Scooter: + + ```javascript + DELETE FROM bat_emails WHERE position('Black Friday' in email_subject)>0; + ``` + + Note + + The `position` function in the preceding example returns the character position at which the `Black Friday` string first occurs in `email_subject`, or 0 if the string does not occur at all. A result greater than 0 therefore means that the subject contains `Black Friday`, so we are deleting any rows where `Black Friday` is in the email subject. For more information on PostgreSQL, refer to the documentation regarding [string functions](https://www.postgresql.org/docs/current/functions-string.html). + +12. Delete all rows where **25% off all EVs. It's a Christmas Miracle!** or **A New Year, And Some New EVs** can be found in the `email_subject`: + + ```javascript + DELETE FROM bat_emails WHERE position('25% off all EV' in email_subject)>0; DELETE FROM bat_emails WHERE position('Some New EV' in email_subject)>0; + ``` + +13. At this stage, we have our final dataset of emails sent to customers. 
Count the number of rows that are left in the sample by writing the following query: + + ```javascript + SELECT count(sales_transaction_date) FROM bat_emails; + ``` + + + We can see that **401** rows are left in the sample: + + ```javascript + count ----------- 401 (1 row) + ``` + + Figure 9.60: Count of the final Bat Scooter email dataset + +14. We will now compute the percentage of emails that were opened relative to sales. Count the emails that were opened by writing the following query: + + ```javascript + SELECT count(opened) FROM bat_emails WHERE opened='t' + ``` + + + We can see that **98** emails were opened: + + ```javascript + count ------------ 98 (1 row) + ``` + + Figure 9.61: Count of opened Bat Scooter campaign emails + +15. Count the customers who received emails and made a purchase. We will determine this by counting the number of unique (or distinct) customers that are in the `bat_emails` table: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM bat_emails; + ``` + + + We can see that **396** customers who received an email made a purchase: + + ```javascript + count ----------- 396 (1 row) + ``` + + Figure 9.62: Count of unique customers who received a Bat Scooter campaign email + +16. Count the unique (or distinct) customers who made a purchase by writing the following query: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM bat_sales; + ``` + + + Following is the output of the preceding code: + + ```javascript + count ---------- 6659 (1 row) + ``` + + Figure 9.63: Count of unique customers + +17. 
Calculate the percentage of customers who purchased a Bat Scooter after receiving an email: + + ```javascript + SELECT 396.0/6659.0 AS email_rate; + ``` + + + The output of the preceding query is displayed as follows: + + ```javascript + email_rate ---------------------- 0.05946838864694398558 (1 row) + ``` + + Figure 9.64: Percentage of customers who received an email + + Note + + In the preceding calculation, you can see that we included a decimal place in the figures, for example, 396.0 instead of a simple integer value (396). This is because the resulting value is a fraction smaller than 1. If we excluded these decimal places, the SQL server would have completed the division operation as integers and the result would be 0. + + Just under 6% of customers who made a purchase received an email regarding the Bat Scooter. Since 18% of customers who received an email made a purchase, there is a strong argument to be made that actively increasing the size of the customer base who receive marketing emails could increase Bat Scooter sales. +18. Limit the scope of our data to be all sales prior to November 1, 2016 and put the data in a new table called `bat_emails_threewks`. So far, we have examined the email opening rate throughout all available data for the Bat Scooter. Now check the rate for the first 3 weeks, where we saw a reduction in sales: + + ```javascript + SELECT * INTO bat_emails_threewks FROM bat_emails WHERE sales_transaction_date < '2016-11-01'; + ``` + +19. Now, count the number of emails sent during this period: + + ```javascript + SELECT COUNT(opened) FROM bat_emails_threewks; + ``` + + + We can see that we have sent **82** emails during this period: + + ```javascript + count ---------------------- 82 (1 row) + ``` + + Figure 9.65: Count of emails sent in the first 3 weeks + +20. 
Now, count the number of emails opened in the first 3 weeks: + + ```javascript + SELECT COUNT(opened) FROM bat_emails_threewks WHERE opened='t'; + ``` + + + The following is the output of the preceding code: + + ```javascript + count ---------------------- 15 (1 row) + ``` + + Figure 9.66: Count of emails opened + + + We can see that **15** emails were opened in the first 3 weeks. +21. Count the number of customers who received emails during the first 3 weeks of sales and who then made a purchase by using the following query: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM bat_emails_threewks; + ``` + + + We can see that **82** customers received emails during the first 3 weeks: + + ```javascript + count ---------------------- 82 (1 row) + ``` + + Figure 9.67: Customers who made a purchase in the first 3 weeks + +22. Calculate the percentage of customers who opened emails pertaining to the Bat Scooter and then made a purchase in the first 3 weeks by using the following query: + + ```javascript + SELECT 15.0/82.0 AS sale_rate; + ``` + + + The following table shows the calculated percentage: + + ```javascript + sale_rate ---------------------- 0.18292682926829268293 (1 row) + ``` + + Figure 9.68: Percentage of customers in the first 3 weeks who opened emails + + + Approximately 18% of the customers who received an email about the Bat Scooter and made a purchase in the first 3 weeks had opened the email. This is consistent with the rate for all available data for the Bat Scooter. +23. Calculate how many unique customers we have in total throughout the first 3 weeks. This information is useful context when considering the percentages we just calculated. 3 sales out of 4 equate to 75% but, in this situation, we would prefer a lower opening rate across a much larger customer base. Information on larger customer bases is generally more useful as it is typically more representative of the entire customer base, rather than a small sample of it. 
We already know that 82 customers received emails: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM bat_sales WHERE sales_transaction_date < '2016-11-01'; + ``` + + + The following output reflects **160** customers where the transaction took place before November 1, 2016: + + ```javascript + count ------------ 160 (1 row) + ``` + + Figure 9.69: Number of distinct customers from bat\_sales + + +There were 160 customers in the first 3 weeks, 82 of whom received emails, which is slightly over 50% of customers. This is much more than 6% of customers over the entire period of availability of the scooter. + +Now that we have examined the performance of the email marketing campaign for the Bat Scooter, we need a control or comparison group to establish whether the results were consistent with that of other products. Without a group to compare against, we simply do not know whether the email campaign of the Bat Scooter was good, bad, or neither. We will perform the next exercise to investigate performance. + +Exercise 9.5: Analyzing the Performance of the Email Marketing Campaign + +In this exercise, we will investigate the performance of the email marketing campaign for the Lemon Scooter to allow for a comparison with the Bat Scooter. Our hypothesis is that if the email marketing campaign performance of the Bat Scooter is consistent with another, such as the 2013 Lemon, then the reduction in sales cannot be attributed to differences in the email campaigns. + +Perform the following steps to complete the exercise: + +1. Load the `sqlda` database: + + ```javascript + psql sqlda + ``` + +2. Drop the existing `lemon_sales` table: + + ```javascript + DROP TABLE lemon_sales; + ``` + +3. The 2013 Lemon Scooter is `product_id = 3`. Select `customer_id` and `sales_transaction_date` from the sales table for the 2013 Lemon Scooter. 
Insert the information into a table called `lemon_sales`: + + ```javascript + SELECT customer_id, sales_transaction_date INTO lemon_sales FROM sales WHERE product_id=3; + ``` + +4. Select all information from the `emails` table for customers who purchased a 2013 Lemon Scooter. Place the information in a new table called `lemon_emails`: + + ```javascript + SELECT emails.customer_id, emails.email_subject, emails.opened, emails.sent_date, emails.opened_date, lemon_sales.sales_transaction_date INTO lemon_emails FROM emails INNER JOIN lemon_sales ON emails.customer_id=lemon_sales.customer_id; + ``` + +5. Remove all emails sent before the start of production of the 2013 Lemon Scooter. For this, we first require the date when production started: + + ```javascript + SELECT production_start_date FROM products WHERE product_id=3; + ``` + + The following table shows the `production_start_date` column: + + ```javascript + production_start_date --------------------------------- 2013-05-01 00:00:00 (1 row) + ``` + + Figure 9.70: Production start date of the Lemon Scooter + + Now, delete the emails that were sent before the start of production of the 2013 Lemon Scooter: + + ```javascript + DELETE FROM lemon_emails WHERE sent_date < '2013-05-01'; + ``` + +6. Remove all rows where the sent date occurred after the date in the `sales_transaction_date` column: + + ```javascript + DELETE FROM lemon_emails WHERE sent_date > sales_transaction_date; + ``` + +7. Remove all rows where the sent date occurred more than 30 days before the date in the `sales_transaction_date` column: + + ```javascript + DELETE FROM lemon_emails WHERE (sales_transaction_date - sent_date) > '30 days'; + ``` + +8. Remove all rows from `lemon_emails` where the email subject is not related to a Lemon Scooter. 
Before doing this, we will search for all distinct email subjects: + + ```javascript + SELECT DISTINCT(email_subject) FROM lemon_emails; + ``` + + The following table shows the distinct email subjects: + + ```javascript + email_subject ---------------------------------------------------------- Tis' the Season for Savings 25% off all EVs. It's a Christmas Miracle! A Brand New Scooter...and Car Like a Bat out of Heaven Save the Planet with some Holiday Savings. Shocking Holiday Savings on Electric Scooters An Electric Car for a New Age We cut you a deal: 20% off a Blade Black Friday. Green Cars. Zoom Zoom Black Friday Sale (11 rows) + ``` + + Figure 9.71: Lemon Scooter campaign emails sent + + Now, delete the emails whose subjects are not related to the Lemon Scooter using the `DELETE` command: + + ```javascript + DELETE FROM lemon_emails WHERE POSITION('25% off all EVs.' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('Like a Bat out of Heaven' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('Save the Planet' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('An Electric Car' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('We cut you a deal' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('Black Friday. Green Cars.' in email_subject)>0; DELETE FROM lemon_emails WHERE POSITION('Zoom' in email_subject)>0; + ``` + +9. Now, check how many of the emails sent to Lemon Scooter customers were opened: + + ```javascript + SELECT COUNT(opened) FROM lemon_emails WHERE opened='t'; + ``` + + We can see that **128** emails were opened: + + ```javascript + count --------- 128 (1 row) + ``` + + Figure 9.72: Lemon Scooter campaign emails opened + +10. 
List the number of customers who received emails and made a purchase: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM lemon_emails; + ``` + + The following figure shows that **506** customers made a purchase after receiving emails: + + ```javascript + count --------- 506 (1 row) + ``` + + Figure 9.73: Unique customers who purchased a Lemon Scooter + +11. Calculate the percentage of customers who opened the received emails and made a purchase: + + ```javascript + SELECT 128.0/506.0 AS email_rate; + ``` + + We can see that approximately 25% of customers opened the emails and made a purchase: + + ```javascript + email_rate ------------------------------- 0.25296442687747035573 (1 row) + ``` + + Figure 9.74: Lemon Scooter customer email rate + +12. Calculate the number of unique customers who made a purchase: + + ```javascript + SELECT COUNT(DISTINCT(customer_id)) FROM lemon_sales; + ``` + + + We can see that **13854** customers made a purchase: + + ```javascript + count ------------------------------- 13854 (1 row) + ``` + + Figure 9.75: Count of unique Lemon Scooter customers + +13. Calculate the percentage of customers who made a purchase having received an email. This will enable a comparison with the corresponding figure for the Bat Scooter: + + ```javascript + SELECT 506.0/13854.0 AS email_sales; + ``` + + + The preceding calculation yields approximately 3.65%: + + ```javascript + email_sales ------------------------- 0.03652374765410711708 (1 row) + ``` + + Figure 9.76: Lemon Scooter customers who received an email + +14. Select all records from `lemon_emails` where a sale occurred within the first 3 weeks of the start of production. Store the results in a new table – `lemon_emails_threewks`: + + ```javascript + SELECT * INTO lemon_emails_threewks FROM lemon_emails WHERE sales_transaction_date < '2013-06-01'; + ``` + +15. 
Count the number of emails that were made for Lemon Scooters in the first 3 weeks: + + ```javascript + SELECT COUNT(sales_transaction_date) FROM lemon_emails_threewks; + ``` + + + The following is the output of the preceding code: + + ```javascript + count ----------- 0 (1 row) + ``` + + Figure 9.77: Unique sales of the Lemon Scooter in the first 3 weeks + + +There is a lot of interesting information here. We can see that 25% of customers who opened an email made a purchase, which is a lot higher than the 18% figure for the Bat Scooter. We have also calculated that just over 3.6% of customers who purchased a Lemon Scooter were sent an email, which is much lower than the almost 6% of Bat Scooter customers. The final interesting piece of information we can see is that none of the Lemon Scooter customers received an email during the first 3 weeks of product launch compared with the 82 Bat Scooter customers, which is approximately 50% of all customers in the first 3 weeks! + +In this exercise, we investigated the performance of an email marketing campaign for the Lemon Scooter to allow for a comparison with the Bat Scooter using various SQL techniques. 
diff --git "a/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.md" "b/DAT375_Week2/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.md" similarity index 100% rename from "Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.md" rename to "DAT375_Week2/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.md" diff --git "a/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.sql" "b/DAT375_Week2/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.sql" similarity index 100% rename from "Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.sql" rename to "DAT375_Week2/Lesson Using SQL to Uncover the Truth \342\200\223 a Case Study en-uCertify.sql" diff --git a/DAT375_Week2/W2_case_study.ipynb b/DAT375_Week2/W2_case_study.ipynb new file mode 100644 index 0000000..afc424b --- /dev/null +++ b/DAT375_Week2/W2_case_study.ipynb @@ -0,0 +1,1450 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 13, + "id": "6fe15880", + "metadata": { + "vscode": { + "languageId": "sql" + } + }, + "outputs": [], + "source": [ + "%reload_ext sql\n", + "%config SqlMagic.style = 'MARKDOWN'\n", + "%sql postgresql://postgres:securepassword@localhost:54321/sqlda" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c494da67", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "# Exercise 9.1: Preliminary Data Collection Using SQL Techniques\n", + "This exercise collects preliminary data. We will load the database, list scooter product details, extract product IDs, and store the results in a new table." 
+ ] + }, + { + "cell_type": "markdown", + "id": "e84ea60d", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "# Step 1: Load the sqlda database" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "40610ba5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "24dbb526", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "# Step 2: List the model, base_msrp, and production_start_date for scooter products" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "d23ffa3e", + "metadata": { + "vscode": { + "languageId": "sql" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " * postgresql://postgres:***@localhost:54321/sqlda\n", + "7 rows affected.\n" + ] + }, + { + "ename": "KeyError", + "evalue": "'MARKDOWN'", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mKeyError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[14]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43mget_ipython\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun_cell_magic\u001b[49m\u001b[43m(\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43msql\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mSELECT model, base_msrp, production_start_date \u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[33;43mFROM products \u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[33;43mWHERE product_type = 
\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mscooter\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[33;43m;\u001b[39;49m\u001b[38;5;130;43;01m\\n\u001b[39;49;00m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m/Volumes/Samsung990Pro/DEV/SQL-for-Data-Analytics/.venv/lib/python3.13/site-packages/IPython/core/interactiveshell.py:2542\u001b[39m, in \u001b[36mInteractiveShell.run_cell_magic\u001b[39m\u001b[34m(self, magic_name, line, cell)\u001b[39m\n\u001b[32m 2540\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m.builtin_trap:\n\u001b[32m 2541\u001b[39m args = (magic_arg_s, cell)\n\u001b[32m-> \u001b[39m\u001b[32m2542\u001b[39m result = \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 2544\u001b[39m \u001b[38;5;66;03m# The code below prevents the output from being displayed\u001b[39;00m\n\u001b[32m 2545\u001b[39m \u001b[38;5;66;03m# when using magics with decorator @output_can_be_silenced\u001b[39;00m\n\u001b[32m 2546\u001b[39m \u001b[38;5;66;03m# when the last Python token in the expression is a ';'.\u001b[39;00m\n\u001b[32m 2547\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(fn, magic.MAGIC_OUTPUT_CAN_BE_SILENCED, \u001b[38;5;28;01mFalse\u001b[39;00m):\n", + "\u001b[36mFile \u001b[39m\u001b[32m/Volumes/Samsung990Pro/DEV/SQL-for-Data-Analytics/.venv/lib/python3.13/site-packages/sql/magic.py:219\u001b[39m, in \u001b[36mSqlMagic.execute\u001b[39m\u001b[34m(self, line, cell, local_ns)\u001b[39m\n\u001b[32m 216\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[32m 218\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m219\u001b[39m result = 
\u001b[43msql\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun\u001b[49m\u001b[43m.\u001b[49m\u001b[43mrun\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconn\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparsed\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43msql\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muser_ns\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 221\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[32m 222\u001b[39m result \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m 223\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(result, \u001b[38;5;28mstr\u001b[39m)\n\u001b[32m (...)\u001b[39m\u001b[32m 226\u001b[39m \u001b[38;5;66;03m# Instead of returning values, set variables directly in the\u001b[39;00m\n\u001b[32m 227\u001b[39m \u001b[38;5;66;03m# user's namespace. 
Variable names given by column names\u001b[39;00m\n\u001b[32m 229\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.autopandas:\n", + "\u001b[36mFile \u001b[39m\u001b[32m/Volumes/Samsung990Pro/DEV/SQL-for-Data-Analytics/.venv/lib/python3.13/site-packages/sql/run.py:374\u001b[39m, in \u001b[36mrun\u001b[39m\u001b[34m(conn, sql, config, user_namespace)\u001b[39m\n\u001b[32m 372\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m result \u001b[38;5;129;01mand\u001b[39;00m config.feedback:\n\u001b[32m 373\u001b[39m \u001b[38;5;28mprint\u001b[39m(interpret_rowcount(result.rowcount))\n\u001b[32m--> \u001b[39m\u001b[32m374\u001b[39m resultset = \u001b[43mResultSet\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 375\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m config.autopandas:\n\u001b[32m 376\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m resultset.DataFrame()\n", + "\u001b[36mFile \u001b[39m\u001b[32m/Volumes/Samsung990Pro/DEV/SQL-for-Data-Analytics/.venv/lib/python3.13/site-packages/sql/run.py:116\u001b[39m, in \u001b[36mResultSet.__init__\u001b[39m\u001b[34m(self, sqlaproxy, config)\u001b[39m\n\u001b[32m 114\u001b[39m \u001b[38;5;28mlist\u001b[39m.\u001b[34m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, sqlaproxy.fetchall())\n\u001b[32m 115\u001b[39m \u001b[38;5;28mself\u001b[39m.field_names = unduplicate_field_names(\u001b[38;5;28mself\u001b[39m.keys)\n\u001b[32m--> \u001b[39m\u001b[32m116\u001b[39m \u001b[38;5;28mself\u001b[39m.pretty = PrettyTable(\u001b[38;5;28mself\u001b[39m.field_names, style=\u001b[43mprettytable\u001b[49m\u001b[43m.\u001b[49m\u001b[34;43m__dict__\u001b[39;49m\u001b[43m[\u001b[49m\u001b[43mconfig\u001b[49m\u001b[43m.\u001b[49m\u001b[43mstyle\u001b[49m\u001b[43m.\u001b[49m\u001b[43mupper\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m)\n\u001b[32m 117\u001b[39m 
\u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 118\u001b[39m \u001b[38;5;28mlist\u001b[39m.\u001b[34m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, [])\n", + "\u001b[31mKeyError\u001b[39m: 'MARKDOWN'" + ] + } + ], + "source": [ + "%%sql\n", + "SELECT model, base_msrp, production_start_date \n", + "FROM products \n", + "WHERE product_type = 'scooter';" + ] + }, + { + "cell_type": "markdown", + "id": "3b9621db", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "\n", + "## Step 3: Extract the model name and product IDs for scooter products" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5cc90d0b", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6d0fafc5", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT model, product_id \n", + "FROM products \n", + "WHERE product_type = 'scooter';" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "943872ba", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6fd2f7cb", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 4: Insert the above results into a new table called product_names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b21b3c21", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "62d38961", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT model, product_id \n", + "INTO product_names \n", + "FROM products \n", + "WHERE product_type = 'scooter';" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ab8aff3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f288ce91", + "metadata": {}, + 
"outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "610bfeae", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "# Exercise 9.2: Extracting the Sales Information\n", + "In this exercise we join sales data with the product names and then isolate Bat Scooter sales." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13d08097", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "245e8d09", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "f6278281", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "# Step 1: Load the sqlda database" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6859f35a", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "009996ce", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75f9b534", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "psql sqlda" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3aa81367", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3cf725ec", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "adc91e33", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [ + "# Step 2: List the available fields in the database" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8af02e9", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "996e6070", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "1f8388f7", + "metadata": 
{ + "lines_to_next_cell": 2 + }, + "source": [ + "# Step 3: Create a new table (products_sales) by joining sales and product_names on product_id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e8885e5", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2a2cc65d", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id \n", + "INTO products_sales \n", + "FROM sales \n", + "INNER JOIN product_names \n", + " ON sales.product_id = product_names.product_id;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c9683fb", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e593559", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 4: Display the first five rows of products_sales" + ] + }, + { + "cell_type": "markdown", + "id": "08fa49c4", + "metadata": {}, + "source": [ + "# Exercise 9.1: Preliminary Data Collection Using SQL Techniques\n", + "This exercise collects preliminary data. We will load the database, list scooter product details, extract product IDs, and store the results in a new table." 
+ ] + }, + { + "cell_type": "markdown", + "id": "cd9e9cec", + "metadata": {}, + "source": [ + "## Step 1: Load the sqlda database" + ] + }, + { + "cell_type": "markdown", + "id": "5c904f26", + "metadata": {}, + "source": [ + "psql sqlda" + ] + }, + { + "cell_type": "markdown", + "id": "735c1e30", + "metadata": {}, + "source": [ + "## Step 2: List the model, base_msrp, and production_start_date for scooter products" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1f56bd4", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT model, base_msrp, production_start_date \n", + "FROM products \n", + "WHERE product_type = 'scooter';" + ] + }, + { + "cell_type": "markdown", + "id": "61f20e2e", + "metadata": {}, + "source": [ + "## Step 3: Extract the model name and product IDs for scooter products" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6f1d9fe8", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT model, product_id \n", + "FROM products \n", + "WHERE product_type = 'scooter';" + ] + }, + { + "cell_type": "markdown", + "id": "f8d10e79", + "metadata": {}, + "source": [ + "## Step 4: Insert the above results into a new table called product_names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3ba5016c", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT model, product_id \n", + "INTO product_names \n", + "FROM products \n", + "WHERE product_type = 'scooter';" + ] + }, + { + "cell_type": "markdown", + "id": "cb13f9f6", + "metadata": {}, + "source": [ + "# Exercise 9.2: Extracting the Sales Information\n", + "In this exercise we join sales data with the product names and then isolate Bat Scooter sales." 
+ ] + }, + { + "cell_type": "markdown", + "id": "320897ce", + "metadata": {}, + "source": [ + "## Step 1: Load the sqlda database" + ] + }, + { + "cell_type": "markdown", + "id": "024ebee9", + "metadata": {}, + "source": [ + "psql sqlda" + ] + }, + { + "cell_type": "markdown", + "id": "db8191c4", + "metadata": {}, + "source": [ + "## Step 2: List the available fields in the database" + ] + }, + { + "cell_type": "markdown", + "id": "7b450661", + "metadata": {}, + "source": [ + "## Step 3: Create a new table (products_sales) by joining sales and product_names on product_id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "63c62885", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id \n", + "INTO products_sales \n", + "FROM sales \n", + "INNER JOIN product_names \n", + " ON sales.product_id = product_names.product_id;" + ] + }, + { + "cell_type": "markdown", + "id": "ac71ec20", + "metadata": {}, + "source": [ + "## Step 4: Display the first five rows of products_sales" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c86d6b99", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM products_sales \n", + "LIMIT 5;" + ] + }, + { + "cell_type": "markdown", + "id": "61ce0852", + "metadata": {}, + "source": [ + "## Step 5: Retrieve Bat Scooter sales ordered by sales_transaction_date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b697683", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM products_sales \n", + "WHERE model = 'Bat' \n", + "ORDER BY sales_transaction_date;" + ] + }, + { + "cell_type": "markdown", + "id": "2c413c11", + "metadata": {}, + "source": [ + "## Step 6: Count the number of Bat Scooter sales records" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2f086b97", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT 
COUNT(model) \n", + "FROM products_sales \n", + "WHERE model = 'Bat';" + ] + }, + { + "cell_type": "markdown", + "id": "b44b3c2f", + "metadata": {}, + "source": [ + "## Step 7: Determine the last sale date for the Bat Scooter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "89fc6fdf", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT MAX(sales_transaction_date) \n", + "FROM products_sales \n", + "WHERE model = 'Bat';" + ] + }, + { + "cell_type": "markdown", + "id": "7ad5e59f", + "metadata": {}, + "source": [ + "## Step 8: Insert Bat Scooter sales records into a new table (bat_sales) ordered by date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2f2fa8d", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "INTO bat_sales \n", + "FROM products_sales \n", + "WHERE model = 'Bat' \n", + "ORDER BY sales_transaction_date;" + ] + }, + { + "cell_type": "markdown", + "id": "8569edce", + "metadata": {}, + "source": [ + "## Step 9: Remove the time information in bat_sales (convert to date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6218139", + "metadata": {}, + "outputs": [], + "source": [ + "UPDATE bat_sales \n", + "SET sales_transaction_date = DATE(sales_transaction_date);" + ] + }, + { + "cell_type": "markdown", + "id": "98204ab0", + "metadata": {}, + "source": [ + "## Step 10: Display the first five records of bat_sales ordered by date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8a78b197", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM bat_sales \n", + "ORDER BY sales_transaction_date \n", + "LIMIT 5;" + ] + }, + { + "cell_type": "markdown", + "id": "d7a25b21", + "metadata": {}, + "source": [ + "## Step 11: Create bat_sales_daily table with daily sales count" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "abc86a3e", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT 
sales_transaction_date, COUNT(sales_transaction_date) \n", + "INTO bat_sales_daily \n", + "FROM bat_sales \n", + "GROUP BY sales_transaction_date \n", + "ORDER BY sales_transaction_date;" + ] + }, + { + "cell_type": "markdown", + "id": "b157c6cf", + "metadata": {}, + "source": [ + "# Activity 9.1: Quantifying the Sales Drop\n", + "Here we compute a cumulative sum of daily sales, apply a 7-day lag, and calculate the growth rate (volume)." + ] + }, + { + "cell_type": "markdown", + "id": "fccfcb9e", + "metadata": {}, + "source": [ + "## Step 1: Load the sqlda database\n", + "\n", + "psql sqlda" + ] + }, + { + "cell_type": "markdown", + "id": "f68a4539", + "metadata": {}, + "source": [ + "## Step 2: Compute the daily cumulative sum of sales and insert into bat_sales_growth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5c67a07", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) AS cumulative_sum\n", + "INTO bat_sales_growth\n", + "FROM bat_sales_daily;" + ] + }, + { + "cell_type": "markdown", + "id": "fd59ef82", + "metadata": {}, + "source": [ + "## Step 3: Compute a 7-day lag of the cumulative sum and insert into bat_sales_daily_delay" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41990786", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT *, lag(cumulative_sum, 7) OVER (ORDER BY sales_transaction_date) AS lag_value\n", + "INTO bat_sales_daily_delay\n", + "FROM bat_sales_growth;" + ] + }, + { + "cell_type": "markdown", + "id": "5cdec19c", + "metadata": {}, + "source": [ + "## Step 4: Inspect the first 15 rows of bat_sales_daily_delay" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac10527f", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM bat_sales_daily_delay \n", + "LIMIT 15;" + ] + }, + { + "cell_type": "markdown", + "id": "31a50be9", + "metadata": {}, + "source": [ + "## Step 5: Compute 
sales growth as a percentage and insert into bat_sales_delay_vol" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b955737e", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT *, (cumulative_sum - lag_value) / lag_value AS volume\n", + "INTO bat_sales_delay_vol\n", + "FROM bat_sales_daily_delay;" + ] + }, + { + "cell_type": "markdown", + "id": "c814eeff", + "metadata": {}, + "source": [ + "## Step 6: Display the first 22 records of bat_sales_delay_vol" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1978c91", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM bat_sales_delay_vol \n", + "LIMIT 22;" + ] + }, + { + "cell_type": "markdown", + "id": "5215b24c", + "metadata": {}, + "source": [ + "#### ----------------------------------------------------------------------------------" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2536ea0e", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85063dc4", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM products_sales \n", + "LIMIT 5;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b11baa93", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b528ba0c", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 5: Retrieve Bat Scooter sales ordered by sales_transaction_date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d21a7d4f", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b7cf315e", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM products_sales \n", + "WHERE model = 'Bat' \n", + "ORDER BY sales_transaction_date;" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "d6dab14d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ee34b48", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 6: Count the number of Bat Scooter sales records" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b42a0fd8", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0317e49", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT COUNT(model) \n", + "FROM products_sales \n", + "WHERE model = 'Bat';" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d02892bb", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9618f8c", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 7: Determine the last sale date for the Bat Scooter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86fde4df", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b95e07be", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT MAX(sales_transaction_date) \n", + "FROM products_sales \n", + "WHERE model = 'Bat';" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "daba74d5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9cb5729b", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 8: Insert Bat Scooter sales records into a new table (bat_sales) ordered by date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a12eeba8", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"id": "d5fb0a33", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "INTO bat_sales \n", + "FROM products_sales \n", + "WHERE model = 'Bat' \n", + "ORDER BY sales_transaction_date;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0503217", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06983b3e", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 9: Remove the time information in bat_sales (convert to date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1558401", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5122b580", + "metadata": {}, + "outputs": [], + "source": [ + "UPDATE bat_sales \n", + "SET sales_transaction_date = DATE(sales_transaction_date);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcaf6505", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c93d97e6", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 10: Display the first five records of bat_sales ordered by date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e93a144e", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d8675e3f", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM bat_sales \n", + "ORDER BY sales_transaction_date \n", + "LIMIT 5;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eeeed0dd", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c9137bcb", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 11: Create 
bat_sales_daily table with daily sales count" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4511890b", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a330ceb1", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT sales_transaction_date, COUNT(sales_transaction_date) \n", + "INTO bat_sales_daily \n", + "FROM bat_sales \n", + "GROUP BY sales_transaction_date \n", + "ORDER BY sales_transaction_date;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95d2dead", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dc27c011", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc6289cd", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "# Activity 9.1: Quantifying the Sales Drop\n", + "Here we compute a cumulative sum of daily sales, apply a 7-day lag, and calculate the growth rate (volume)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b505fffa", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c7707919", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "id": "e7ec4be0", + "metadata": {}, + "source": [ + "# Step 1: Load the sqlda database" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "87450942", + "metadata": {}, + "outputs": [], + "source": [ + "psql sqlda\n", + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0053dc21", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7e0f8343", + "metadata": {}, + "outputs": [], + "source": [ + "## Step 2: Compute the daily cumulative sum of sales and insert into bat_sales_growth\n", + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9037ec9f", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) AS cumulative_sum\n", + "INTO bat_sales_growth\n", + "FROM bat_sales_daily;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2172ebe3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c9d53f1", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 3: Compute a 7-day lag of the cumulative sum and insert into bat_sales_daily_delay" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e2abdc9a", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ee606e34", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT *, lag(cumulative_sum, 7) OVER (ORDER BY sales_transaction_date) AS lag_value\n", + "INTO bat_sales_daily_delay\n", + "FROM 
bat_sales_growth;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "64ece351", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed0abead", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 4: Inspect the first 15 rows of bat_sales_daily_delay" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d76d2a03", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "035280c1", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM bat_sales_daily_delay \n", + "LIMIT 15;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f5ed089a", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "623662a0", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 5: Compute sales growth as a percentage and insert into bat_sales_delay_vol" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6b808a4", + "metadata": {}, + "outputs": [], + "source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "feb2cf00", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT *, (cumulative_sum - lag_value) / lag_value AS volume\n", + "INTO bat_sales_delay_vol\n", + "FROM bat_sales_daily_delay;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b3c5544", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "645dee5f", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "\n", + "## Step 6: Display the first 22 records of bat_sales_delay_vol" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "95cb1e91", + "metadata": {}, + "outputs": [], + 
"source": [ + "*/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11254ede", + "metadata": {}, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM bat_sales_delay_vol \n", + "LIMIT 22;" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a8df8de0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "34dd48cd", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#### ----------------------------------------------------------------------------------\n", + "*/" + ] + } + ], + "metadata": { + "jupytext": { + "cell_metadata_filter": "-all", + "main_language": "python", + "notebook_metadata_filter": "-all" + }, + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/DAT375_Week2/W2_case_study.pdf b/DAT375_Week2/W2_case_study.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b38f8bf2394f839f977fea1e05953ede216d36d7 GIT binary patch literal 43385 zcma&OV~{P~wys;YZQHhO+qP}no@Lv!Y}>YtSvKdbwf4Rd=euXcjrAuZBSw#rIo{q{ z?@zw1jU-hN5u;_KV}&9;y1jaZVqzj-Ah0*Gg5u$!7qhf+F?FIBvoUlr6)`ooH!-D` zF|{*yu^?b#WZ>ZAgK~CpGBvb?^4Pf2k+$DrL+E)?zlEn6d;w;NDHO0KUYL;H zc^K&P8jWmMv8R<74xEi;)lAMkSizLb^M`+z!Hm=R1k#T_29o36oC7^ZGSOmx%j5NG z{~K^3Ym*)xe>@NVfh8ft`Jx$35xA~gG3pNo@{)6^szSUb4K&pj6f}!x+qPuE{#2hX ztW<<=etK4M?bA60Zl!YU$e3IJfSm}#7CaJb6cwUaiF@X!I1uMnL7cO-_-LNO^B})I zd@n!06BO%TSkXZCCp`<%n-kSk ztr#GT@!q>{?Im&o6E`s1kcyO4fhxB*I25_K$rq>v!6Yh1&d0>7`RZ+O_BxM{-w?X@ zdpOP*WROC_FQYFu-0jCWI46O9qf_Q_a=)NMo+)z}O;k0jXSak~Pfiu;?5UJ!5 zXk@XZ_KE{bP-Jj7OppA;D$^?D%ByrL%ba|h>!t%G!MGntm3=wTE{v_Lj`opy^TjDp 
zJ%+u#X>9LP!jz7b+kuZ+Zk1XTua&i^>A+DUGlIOSM*#suH5HBBOwLxc21Wk1EruNgQ0RZ zs+_orc!HMzUP_Ma!5v@%oXr`4cpBO0>%%qT2^5WTECbImBz8yv+q(wuC<8BU-iG~7 zGLq~F)5=RKdPVuiIXSp@$M|5)yj`A~uZ)y3H(>bjOI|kJ1K{2R>*WcF7V^qgysnvH z7G4%nHNOA;{0`ojG3)?&1?DO`&E(1gFW&y~Rl_`w_Jliut5r8@JkqVgPm6Pdj6%Ey z$+!_Dc3C!TZ!Ve`Bj*@rLfLXV;HzN<5HXAaol|_WAUGFf*FIE>w84`5+qf7P?*^M1 zCwl!IX05^Ek=$*9IKL#bl%uTgt%KhEl^qXHLOF zS4nk3woX(ia|sIJ?!SDVR;F2QHvm_L1)}QB~jhgt&^m&;b|5ywG7H&;(+)gbB#@+~v<2j*0LHXlBFb5(KhN zjh*|#NE33N%P`Z#b3D0H2Al6>%BKzG5$L%h=+4_yaP+t;yaGhFU6w!#q)C>*i7Y|v zsLn7*(0eUgAPfi%s>h2BI&@dobtlO!*j$+%bl1P{iSD2G1auR)V-B+8JFNd6EIjRHRXX?8O&Fl>rU2=-MdA#tj;rnZ^6bZ{m)xNEL|0FD{(V_7!y{b=k z$(?oU4=7DqF8m`OG4G9aZDuZeuwkMrV+kxO+zWh0S<$7@z>G!KX}ouPR!nMY&YsqE3#f`+@xTw4o_O<=<=ILHlvHl!mF&K zFExrk6|b4c(!EN*)K5b4`^Vh_*!2#CF)Z&=XWX9Tn`m)+xU+tt$y-{Lt^%_@s5o}Y zW^WF)%Vsx{&5Rh=8XbuEs%LS41EooEc(p{6;>f<}COKFT_jONCv{j(=r6XWDDqEJ* z(bywIX{A3%@pfHjV@2Xx5dALHGu%l~`0gB>+G)rrtpM<+VgOoA?$EB#C6Xh-j-p8!hYXPQUwy0G;8#n6t`0BWWP&4m?8ZE?HF_N zTye)@X1alb`d4HJ*EF*`K+*g-q1#V0svEQ6dzi1&R0OOoa50%SvmA?kBHqUKDD>PK zm>zxld`4K`wtX%Q`sXm^+3hvuI(4bJo6$apJztJ>jKsoZ2u!ZcJ($2_bLxUNF98*y zSh6&1TvDtPo+GXC8-dH5atSULs`)??@!ts+F4pY|4ZD~A>t-=PuI<%PO+is=;1-$l8CcjV!HN?{J{9 z;d!2@tku!O_CO3OOPN-KiaWW!EDM}}?0$pu(DGYCncA8BFGl>F{R=#p*#0X5VPIqU z{}RX%8_W-ZyuyoAy#P}L6$)S**lQlOtO+NL+rY0EZsNVaNb{D5AC7w%c`JeoG2Nvb zC!&!05sAu%-2h`#eAf3nyp{|evbS{xuCAi5-qYaLrW0pc{e68Q{p= zYqHMJUsotyEPWfc=?wQ^h0W!I!{;A337EOvXFc&3MO<=0?Y z79^2On*L1~eD7eo&}fD6;I?DiL7WD4Lx+3#EWh%^)l=%hM6dI$SJ$`Gi{tB6Z4$); z^%zxaveH>PzwU%0;^1^d)e3!ET6H1m5wXKUsDFV zLtnr@RU~ipjv;*msGH{Uqy2))+RMOmx>P|G8Q=>w0IyJ36z6nyW{t+I#i;HW^}fo= zJ5I^hbdEol&t9|l)*aC4z+$uj$g<28FLHd^$1wO~HQ$jn*i$U8aZW&aR-Dx3tsPg1 zXEMdHJ+t0^wI<@b$*L!v%>(S@Ue1b050d?<9hAL?2gc3|wY5*zAnDdY- zWm8D00F`O@S7>!{Q&`nxSq2Qp6uAcpraQ@?{;UA33`*D(6H7p38UDhRFrR)eQch*5%E6JwoC=a+ z{{`X2hN`q0>6Tw4gyeol&(u&y{GB9$I)9ta@TPKsKzxqD`N5VEY>%r%S^Qe-kk}z` zDbFH{<1wY{bCG&akTRT1bGnHh_x2!sWZ*%IPwM3-iY0IMwDc+|v3)}36~6Qi=&EPU 
zUF~sNXsS3!!@?_cgXk4MXHFMAZ2bJ2hRnBB8%-OqN)-{S=&pC$^>J-ly52SW;-(YgJKb`ZZ{Udg}Ek_1mcUZox_vlp$Kp6~NX^bT-V3ZK<)q zliK?(UAOvA`AsQm^aMrp+!$3lLPL?Fcu}0#!(6$?6yF@hdFT3$47EXX(y+u0v=o}{ zWCm&U55Q!=NGoVSh?k>$jal$BZN$gqitbHluLv2;L^#*wf~<7)hNk z-FRVWN)_@s{=%6o_6$FR44D7LiGPtq)dKrL$b!nS_zjUJFg$pURJEHruwb@vj)`g= zDya?81avv1`fa}x`&f}p%L1|wCTxtu5N6#U(NC;S&?jx;^D)b+ID5! z93TWv#W3nG zr%FA@Q@9~Q0m~Z?VGi0>doV;Phl!2t{v5-eL>x&Wf+_1x5Idem*f)<$eusGF&Gkz$ zs0wZ+K5bekXre|~1Sv1u?YZzTJ|1Q)YCLA>ZwwcmntVJ+^(&g8X;)U}sFnNLAz5*V z(jRb z_3(}$Y7AHbgV;q+;0t&TJ=O(Ue4n1<Y!tWPjJ(W$RDdx}!6i9a{W4xZO|U4Ni;oS)ITAMCZao%W>(sCo85} zMKmQj1oP{awtFc}n*6{?0|p-9<0+wh>M(%``nRWWU31?N z$S*WMb>|Z?+!$sujKy%_PF-M{s*Gq{I7_DiIw%AL?LVc$OiX6M6ujTlMjIg zZKI#xiqekCd(`MHfxfvd{MS!XGDRew{BFoC5HiMOwRQW(ys)6^Dv0QK<4=X6l zpOfbcPJDOoVRO7ChdLY^HS`yr{9j`U^I~N%N*SE(+tUDsZz8c0A&&KL_?fTo-rnx- z!*fAMiZ_2ARFPAUGo2q@k#^_Z_clWH1v2XbvG2Emj z4$2SG_YRz3M)?NMX8*+ulmpwInSQTQAlA!jS&Qsy8JyiU6F8&!+lAlr7jXlHPDhM& zs~3))E6R~Yb^GH~-!DNazLi~$dn@|Zv*YXOyKGy(wO)lKjzfEu8*~szg}{h+pc%r_gG>g^g#|p&EZHwfFvSUW0R- zU-CVa&H|KiH=SFPqKNV%+nh{(sYvv|im*1O!uq;4hb-}Xg4PWgM)b4i*sqAw;z`-l z#rBKH=ReMTCs*YC;Inj!zPGtYo16Y z(>49lkdT6$D<^LCT-2i44;rZ4iRoI>)`8!?z$(bFn$$VrTHQgJfZQ*mQN2e*6=%jZ zK~YspymUyV%3meocYZ{$ukJiVe-I7hAv$RW*y7vc5VRk+=|}FNj?@IxB6A|Q^I&3X z-soatl3$_V7%Fo-ogW7{;6oHVoM95gK`^OXV~9o0%eB<&0GuzQn0BfyjC-^M!4u*1 zsnJNsh^PRAG0L!Q7Z};?0WmF+)->uS%oNPG=y|9rcARjtA-Pv*j;B1;Oo$j1 z+0xrWyJRIDQKKdCN?p@C4I9EeOZ;k6`A%Z!jvIfI1f-x%@C{x{Pn&jAV#l?k9GAG@ z9Y<48mZiV>q--oald>voL@6oUkWk9ijdii%QasVG@-o`nz2x<#=B%eRCdBX2v3I(` z|b?LVGbgL+p`mhWg zp-XRM2qdGWTX5~MpkRzmpZ)=1L9bQq96ifg#dfuEWd#xYZZ(KO?B}1aks=>9ARU91u1q0Ilv_ z6~a1$#;3N(9dw$pyCi7!qrTkoh4kK}To^*t!N&_}cZ$?~j5@fGQff;~*7e&Z7*f$E zQEZWwNWYQqj7CYY9t{_VhbPxkG}{$9w>R?Os6Yg#vI;>Y#6Gy2+~>ev{0vr z9PBuhE{U7@lrBU2t39V&K^hBh{Q6_m>z6yw(L2{W)K`71Jr*^nV>UC`3gYsc+(fcg z1RZfz#1)7HEGmxA(%)DDW0)ariq%8i5tjcRQzUD*7{GxEY^b3H9ukPJf@z@cosgy| znu%hctG?k5!F-xhHgZm?GL!~Nl;-F)UJQ$7Yd*an`P8R+Mw%Cv>XfDf;wN8hFUP?C 
zlD>^v%p4Kk@%yfXbs@C3)~{^c^Yxft*75CMX5Orku8SfPj51hC^u=F)N%WC8l5bGb;|ZKG?)?5xYH*W>K2#GV)HI?t=|=o3|& zdB{w@-Stk#zG#aAn#EoV6$F+(jz{^Hn1oOHHbAk)b3!#haNennk*!j{9K!LVWQfPV zm6W6om~}sqWfx+q+XyD0g97d^<%KvXz~|WHg-`-KSwS`J(wrahY5vzv(1T;|bg2e| zRn7|m1U6q=B+F%XLYk0B78v_fX#?gu^3{;N?m6k|P|7}F)odn}9ld5Qa0C=2$SW-?6QpVnSSdHiwdY?+dY@+rWks~f2()lYk zq|>L48+ixTi>-o$_aps%^5Yl7rtX;QrZMVT)Vt}s1UMD$)=oTOjo#Q#jIKUBfq_F!w*tpW)}sH>dWK^1b}0(gP)6Jr4c$t-EgLMRcw_ z_ofNcc5^p8R;m=ydLf6yD&j`IBrDRwnm^wQVJf=&WrSI0Q1t{s=hpZXQTw)XTgooS z(fEM?(38^A0tZN-t-^J|1~Rv&$Tq?!Jl)44JWYWQgfQ{&rSh+Zbagtb=_(vxssq(o z??Xz_0D*EeKQj<=4~#4Y9uVT{-ZyDBf2J?Q&15wgjfqJv8y~x^q3}<2BB*RlBc-w% zUjwSLP$t%)oys|@hMmq69~Irf*Nb9_3>rR$@5K()(aEmzu@C>|&9j#58QqQD38`GW z;|%LPCSOjArQZ{+eG9c=Z$$e^T9Odx9?R zv#PD`@%a8HTaYu$$X$EeJcl2h>|$6PMf|`(<{HXznJ~TUbwx;m6}ZD?D=px%tD!n-{0SDADNf!mhAYb5dRTLKX-%Ejwwj(^ryb5DsAV<;CIZ%Mf1F$#>Ot{7@ zM9(DsKoNoU2VEQn4E1aq4Y(+HNO^X$mN+7+V_5jW$k#7oVWzxOmt(Wf7MloxyYS%B z9UuL?`((*w8bTtwOjm5DMWp)LQu)A3rI;*Jnbx&ajRr7u4@zMW#K$QN3uDme#{Yd6 z7wHjN?8XAaLS&~P!4SwHlwfiItPVnu$fivWl40fpc+8gJh+g;+POmoM61c=};q&jtkt?QkaroblTu3Ap=VC9o z;(~uJG4;n=)C^v^k3hxDRbJ5KpZ&3B$}YWV1&z0@!!_)})p=GUp6_PfpWiw#JPy~L zXm_3v;#SL@f@x{^ccf9r)L%%AcZ@}9KBSPf?y5_BQFX8ZdvlLhPop}R7$7lE8G$m*!4{i%xriWWbhMf0O|xp4 z5_!siH6qc-|H2Yyw!bA%(CsvJ^n_*_e(V)2_~ffJaaPdSWMhvnEv{JrEK1J(n&=wH zGS@;0qi}Am4p#m+Ed^X=caXXPVC$uU!{JUvS6y#G1(yUmuaurP(1c=k;-bT2hQH{z z|>S!_TSGTkOYHQx2SNn17tr&fg&^T<~u zC!lSoLn#4lzrDjBC#kxw&`*DS-YNp;qk?GgqrVHH2>TdNWU|63qXNmkBV zWd7MPVNU|ENd97}jU%vvu#mDOYo^aBQ@p1!`1@|3%r6~>R7;|?8jlyi@)|yT_nh3j z@DRWlDPr<@93FW?sE({Y-?;Aj`+>9@FOIw*W6z8vho_Q7NrgCe(mwkFoyDepkqBLeZktN@A z!V@}>4aY2<3uVtS%B5EJpaCALVV3e>E((2f0!K}8Y2(@{UWvsJV@zNtr3y5jb>uPo zpY#i1XVyxmpR6M=+N(OAo-lsy&mv?n{7k}vLv<{Ua5n50dUzG}tt87UNe?pEzRM;l zP{PRbqB&S%{Bv0$q+89nssW<}H3Br0x``58{@5HXsPb+$oUhEz$oolzIwa=fk;KHm z%(dxu;(^yKr|Im7S!vWFN*D@l;JK*uJ1weN#YRs;@#l%2P;|k4kx9igMO6D_F15yz zTXjP&wME%ClI7PDJ=2I0Y3auvIWzaEfG`kxXP0j~f znWrVOC-j6KHolk90$6EJuN#SWcQ%C=@SoUPe 
zGt%57T|=Vk5x}rmtAP&9&}hU`n!g^R5Zg5pr07iRrr4s#exYH7%ui8i!AO-g$oD`6Gml34zih z?YHfneYC%MgW_GfZ+&7=hNf+sha9(t?VV5Y3ug4`j|$PJmK;<1JnXN!i45Om@?2Mz zbI16csq`oKbcKgZ?Q+p8NBDdt%tfZJ$=nP6;CnVjNDPt8D7HmLMtdMgOS1ZUy)@bUi8l4g zjMUaYLCm!5Fh-}O(q}3|Y}hbjRx_Ih28I`G`%kif=(Xp6TlLZNR3Kq@>KSkj8VJ}rw*M@6;h^m6O6FQzhL%Bdo1l_1G*I|7{dpx=6Jac$(DqG z1K+Uzccu>;`#?v_b<#)n~Y5Vo*OAgX1%zU-I>MaPh|B88A*io^P|;rwut!)9&uMH_Su4 z%XCi$)IQ7~oa7MoUu1LkVBhw#o!5)!=i!9_qxBu!3S^BI#A5c!o5%ID(;9h9pXPEF z2$>+0xCl?yyzdj(MX}kY>7Hy zLj?+1&JZ7=OXBGfK)+uyME)YyuaB(3o{g~hd~o`Hf$`=RKT1%3D8t0Ugym#3sgv>W z{d>1x8a#zZ7Qy)00O4=^!rYTYU@Vv=MvO63|=O0t;wO15ec zBZ^e1gmNY9v?Vw$rNsS{gb1tF1%Rkfnh{7SL^6m)6deea{?Pv!qVgTGMEPo#SdKZR zN|4AlH)=y-)PeWA&W&pw8vm}|-ubp0zT;^-=_Ax)mTy)XjliM3 zOs1jLjDXqRaYetW{v8jg0%j#_b5)<*NQ$>tX5^W}I^teJ0zNn=K_?_MN@!;{R2)sx zdGmEoWxk_-X2XuYMyzMA_#P?lu)GS+G)Js+^0axZ-?l#H!8kOJLfi%9M{HIbB_m?J z6sG#-rUV1dI!>wE?sUmX=M)H9Ig3Y^;U7NIQ8aj2%*wfosDv$!rdsSBlt@rOMU&_) zDKZ9IQ6Tx2E5<$S^lAh93pF_a2DqBR>Ad$E!0DK+L6(hbizO0{w%eo>QX4>GNzxlc zR=vfGaAVzAG6ThOhIK%(P(9hDi+@0Y8a3vKuSiWmdxi#0Cv0HbKqF)T!HB!LS-zY# zpfII0zn*PQhiiTx*Llx<)$KT2K!gwpf^I0RAk|Zx>T4GVNO{^U(-5h;F$gJ}`&yPd zR2@jwz5ofNcIjt`zc>+8k1mMaN&^~@bU%z)W6OX6a{jPQ{O!U3=~byJ?JzNHGOj)A zfEwfy0t+^Otx0CM3ShON`$+*#%d{kHe${9XTLql2EK;Kcxm653WIAF(Y;I~kl|4T> zmmZTeA6-yokr3Goh*;TZ0jTEP(k?c1?(t$PjgX?QoP&rUCva6n9ff}m4n#UvUmfwj zu@$i!o3Yb_;TBGwBxj1wDALUb_ubbglYO4qAayw=j|KO<*lEUMTM}BSvG3!QSA~DR z!+O*&;}Ng0@cfj)o5NC4)unYIzpD$z%8JUq!fxokTM|od+9W8!_Xk5M@Zq$*)EozX zBrf0YgkG5oLFdWp2I7+i9v|8Po_?NyQ&8n#(Ve|*>!Uxk$@v#!>%2U<2Rw{AtjJq2 z93q!gMGA5n8sM1X1VMF$0-@YmdkRd!DbS@+x^}@|D3jMp-RLnk1Y}>I2S>{Y;>B|^a2AIMbS%&lQ&86)w*Nr}6amh( zZrb;~neKTp+x2F)20EuX;UY@_fTwy)a&=1)m62A1xkz>+R_S0coj><5)z59vo&I$r zHiV?FM~$Fy)AM;f`E!t=utRgk{UB)U+O!QJ!L4pZPMYwYQIlA3Qr7RmhKW{K6P5s= z`y83XR*LN^tZ$ie#`hWI24{!ZiD^Ny0PRsYhYAh2FR)xPf9Y=A+I~~ROHPQ^e<7?`-UT?VW*zXN`U3Of0-g48C@&A z#}+!zH8fuDcb#QsvrZ_x0fCAmA&q3bA{u#1fnGYyA`k5db|fb;$YKQwqura&wU`qmQd|Jr##nPt z_U+LPN>fNI%_eQ*mjEMpisyiXDv2e{$<#mj4LEve?whHe7T{H>o?=dUiYEoM?BBnB 
znF?elYWQ-{|K4cP(A+W^+TC$=CKC0^&ely2`odSP5;@+bX|_r=iOPyqstWmNpdepN zwV_|GNgevLXP-C=h#aBd4bmho;0?mEcw>R)jEbm`=+0di>{hRK7v7*-^SapG!|a$$ zcURg;nJO`^aEW8ltx~--Z#N;o`vo2ryoT~00Ey$j0VGCFj(>Lg;!N6YvEA;}?ZK!2 z)BpJ#D+MpJ*bx5le-5%%+5M(g_Yz;tsvD}{iMOqzw z{cUMYF*}$tA>;EK6Do#Of*7eAmbYI77`qCi?ag)9{=)0-{=U4Q_m6#)OP}D;KxP-+ zbD2KS@f*U}E1vgAxv=@Ga#kXTpp7O(L6oV~W_`&2+iokMk6=Z4x}ct#>X^bfsA8EP zzyymt0wR0Wz0@b7Y6%ocXR?QEAnAJNc%e}q8oy69Tk+Fx8=L@4=$=@$!$GXSZQg#- zjaYkZ1JJxR%`l3CuAoOrm`Er@Z3i8UoNww7)B{q^wonNq8pd#9xW)VprozY}hz*9m zC!%6Wn5g@}A%MjPJ_jucfmF;;28h4!HM(vYv+jn)@Y@w%>nj%gB?HDxdWiH-PYOJMGbX(1Tc!nVe4`2# z0_^Jw;<#LL2}?Qts=AL{w{jfgHwX$JQz>g#V5cXTN5|+3SX(}b@f;@EzFkirsg!1) zhv977F*6F5Yln5dN`+cSRfX&Ywo7=1@Lms}NjaQ7OBbU4dn?dK`6BW&f_J=5x>t_Z zwc^LH`)tY9Q1=++^dUC|JzQvXMA_WfZ?KyWEVHlC%~w-DF^IQJdm;YEw)YRjty~MJ z_Ynds@ph&oF{uebQkjUW!9&I&X5bKxsvZMl&KnQq2XKHWtH%y<$c9(-vOWU!ms)qS-(J z(__Y;T~%Qh>0H&Rm%5+PdHjdJMA2d!QC8-BX|dpJ9V$E$^|0B*v;pgwqh9&_QusBqIr0*8OVGL9e|o(AJuQK28$Zl4mS&B z!#VPcs*w2$d}W{Aj8;sxrk-~hT89=+xXx7_F{fx;PULbPB#U`HsM0BONF)LJFBuiK z1kuv7E<7_~i)$Y>H+`2Ahf2^y-{aBu-9euP>I4`$nQ`PZRr)DpFl-vjvqgiGvnH%O z6TH3g)5!)zqMJFi-&cnkiOuprsbI%aeOF&5RWn`)+1AXu^xIVxwUB&bR1V-Z>!s&+juyCY6OY~PFk(S6e+REp-m zCrhT2>kx~35Rj>?5Iou^E{;u1`KI1} zlEfsfn{Y?Im1yq&OX!% zBP^YR#P+wIm6P{dMo}%svN3J)ZoW3Gw#kWwBSH3}BUK`iTjV^XNOe>;?}b;Qd;KDp z-DX!?--V#YZv8vNVAIgvJ`eMv%%t`*mA~_j)0(f<*T-38@mf=56_+-#vV?-vEgo+j z!orz(vQ6DYNg37m_es3m8>u?LS|$XVEZC(5RR&w7>p7h^SwH(-^E48yHSMidM7O*(Idr|7+w@qdEF!QyR;n$ zd4(o8$M)#unN>=$j(&lLX?Lkq?iCh-_6Pr&KM7 zL8F@=?0O;OH~Y*yi{h@C@5OAVa~{MjxoBpwF}IImdxjbj?mk7DnJxX{`qZv3KkHv7~pW6-Y5 zEMu%*SI%UoNmFf#8O5-Siy7rEr=+xMh0VEyL7lA^FleKE7cg)$CgYSSZYq{Hb4oPu z)>k@l3nU(>nYcCHqn){QubTUBts3gI3`x?PXW%A%G)@MY^jjuSBHN=>$>uw)8M=%n zuxw4zLEDcHUo=N=8sm1&b)P?~MU85Ou?eMPx9R2P*y=SIx{5wx3_P@*PYv2y^20r` zVP*yIpyLoA5Z(T=c=)eD0$q`6V49U7gTT6#uC=Y`V?#JBSYU&{Yc*wozEgd9m+miG z!0lX#0AmjXwXm!NZA@KvUq|+Qobv->xVik_B*6CH^~Z4jqdi79QPDb>0VQrT8$1qL+o%);g=IjZ7(TIMX8hor&DcxFPA)rbN&JJ!MKpRUyZXpJzs`-Sqd#u!D`(gKXg 
zr7PItRL2OE$qxwgi#=doO+ALDUaFkox-|#`#ZPlyG7dQ`Rqd^I<%xXZg*)AoIi7l;uer8_ zE{EoGyz`EfTzt6`5B$9?M@VYireCYtITG={yLy+OW?Q%IZiRDs{n{R`svzU@!fowD zPUSFs+T!IToVwR{>ykT{zF8ucsSt^BCf_ent`7{iVZ~pJ+h4d{=pMgm@c62HyBd3A z4}W!X@JG^6PIBnNb-JBxx!P!$C`WH)*dsUiF=lkSXILttkt9|W&4!t@MdYw zM1qk#tVm01ykKT&Ak4A2Mh0e*4Acop`&lE|VRRaa`*A~ljBAN$AdNv^2^k}F3}aJC z1~r`IHNU-SY9yP&qDxpKJ&Bknc_hb_i3Ous*Cdqa?X@c3l(qRx{Te65yK%&J%@w_& z4f(JRB2nRUj(yI$KlqkL40S3GZt&g8$zlFOF#T6Ga~3wXe9kg6QrW6u+CQ zDj?2g4GLjNv0`H8_Vt1v03dnv5&+Q=p?`0``BJ#3RPkVX`|2u8bk`b+E_cQ7$J%O1 z-crF;=6v4Aq=t9%%VvU&@;hNh zRejs9+qd7c)mEQ@&G@VRRVr<3DXR}h@}ll%%T}nA&9BbZzPTe+9op<)JAcz}zObx<^=IHu$w%NaVRGRUtuFr~8eL9(%H+E`i?sXsZw=M8C z@h^IpIq2nc?I#aCGE4&R1<<6Fttg8tAZsctBk0s3=)7RN>zyENH6hPKG6Uxth0e~! z#}pgqh*>q?KqTew4hZIpCYj(;>gQObN(M&Ia&eLKPS|q!rn-qY`spU4%Rsv-3zH$wQcf<)fL&L(pY&(daXs z>&vl|TC;-N%{?5GklHz%9FGo;av}yX#yPJjdk=#?%31}*9QA$8kgO^KfDb|uq3Q=^A5c1 zY3AzpP3<|h1P*?t9W%uG&gw5WZJZg_SgiYOKoY%DG;?;26eajqqH@zk5{yzb91&lD8XQb=aad$IJDbiRBH^%=U9m{_u9s55#2;Ia#e)yepC?LU(d;Ke|BA&o# zd}>~|e5>Y4^7(n}orfw!n!a&us~`bh4jv$A6=+`@`}=P(6lckF;2M(t`shUj5=F%f!JbB&D^UP-2% z0qwbI;HUJP_JsUtjOFk1F_vYS^Oy2EDbLi&ErEhzhXRQpqQxmiT0ZY{dIGt7{?>fg zS-F789|^I1kYYioCNb2V%>a)0Czc57THIkBXW%HoGtw}j7(F8j3bNXa;JIcmx0H~4 zAP`VUeI$xAfRw$aGg3u@$0PwkVtG)4*NXgvK0#2>>5xpM9tw_v{g46w1RO}vaaKn# zSn$$y4DMlLI>mEb%qU2B(`bx2V8)Bb%(O)(41&))Vr`<0pCGrMr?J5wxeVSjXs zvvAVM?p`0e)&J1e&i>#1QI74OD48G~9n63*dgB+x>82SKU)unRSY1M3(^}H9U->75L?tG0Y0}zNdkfp1W>EnC*1!c<;X+JMMlrH=cR%FYFHx1c?wx5FqWTs;nl_8#+2l3PsDDvR2wyw5KbM zmzbC%h6v%%97MsJ1V?O9uk|t>(qvlGhh?jgt#v$zb>Reor{_(wpExGtY^8EZ@g|;@ z1azAqlFS5vDWJy!vqF~z{(`%ZBtYx(fqKgcu^UjaD}NX`%4*_mm_koWr^eh;GD)ShZTpYBlRt5oVjku846wY8kof)M)%pnvOI`-4UYn8 zd}49?-Ci&oa9v(eX~B1TgNG5D&AIJ%U|Sbn>ihGaipx95#|s-@NIm%8egA3QO`@75 zW$@;xgZ3ZPz85q+;8yIH}mSZB=aBwr#WGE4FRhw(Vrs zKKjf3^m}(7{rrKkzN~X%%sIw9;P^8CN~hEL01q?i!n(3&k|w{rzA zg0X2En)ucM=8Yp1rWmO4j^NacDRpZGJ$U>$?R!7zr1o@I0GbBx-~y)=Nna^c_hf#4 zC^ANFA&Zy1QH%T8ZGvU_SAsTm?zw<7{AEMSaoR6-s(jdgWD0$GL4@m>LC3Ihnx^M7NAGb_C 
z+H*rU#Dv)dx$UvV3lkIebH;Irh}xQ8POneof>)$@NlFQZ)va6|zZIKJF&se;RT37& z$_8K@BcJgp`AvE|qrHaC=};}FJl@`?oaM5bpYTu53O;o%dPiHgf92(F($KBB>?CLK zuz#apqjL2u+i6PgYcD13RFKS&1ZSQ5B7;!Ve=o>^to=yEU3)zn&nEDW@|W{%arccf zhu-Oc_Q^IEk5hl|F+WFs+u57514TfUtjkrtiD)6TKD@oAqG0zl)@@1FlB9>Cy44|W zb~T1b=vI6(O5_GkeI0D_2x;6`ePQ&TGf{Jz)45iNQUl;0OSKTyR>#5zqneteC8~ow zWg;+=Ex3f~<57Mk`w%RKhQ-b_^_P-pfIUA7h9b%(+7D$IlwCB9dF1`s- z7@eq=j<19UepY-V5x_Hj=`bT36%yelIn3udIcs{6pgxExS{Oj|v#$*ET;Snkr5P@p zIHKwj?DNV&zI_uOOl%w}LnZszpW9+BC3`&c==wQP$Gr07z(--+6ndez&^$dbA&D|D zj&)^D^>6j8$VbNOXH$MX+QixeL1?K48}Q!8HFhBLSilTXdL>;(!c&HDg8!Z>3@!U3 zbu{jn$Og#i#%!sAdE06Ay);!43Dsalg&xm&qst|P!2F*XkNy9Rj`|OF#8OXh zM-Ne14+uDzo*W1U0@&eSaR1RM|JRw6jqN{bQtM=2c_lUUVO`i!q43od)5^q>>&~`K zKZ3wJnDV;>UDpEYn$VFgDIE8GzJt3lAi=w!tUcrxQRYd zqn?#Heu@R%=gc2(l3tW8eo#ycFv9r&&eFWPz)N*-#!! zQYBnu*oz8m1@)|@<2~%cT3DEK#i!3tG(6EkW0T{0GFn|JV@7N$+Nd==QPLW+-11`8 z8rWo!V`KCKc`Nv2>FdHM+^XYJZ63=7dm?>hhn$o`=}O+*yKIj(zo`Q=IAbH`h8)I& zS!F9pMaITJV_4-5)=U%j3~0DwikLrT+<^iZSok*dMwrkHP6+VbdwZ6>;(hze%9Og^=T6 z4}?evDC6I1UZwphK7f88;@n|KgdL1>S1A*35_KZRq0tjXVtf}@9`aFz!2QnU_R@p? 
zdO)AR&b{c(0=XAjJ-b++DOBw_57fgZwzfWc#QF$j0ed5z*WFj3#%O-rF>^Mi9Tv)>jdK z2*oPX_<^yD{GxO2mn|QW5rIF#hkF_C#R@h5r>IX7vxrj@jwV@EDm{3`M4D2%7ll}C z#{%>9WMZaFDwKt`jt0WW9uHy@0AT%kdy0(T`>4r{?AKu;gjQ;?fGFF+tOhfYp)*Z6+N3t_$>VtnV9y*xp&4}b8j$herBlr?1ng;}F}+L#=ovAsFFn65U2p){_|>Ns!U z<;F{N?(MxL6pBsyqm$*KjiKQW;L7kIs}G05~$#5cZ&fYo22M7{_7)-gKjd z?PEP0$YgonE$**pi%s58LF!N6Ajvn7JEq1Rpb0km*LdrArx){s9Z=D=4PLU)IuG|D zDeFOr82yK$6^q&;5fd9M@koI2w(_bTzsTJvC7FmpM-{ouyiA z?VGqh4IF4oA5B7PTdPuw-qA3Hn4KAFIklke#T=Wg`548LU* z4nsE3dxTAAI#+1JO#;en0p)eAox9Bn%iw`kbmnOyLG>Trf0^AgY{~B%OnQI3XV& z#>!gjl6E0wroQ!xF0+ITP!x5iT;s|c{yg-mO+1W{JZ+RK$WQ8_fClHu`RavuLHD|a z9edf?=4odG%Q2SIU+i*Utz;bg1yPYe4G*TON**Anz47b01>m)K4nLst0-n{z{hBXUU z_ca+XithSzXAj+Ic6+wx)HlsH4zRwV5F{uFIf1UOd*ugMb1ROqe)$Pu@a`_B5A-+N zRL(9|fGJqVLWJ*7My^F+BSv2F(~^g0W_*?SxA<4P>I+){q8aXOJKEs@Uqfz0H-qB4 zDICJZMF~oiYLFkM96Pccic;7+D6%@t5YN`=PhotTd+7KFF16{;M6Ojvuiv((k6m8YYswirqJ1pymnMNTh?g&>9Ui6awhPDuW>-sDJlcq}yEX z$@t1jxK$}1!&iPq%EU<=PbEvD7%`f+C}K4mZE+Vc)7X~p zli_?Ey*pH^R%^_oD;PB>;gYF~h!~CKg`EPLQh7+~JJ_oPC+0u;upjLVR{B^NP}9^XCK&&a$PE_HpHOybxY3JMMgK9JXV-v_%)vHCDo@td z$InboP0c0>KRv)s%N-eV0W{Nv!x*mnAoU5>S!dAGgh$4hA7iiPj)cepQbl}2tql$Y zb^H;6;N3a-@U-VSx~oE&d6FlVpTj-Tv_0fqG$d$$%obQ?b_-8mWlU8?nM+t!p963g z7(E-;jtoS#wE9)%2)a=^Gej(*Z7E8r*z6P7V1E$Qt#jdZubkzf%Kh8&VEVMOCRuIsBA{6=oVNa{cE4Mjr`hnWq{H$ zvxFoEo^~u|{p6Y!CC_3p#nH>yr~_h0gGQafTUcURDC))FtPt5TaS1uVSjB7&p&J@f zH8c|omb*j(kM;!-IGHvE`VfZuprk{~$me_OXEKk78Z;NyZ(+0hMZ)z4fPoF&ruMx0 zumKZ)0X&G))b=US@9y7norC0kinTuP?@}3Q<5k03SP2^|b}$-$m6poGjtsMS+C1wl z1u;M8QPn52wSU2gkQ9UN4(`gWV(iqgvyWM#-_wBlp-6b&sZ`jKtOsnNzK3?4O%XyP z`x8-SR(q&{*Q}u5#KQ-1_VPW-1DcQKi8Ntj?tR@%JozP^K!0o%bai+mO-SCW+D0cO zN@(x(00UYBp*X#mOLD<``MMz@9dPyanVolW#*yy z{ng~wh_9q}<}jI$XPMK4mYnrAc32}jM#N=gQQxZ{3r_AXe!#k6y|2`^T6D5>;E?z< zY>~j>=Ky*<%!WPuIKDGNj?^1EM?TS#KP!b&Vzw(#aOe8LI_w!~P_E^6>Ku}^t%&YB zOe6&9FPnDQ(!i#D@7HezU0rF*jFqG!<~>e<0mbQHs>;*O14Rd>mUroWJlgspeN9{r zruVGa;qh<@gtiU%H{{YF>(!yQIEydgvUWlUQ2k3@6p#a(wc0wGF8T=}qa#vVy8?9D 
z&X4Vhn}i^3hcdLGaLcS^>!(jCAvrknEkF%>Xv~KqM!vS3>_0qrLcyb;>_F)3kbB_m z>O$n)ac=dbO_QXN@bK1fwtZ^~t78Q)tiEn0=%?vTLs+;^5YGj-U^l9w!V?}To>JA! zBB|4yw(-ovAPMyo(KKZi2{)$zGE1UzgJj@2hB>n3R>~w+ZXNJ!s>rB|I~WJ^Eh+uI;n9}Ul?c< z4RQ>Zh$6QDN8|nleKbd(qLrD}Ug&M!3+sUmB5_8KYD1LBu?YNwjJ_y3AtvhQJRGl? zyufuB=TIeL4k;xy%F~_G#$YU6pKvx4+Hh!89}#IYp+>)zlJ!*lJhf`ar!zK8$!H3h z4dTcnU!KH>&6D)Oh+^Q#?l2Z1cq&RWSK1=IN>s)OP9w%vr(`!%ky{h;FrZ4!_BYy1 zSx3DaAu2Tz58-0Iw%dyjAR~Bt^z3s3DDr3pR@fmO_APrNYQQ@%1oPIf5^um8{7@MC z3ykknA6j*4p!x-tSB~3-pZq@lDYo?IFPbZhr3I7<_>SIKGqVtl_8Hlw-Hlm{>Ee1OJxE-dz2JVR zyLp5b9TPd@5q?K4Z??cqyf>#Be)1Pp1QE=E*&2n9{X2r0mmB_Qsx<%TJf!sF=8Qc! z3*=(Ev9F=II&aVPFF=3v@u`+=)q_skTXO2b`EwVw>G;@_DzlVF(qs530|!xLm-Y|V zorfJ(bsRP?1&Cp%fIyFv0UQf{Lm+3)z>A656UcBY#!pwP{L~Iu(21bd+|Q5G9#T?b z?3IPRyNN0CJ{bcaRbkF~oUV4-Fwv)JVr+M!T-_O?8qaHSTP>!Fy#(pC5aWU;R#;z% zBnKq!Ezf7-4ljnI0>|lBqfb$u%K72k?Lyy6ZWjNYad!-REr;zRn(u9MP6mF1~MvVlpZX3UF`rGCFYRoYq&a}Iy zCfYy5KT99MJmAtZd(n>TPTjSw$)sc^$I5?M9Fi0dHWt69^(6$3DLrK4EQe}Xd_`_S!cmgk^)+1Qy&-s2F5852o)Y! 
z?&%L{6fxkho|(S6XNp_Wf8K~)Qd!*yy?wT5AR3%SXzHO~P}zV8RXvPfoZNTA#c8$8 zFYLewob^NEOc_G+-~wY*^GDaMGArUz90DJw+naRN2O2nOaGF=aIK2hH9@XXD<>F!2 zPJkQH#_qz>$b{YH0yuD9_h$u!bc}V_UR;#%T9`3oK>W@ke;OVXUV|OIfp9cr&lscm#i0u076`LWG0r(ybB-CUiAgr`HjV3}m> zU2EO`?#;1jx1rm!PCNq?d!7*cEwFB@CoU8`q` z`0UMB*5?UnCExuI$huqJZEUm&a~D5?dkE)L+<8VTI&NYjvOQpYfALtj@wcG^O+%ca zFrLL<4P)V9B3MeFXf~`2w$!pAHcOuyvS|EDf%o_YS){XkwWH7V2EZ{lJCO1v&|+We z)*@URwZB5-^Vf>k0*KJuFUx3y(wv za+*by&AvybpB4(r#2Ax`#^9~#vod7}(w4)PA}U;TKfC9mI>)dz(uY`^J2aJKmKUz7 zHpf$ve8CPEO|2S#-WKV@?e~^h!L)|j?e2LU;jUnPk;~@G-m-|6{ax_*>o@1%BW))d zmcc~l^U?i!J&49jq%pXPA-2SM@m@tF&s#vI&dXp7k6 z2VeZO@@Ve75C*s*dlp82{D93tq!-HmjDk}UDZE#2Nh3@QeNM80u8s4OiHVY`*Php# zh{*VQy48{SRi-Pc&_<}ra5@Z&_hR!&<_``ZXRAken?dkVv(v8}d`?(-wiyEQWr&uM zW_xdV4x+Pn4m~i{?QICtp?blKv-ZCJ{^z63>n-2!wa4)-o1w8)0SI^rkdfhgwy3rU zoNaz=MGprZ6BFK_S}iQptuw4u6t(iS3d~2#EO@khZc$hodt{8qIf_glUQY6Q6STo@ z5~SUNUd#Ke$PD7IYQ^MXlMH0wUi6ng)^VDz_xilXX{T2wnB;G8+`mhY(Q&EKIMYl= zbVZr)=L41HT0gkX3-KRNm&S4?v^Ratd-d+BH3$!_j{JB0<@Osjv`Z*u%7!l9#<~T>UK8{}08(>=VB#m4@g8i4%lTB@GEK0b>GoJTwU%<%%y7zD) zUASyRyC*kR>h`ojsz#*7WUA+q&}wQRq0VMNY)2>L2Z-0AC8nn) zv|u6&K17aYIv0$Tm~(SUwoba;YD1qF*IfZ;eC@AG4s?}q)4M@Hv^PR~KS{?py<&<; zz_hZIqd$VoHLVR0HVduGmNU+sO|>7~P6Hn%TMSCNjx(yTCKca8Z=wBzFI0q`pyH`J1Fa);K#Gnry( z)|z++hhsSi3wRk05vUzsUUA`m)sDC{qOZ=a+4|3o&#)3 z%P;5r6p3zv6A_74Xz&-R8xctwqV&)jw|u8gPZMfu7-wG5`F3%;5Zq~l$97)wB4iKu z71&EDWi+vFq;GPve#}gm`s%vwE{zVS(^qU9Eb}HSdmzhyDrE_lmeM0kl`bqE8`Qt| zk?AZnkvsHBsTD!S#y)K_R9Y;7{=?uar_v!2qan&bb>0}bD7{j~6uikias9n_;OiVM zA9595J@7Ezw|9ti7jmN{J!GQc-~q7K@z+c=>G)~Z>5FcK8R{MW;hw#d_Qhi;k1fM~ z0#nCJ7d&44lU;=ohlH`V>yHgMw)la+7qL8UT|#N=h<`{(IsPA5aDCd!h2J&SHZ-(= zWcHiEd;@oTyJxGjHI-Ao^ITZ{~a~^49Oh$4bf?J&^K^Z z~6;xE5Uhep@2v($(Jo&PXMQ+?xg0m$lL9(lls6H!5Xp5mIc5t?seI zZ!HU@@hS)zPHtKuj?VKeTChA{`n7faf$-zmLuBRk%_Dm1d7u=6#Noqat3Pym16T|k zKT2h52quSUd_9{|x$V`(G>Vp60a`e2p$sF%Nc7kK(=*e|lUPC!)#&P55Kbt<^*O## zDW1X_Iv^gu^TwWE3XTS{^#cMu=^d+jhJ!5tAp@NE8;IZxrOW8zVtzQH9@vGWEv}aI 
z)p(Lt+w2J8!h8fPYf#Q5-vzEA%EHgXnNwoZTYJKPTs%;|sXFI8zdfU+SMtNp7PU*C zLLUzs`Sq4lfUCm?MyL4*=EYPj+c1lx#yL^k&xROy1Jy;#Zr1*YVnZqbauZEddA}~( z#$x|TZI>PNIjFNRSl}wvt8%Ipdc(1&6JevQ{ew3U13RX1g19cx|KxlFE~84s!K&TJ10JvyT#!m7pSoL@bKh?ZGJ>B#}r$(f5yUR3848%#wT?A+BW zrtCTBW6dYNRrAsK%jw=0C~dF+F(WB(MNzb+fn0ob;d~#lgPiHm?L>c5sv>lG->KC_ zyoUPFX4qV-n7*45z~+se@AD0hT!c zF{w}0ku$@D(FA9C2q4Oo(hsZeh!Yagx4?2lZB4TB6D=bOJBTX5!rcm^d!Hq|Aj}a3>)u0lJt8B`Rym)4PPLiR}Vpw}Nd_d0U(j(-s5nyz4wx48 z@}S3r+pCzq>3OM>S=MM#NWmK-)c&la8Hgs~hKwR!Y;He03J7e{nWY`BXpG=%ntDrh z(|CXZHoN|>Ex^R~YVHXURJ?_$Fu(qoAW=vjRB$&7}DPa|f_!so`8t zEdnT|*`NC6tZF?H?1NHR&>?dJZ^3APFjgImlUa~0M8qV2`nWOEvXJtg^js++Y=fkh zE21CWt`oeIIB-z0yP*2;MkwvkFl3>yI6RgM)3PKBkk3xr%@ugdqAI0qkKd>o*=k_r zSiE4lfyk{-zu*WUO2P7-w{e-O+*XwDT&onTkT7ix$=UG5)HlyZFR$ZO(k)}_7*7@- zK0W>THCT+cMt0d%|NgwnH`pxuL2&yGtfObU{x3B5KbE@wFBfh8W5e_>g&wzZ!{>Q_ z&GSw6-S!>p50VE2A{%5qyy7*hdp)r>mVJRscl zymLvj%DGu}&?7YhT|^*`OPro0h@Un{Ti<~+D2Rl=B~Tl*x>QB07`sYESgYODQ?tTb z)0M8>^STLOo;vL~1=`^Dw7S_Xy8}gEnclec+_nAQxebkg!-2ei>n3Nj(xQ;|(@Vd1 zqj7KGBC503QftL@^*D{b2PCT4y;2X9i!vyidk%Hzhb{m{;_bQy$xK6C+;?RJf@r{54P57w*w3Yy$bbcOc=}j)S$Ny3`Qpl#Y>mU%`ivr_udvwJ~JJ)P+VFf zjXIlLjeU+@qWswJNyG`%Bgh2BIh1-Ous&r0X~|0dgjC`&l%nvUgpH2lV<>|;raMr@ zd%$ll3J`fh2k{nw7X(Hd1mr5qehue_Uz;u{cJg zqi#B%E{#McIco8w+~Lr{!W48l3ge zL%r5CNEE}@G@2kGf6re-=n%J<;Sx3P9iJW3qQbH4oO`Kpzld~Wl%s=M!U!QJOGOH0 zgKc0`g1|*>u{K%33FbLm##dbX5A?5NJUz0gxEZ-=JBU6uQ0(P4+!Q}aaJP|f4F+0A zjN`NAX5|*|FzzzRDl_6(ckfGhP}pQXZqox-hMBFO7S_GwvY$9DQfYcC zdM=%R6x)Dr`%`$96(}sVW5kR7$OyfemDhO``hsD1F?dz};?{GCc?6;7$V49FDrD-5Aj01t^E$dA1_QWF}&+IP%;($ewyDYl(qgHCahff@rfVi3nr$+w~$l^7K8zzF6Q@)!Zy5oA?d^Bh0mrj0S}{5 zb}JF_z$wB&p}TE`Y6C^ch{6t$=DvUs}DZ@LvI*R56MAQ=&v=JhLD^v`6H zv4WHm?&)C7BVPLa*h;63qZEf*(52%i{XGtC1Hr8~7>mfDX7((Q z(VR39wtz$`{WH;ZXl{4q6Ba71W`&yN-N=S!arfl5+s$T6AvF?#2qE{sXIs@`>c zy)@3E^N_N0*3%k*%{6Wp78hZ;?-q$3Ms%oeWW>jXfh%U4wtPJ`BPTvXiTv#3^x_1# zIv?f(Ttf>w?7@^2=DDbGBm==2_o9H~MFT9f)0{AJeN|Pq>42V7zsw~RIu8nW9L3Izx==c4Bu 
z%%(15yh=#Ro!*4q^DMNE+}X{w((k4fGuDt^+XzV1ZWrWJe*GIFNMnq@a@MFkpDXM< zmCB3fL6>u!?tK)dw$nE67FUaSw&7&0=;%|=)s}AlzPqIkwe6*XtU?FmJ&ezFlYLRl zc?rWoLmk)e2xtQ@?8U3c6A!wY8k|_pfirI8oA7C+$-=Jct@`*Xfo-{)4~r*MRQfme zr_P!Jsha{bxdqwlo>=BL6mk9TJw0rQw-~h^itj$PmhNiDXU|`YB;6JmVh9k^C+m`C z2aubRVA~0A@8#OA%-roaB;lBkyPc;^Cw3~xnrvWp-k$SMY@csuNY7}VKEABkTBmXc z8EvQ;leLe**?e30;cM@`Uk|3Z(FsRW5rxVgi5fy$mnM;+R^yK7M+eu_;e50mceAs6 z&7A(vY4yvqo>*V3xA40*hX?8X0XniPnq#r*oB1*>nb(NM*0fAB;DI_45fw1!nb;rM z0>y6M-Gq=msURKe&w5;;LOe>ZIxF-<^!q39I!0P3sy2H=x z0|O{o;w;)LM-6M0+5Ok&JzrRzH@kW4hYm=mF{+xf)aF#s&~)fl<#&>c(2TD?=X;*6 ziQptKLC6E_8o0if*R>zZ9_}iVc++*XEEa&Pq%GnU#uyU^_s$yGW}Zxz!yV>I?hat! zL>Qn6V@)I%!^h9srRs3=lc5YmWt|%#-rjf-g7v10nm2f!%%~+E?YC#(r?U&pGhI>NoSMmK!+)PXNK2k|rN3n4$K?1NmcN zG;ds-xUf0R2?p&`2`yEGiJ_q}9SN=(F=*g!L^l>_*OGXC;vKPP`Rs<)+w25%-;#F* z-6Rt_m|YL%kl4X12Gn+DIW&Lbn>+pAnq=EpmzR=@IhbL<2S7=c5Pphn#c2z_t?x@j%nvw9lm*_}L(r!;!3ihTBae{&3^~mnA^fVYi4oMhLu}RYQ_r-QFO0 zg{t`AL5EHTwxZm~Y>Z79AGaK@fam;hdM|;)u)o%?-xSN)bhKM z+Sw1;@6s$^E2gDkNJ_#wC8dB`rjFL6Z2fe5YyUn8G+YxXBJ}r;-&Wp~3v?<6cWx>u1wi-%(!|x>Pnzg0-k}F+EIsXbj?*(5NE@Nq2ZJ)NJYmv?KESUqAwXt=qA;^ZWfQ!I_?dK@?s(ewyVwBhbq#E2&8Wha_YVD{;45yp1rT@ID=XrY z5j2N&DcfA%+L7Y2KoE*_oj_28yOmUy?EWiKUoz*w@w#|f9IObA+y*f$+(X)i|I~Wm3Vsi zdH`sO+VT|2KY$*eld8!SvT)(M*rDk`bHO9veSRBNn+1J<#STBQ-kjfg`RaPgAoCL? 
zmrdu0R6Jz30%_aOQP7cFqAYi;FLob|+=2Y_w9e(9c^%l@dkfEA5~^*uPv>#DIC|fh ztMa)&%=OIcAa)75`{ng==gj`esZDdZX<|)uA<)81AElodFC`wXduF^?uKpec^~ zhSltenV4Of@?PYwX&w%I!9Tsj^mRf&$2X>d;hhsEFn^0FnEXW_t`&}Kf5weQc}rP= zGgk3i)ef9D@cYHK+J`skngSPa2tmMPXlEW+VDHc9DEgM#=v`e{YX+MUbmHz^ z(d{R|LcZ%%6LR0u0cLh!S-YoXx;tV^c)jn)Vta`-(Oiq+TfSRs>(~jCDyIhp{91Xh z5279oC)g@mIOoSwZwT8rHxr;3EiF-gAu_2VM)SL*cL9QKr=FnxajACzmI`G9(N8qS ziiIuUi6hgO;}*RSDB+u2_s45L2q~Ft4;;H!g5QESrc164<$kqBqZM|Vw04l_2gV*9 zUb~A!u`sINJw(6!DuVC6n|1K!M|(SgFdQbfwz$DIo$hrYpNQB@az8nI^BxccKH;R}gPKGdy^nPE8Pu@0E}b_sf-W^EU|+dso$6x>PnUKbt^%5H zaGiBs`}~Dic=ZOey_FS0A#$<5;!vgY*I;v2NX3R4IM+~aZ;5KiinLQQRl7W~1l8YU zwCj(0gi@IN6pb{Li`(TU;7_0v?pjE-2kcUIV9nh5$^)O-JNuOhc=u2By6!)G7CU9@ z@qG2e-3U!j*ue>-^M1I|ii*O?No(chlgTQk=En+B#i87pOR5&&xlXyQ_Q@TMt+wd= zI3Z=1IK-Yy^q;#c`jHyq9S}^nbYavv7oQP(MnXtcQKqQSca5%*I06Tqnc1!%PN+D2 z+btfKxHSoGsW@JNFBklR#CTVurX26n==PI)zh2P3*UDqqjBFWZ*L#4CO&qydw1WQN zgfy)EHZ5tVrt;J(jnE8~ABJkk(h1DC)o*7lTAbAWdrR?9+RRqTbJ~2&yHr_G>imH! zI&7kXvb^HTF571TyFZmL$X!iHoi1u0;ez+E31h-+042*F@H`U9#`{t5?7fIAuUtN# z!0}7^RX)=ZQP#UMl6Jv7!kJ&KKTD;O1<&G~AAaytScc}5N9c7Jdk2v=`yo24d7=`_Eldxb8&oZ6R1>kP zJWQ?Yk4IL2|8|^dXaEb}i};*;&C@tzf~OEdIH4_78W9vx4S_{1BE@o~FoxZqF8+ny zye#IS0{4D!N@_bDGM(bwpv~x<^ixwR2Y%NTGrT>{HPI zdL(6I?*Ux_f;hA{rLsKdn3WLcd?^i*)?o3N2x974Y=&CK*3P~qujnj zZ9upCnc-RHUGbRM)uS4|R?m0p(X)o2_Jon8*(r640v?I!%>IJv%K1glI}toN{NY^6 zu!H%a9W5!hQ}sy*~l8Tey+@n@`LXI^4#G zW?r%jN(*}HVr z9Y$X$+egkP-4mZrK@sA)pEBLHHa}&Q&NAYOnts0?iUlc7F?_K+^+HMjj~N*4QgOa{ zwGvjwQ4{O%uAv?CB-Y;+o;!7|&Q%9_?-&BDY4Ggm?!2Hne^4%2%b6-!po+Rki44tp zU51t;b7^_?)P@VFC^>o$|IE(N(GczO+9kU_ao23s8ULLugZ+SOHd>5qR-_*%@acXR zo_OcY=Qm&0Jb`=cr60n}=2|}%qegs^zLP3TjhQqw9_4yu8~N}E7rcw=#26>U#T#Nb1u@Ky^+K z!6P_INOYX2*YfZ~?|Etqpyrl$d(%AvhUC(F@$l6HO6CTBL zs@l8^+wB-@dv$h4XK&>w?YL1CnM6Gj`&gDSN6~_OLzYpP)tju~wrcgVcXgfELM>AV<#b@mPG2Zu2}?zdGO1rw zF)pTimuq&C$6G9*`JLXTEOsqGeS0~^1C2$h##s*QWS!hP#pxy(PfoMZ+RpT(InQ7! 
z#wC7vGRLA2_e_Ml%H+m)XXuC3=P|(cdzpA!f0^=iyZ&Imav}TxT>b_;rq5w+CMj6i z>a;fFDdOshtD$~$Q9&w4&O#=u6DJx2E+K(hl+|u%eq&RxwQ6wCqEPf#fs!f3-rneI zYW-V`>q2^!o~X~l9!0sh@$-pUcc{7E$O!+><;+wPY&H=Bxh_bt?T~oabD!$>0XVnt zdLxY0K`(aNw|fyYGEF}cv*`mRCSO0L9pV;*KY=XZyqY7ewMCRTjiFuf9=T}F+)0$KZGyUh3!e(m%vloX~;mLu&{o@;!Flb!$qAEte-Q0>8dIf z_oJh`Y#BACxAOfW({*2AZ@Mab|M&*p5fmZ)7asi|lCl2_PyZh#RR2FuPhUgZ@nYw( zImGs@$^it#R0PrcFW~>!qyKM7dRFHDkiR)A>WV8VVDhC(IMuIAQJFP1jN zpY1r+9}-nN7n@vRv{A>x4K+WVZ3EllZ*Z&!Td>#qHYrMM9DQ%Sq?rX|0(=bf?#0OoqlwQgA3^?)XRU}BVHZ5Y5MLs z2)Q5JaPdAp{lUa4IC48>!Cg~JGty_@wCtIS<6J6x#k~+U*}42+J^5B|dMpvC=%eky z%-7+jC1o|!IQsZ;RQZ<2SSU^X+5sHgs!_b7jp@nKHAXjU{A#`QI&OmyW3y3|ZS@ZJ^%4=u_@{HlJ&d$m8Hmz$jL1XR*or`dBo56PE1jmMR z6a8Q)b&G88qvmFUCKY;a@))M&qvor7tuih8_@N{c8%4`40#O_2)xZ4O|B!L~pWh7w z(?8`JmU@`}{6Iin;6U;J0{D-C*?;?17#JA;LyMA}_Ha(Pwa zh=Tl9mnxh#iiimdnFj?H$`Bc>qic2Q>UXx07 ze4h*fSu>Oxk{tNGcSR@~21=X&jYKi1rZnJ`>_=I|R-42k{rKR0`h;>+t^a5Ps*vTu z;O=rAgnhyCD%Tj%nFD*Xq9a@=j5^{X9BD-8ae#Cu9ACFcwk^7>P|P|u-rCXwT)660 z8utW-nc6?<|HI_I_u?QO20AKlLi74hrbaW2dMB&A2`HMVyae000>(&DXI_khCXWmM zx_>YyJhn!JzJFjoj<;0`l6n>`I`e!K&Ip^3I|a4WDu3a!D1)D);*Pp=G8pQ-4t!Gk z86}+*a3C;HN`YVEzE`LW8DLhw2rG1y_PB;@+638BvWp3|TgjXuDHR=EMHLN;gAd|@ z`G*y%sk6KKzVFamb>v7B3R2Om4Hcp*g7pe^hnJaq0;4kbd4WGxL zCacm_^K{%#HcL4+HU_c2$Svoh5Rl-8heCaHtuR$({sH6L4bpjd&S=$^!aFvFN-OMT zYG$6n!G06>;{0m>?KRX5DMR5?;j0}c%aTY*KQpibz=eg#%k6 zrFRmU=;?$kt#o~o9^W-7Ju6C-Jbdy_Wcp^3cUVm&_n=>@;Nw*%lUHK-YNhCvmTZz& zjP1_%WNpum7vN`0LfD!@+y$Y#^cx0V%bI&#TKO;4#C$3h0JAK<0$I-z&(K;~8JSc^ zkQeCH&=q6KQE(GxA~anJ``)<_V6s)Hs5q<&!pAvQ*rapF>9xziUU z-9b`S>YXn?-tlF!Tr%!$7 zz}_0;BVh>_$)TTy0w1hk5IQDkxE`o(;Uz}wY)?T#Qgv=|f*i7Wju>RKiqk@)J*0yO zF;8&*1-^4ZIs8B47X$r2Ci4I1h{*WAMZ~|Nc0D6KWj%d8Jv(_IAgFX;gnz^SN0sv5 z21jOkmj4j82dhJ=D=jziGLVoH7aU?%a2#fp@-I^D*kx;;!%~^q>UbzWz9>Qgsh5r-@cPgwL&aCzi+!S^=LbNzG>a-i2N-qdiAeI!3E;>jG zc!K~VK073q`e5+^uK4&tY$#-aV%pZ$KCSa7Trvs@3OYt*%E>jCWzr+7TKlYAyJ`#N zPxx|3@c&oYR|UlZW!vKJPH+N^L({ku+=4s7t#Ru>;~rds2MumP8%dC$f#4F{-QAs= 
zd0#U%^XAt5J$35*oT^p3)?UIq{rP2Zx_XeWdayAQ>dq%4WI$Mcg^8vcfy&AkJQxRW zl2t8?PT3Lw(27b{VV~*s$v&X=GFyfsrsQVvJ8JuY z=`B-N=zJ0A8{r4{|D-B8Nd|!g`T6aGvy2=+Bvt|dzc~_PCtAXBu_s#$cHiGZU5@+q zS1Qx0^jpot9UP^;fz$~LuW%!EBElSu0~7XNp9+8P{V7)kse% z))?hWYE^{ObL7Zdlq1lQkDR@HfK$g*K>KsK#*n-*Rl5z`{N|L3hj#lK*b@xLeHR=l z#L=KVR5>{jFuE8y=s<+0ddt}!&*a$IWZ*H)L~uw!QDMjmiU*{+%I+9ycil9g4)5NM zLjk#s6fw*1NCJ0ZmsB~aqA)(Z@_>z&BAxKE%zh)Y?|u(Uy|u>yJ3On1(^q%2wh6X2 znaBouW#s%Hg|2R;(dW38)|JNxyWAeU=000e-yJESU2d*Z#-&pxmhBxP|IVYiD-RXd zbSo-o(`@>2=x$GexdfA68@3cF^dmK_!QS%g@hS?h%MISGxV==DIR2> z(P{Iyj7IL>cvl$;joA}c!HcEZ*tJWfl}NH%N>BTwiA|ZC7Qby|Q(I|E)m|5j>x|xt zmA&CMzYNp+{wR<~l4p6^qrhjf^1C!ETAjrxRHac=W|EItAl|4=#of7QaDkCsJjl3? z2d6gxh^z)L28F}EF&kRk+!?l6zqZl9oC{`}iw-wKP_tuIT66#wS-GK;Yp^Zri^6v` z{8^TRQkih=&Dhd~nz*caxF_BIVxD43KBzFn$?=cee|kS>iJWHsrsKSjq79uVQ&P8c z9yg5M6~W7uKn#J2n*j~v?F&xs2n2u6%M)sAoL-t5U60`pY>@+OFbdC!3QibfkvCps z;p_3A#A96JR$vM*x;C(LCGddjcScOpgZpGdq__H_(q_68nO<$(8rls&QciwmIkG zS9vzPp4p-$Ns#MY5|4Q)Yzgm}oXRonY4QS`@Ds8-fU?Y`5!)O*!Sshet_nk3#o^x#Pajri}AL<{8_mhj5ZI7Lt zk+gk$n8#ni?i+GznVNETtg=tbDHGIjG7n_w_FRl@dk!M4@5ICekI*Mx1!98gY9;S< zR;Z`ymZ)M7Yhyi33R)sep=DsL5r0Uw6Ex_mvCW#l4XzwH;qJ>xhp{s!H~H25($9t~ zh8&Brs9wwI5$l|6f2BpHvrI1vc1KLUSEQt(6W-?>YcK&d@t*&kJ=z#^c}&DYh4&8q zG^H^O8Ghz>%bQ&fbEJM|(ol!hW?*JBldA$pcQy2Ls%*3*1Ia!iZ_DI&58uPA`>qY zRVHEiWaJjIv05 z8cFwfJEOmRVVJM(kXXD~H$;FP3fX^Io#pFIac}%-0w$q=QINx+I`5Q}!wbgsb=@Ol z#J$34(U-Qz#809fRatf@1|2TrGpvLRgysh@me<29`*HNN-2}}l{Fp8>M!3VL93i{I z$meW$YUMyZPQ+Ohtplwj&Hht*e{40IuyIL$xAUDx zodsFS>en8BHe7v4S~h=dRh%$D-M75*P(n)%hp=Yw4r{=yeXDORkpy+NFX}X`-^K7x zDtYt!yYx1(B9=}r_NmoVE2#1#x(Ay%8dqBn`x05hcXG$E(97O{9nO@aBurFr+ob)O z0^el%`Sd)qK`B{=%BhmKmy?uxE=!I`hLOQa892Tg9Xo0QrKXkoe7}%}^J$nnIc|*U zDXTc;H~p1?OxQqQRNl~6x#~0@t0Tr`8^%ha2eUTWXn{S(VEt&7&us_ulB-Cmu5r|m z%wbqJ+rI5egl@=@N3PAu6ZxQ8Wb1VvmK3rxQiYF4S@AT1B{jzIN&YEW=Dzp3C{eD- zE9P9Eb%gDl$w_5XLk{TMduiho1<=6j-T=H@e$3!$!Ug z?2t$ix$c;cq z(M=>(y$zs?ex}98Jh$YS(|}3}>}ingHX_AnP-cCtNX%`?2rP@EDeG&C{xkxRLgP8N 
z!4xEHH=E!Uxgwmsj4~t>DTjQ@rgU)dIj4&1W;^sOSHCsj;}n7y$?d}H~oo3 zN>9;ev1@JAu3d2eThG?vj1{_l`E{XtquY#2Ee{NgO@?sYBvWa%TXd1oB<|8_b!*@( z2E}2d^|&KDir{*5qA+~0E1$?HWq$w8VBe=GW~Y|0M66E0q`Y&*tCmTHA&2fH1DzO# zs;sf^;#7JQy$+wTwRQ}j3#I@4{QxYcWC_z$SK^=4V<52T;wNdNc0$M7tZAMw+m3WpJp8*k>0%ZNTiT!##VHaPONJo3NYq82 z?G$yaML{S32mz~$E6<*gCzD=-$W0KMw@=IblcWu5Zd-zp(>rpHUl#X~MWPS`sjNto z;-8--LSuh7%OY@`F1Rn<=HTHrrgrvfYJR+Y$%yDUjH;UB5d29Q{B|EqIE8S?l4g>fXAmQ{ui$UZSJ9rj zagO6|W5fW%mn9boU%9ir5#G=Od`b!zzGy=qGnWMY41~6)4-G;u%)4G7hCr>l;mWP{ zB#&sL)dE($-I_>lw}Cz>dV196$Llp`o@-`*n)ICH%+INB*=CV;Q0wratlgGO-Fx3l z*u^VXvcL2<6*vDXBR*|;yR3V9A&#y0#SRE)Ut$((&>&>3*^1Xu2vIYYlAj+sGh~U~ zjp~JS{E5`mpG7r_5K2xPjj-h#WNw1SxW?6PX(P>qMO2nb47Ue75zJ~VPy57F zfk>w-M`a?F)$BUDq+ie@TD@)b92~-SM@#(uubbtb-IbGaP^?=c|$fmVL~5u`9V- z#2)3qtVI_&Qz90#sb84OYU7J8Nqgb|B55Nt>9(H05QSy~rY+Wp+JQs{_d%R{3E+sMi4dVG(tCD~E0FV-=Z(!H5xB31%u0)0OB@ zGAt1^LPK*hp{&j|?Ts&`PkzG*b5*&IzRwY}5_(Ifo`{!oHb2KgoT_{i__8YmwURyE z<>6;nCPFXfT|Udp$lR!2+~HFZRN;=7l5Jqm`xw~Z4F1+-*e+B$$tH3fXIM1`Ba}cY z-PIm0(hBNJfo2H9X9V zinsSbemBJfARKm?NBtuJqqV3M!UmUb%S$<&(KZv5GEL<=vuVD*rlkDGfr>m-2TJW# za%^Ejytw!SI^R7q^rS5s-+7+Y04Y+_WhZz3w<5rl@M)C?;smC+E3=>U=TU zmu;Ql{E%(z^~JKUOTw;Xz1i!}o`6L3Q}GS8kJ)-GX3?<4V?wQR#NPewob1Z^;jMlB zT8n!QaGIXL8FSPfvkG_et;7u%z#{*rFdHJBzwVoah)oOjh~%Upl5%lRp9@s7Sbw{S{@#hUj-#Sz*vW=kJLs z0cK!-?xODSO7dmhQ^{r@@HSPguAPN%JG-{pAO_Ahb^OYobLu12-y#!-7Fvi6lmShx zqSpo_GsMp_8R}@c3NtxaWK+YRcmyA?(v>%&l@$DScXwzw*1E_|6yk}tNIqCrQ7VUx zbHtP_C{=SU;|#pGG*1YneEU5xz`Nzzdm0MG;>CHWx|VsvKWRIDn#KE#+;voGp5{v` zC?K74*b+G_C*(rQJGri|LDqamS2oPyBb|uwG>vE*qz0B0iuaB~C_7INnoO01$i-fP z>D4tle-WM<*42?$-X!kqjaI#tFQnWJwY4lQ0n61t{m^YUe`0GSb!D0<&Nc2}#OQGwkrUh?dW|wsxWe z>mZDR#SggSUG>xfhx2tZ>^~6(|Fn7dFJ#2Ou%`b=@WHFqX)6nV6rme~=@Al&p{j?y2AA#xk|41j`cME+hTsH}nL7tiq%jtKhOCl-DZn=k9n+IW+$WeZ@FXjpT`z#fU5hp24`&3>U3-VY^fF?cdM9_X#YWo zL&xF_B3dW@0yN?r)fz&Xx`^^SzfzIi`%#t=ZVtU{uyPK>H$Rx4R0{YYCV)<<-Vt4{ z38Ym1HCabK$9^P!Lt5QC{YsT$yVF!3ZZor8=X>A*@p;}z{ovHT=-lWn8E?|D7iTq! 
zd@o!(s=dAD*Vcoh!pWmwiug-dz5AYF3TMZIvo^KyOEyWTK~V5ATAS$MPVn@Ix5GWM zuvzWA_uA$X4(*8Z%s?h|2CI#e=MaUO0&~Vu-vtgidk8W5NatIO?K>L)oahijR=-v& zJKNA^x0vbhkA#ii(;MKKZ3tpXB?@T+)CKJ zA}a}c#SKI$))5W;8U&J5_ECZq*;&wH6$SzQoSPnJ_D^<7w6Zcxf5N`rk|}xDo{_L7 z6sEgW1$0tX#>o4V_uw9$wB6&`oh1JG6OR46>|Obf1etFZL#kP+a;Jnv6_w?eR%=M% zCB{?|WSA$oz%8P`9zu`AmBYm?BjwL5`l)q|4m_I)0ciPYqI`k4SEp9JJThWefs=2d z`fetUot}3Ci$`zB4sn6UX^}rZID{6rMe$#zImxkh94!@7Vg0a>6?}u;tidGW)6(j7 zrF~kD9E&y2XE~5M`U$WI6L7Zmn-4-izZ2ApTAzk_tojkdr0zNi-S zme4bt(7z?4_S2ioJ>gQ2Fq8yGLweWDHBoU2-#foae22e|H6K4ziHN9#8DJOw(wjiw za6ATDuSzGtp% z(XKT7APr8~EttZ{9QC=_y&-S}!ObSP(tRj8UYWn7SqNXE__*>IavpD-`p_DVCU3X7 zsRmd*et14CTbtjM={tj=j;#sae|T`Yf74w51EBt+^xZZtkrWqWwa&}EcJNgswa}|V zOv=2#FJ6t$>8Y>)GC2_+32ITpz^NVFF4x4Lep`O8vi46`^&dX9J$c@Xe6le7TVoR4 zCr7r?h>*{4`6tOag;V{&JE_!6IqAh6&_hA0>q)C;#9m!gI*6U^gP~mtC^1otYnCyj z|C~0X!0^y+xXeQqCu=bQh6Fn1J%o3_8+76T{u3(x(*o=N54qg`;=FP&g9}B(!ot8} zM}fmeflvMSXF>l2#B+aY@3O#NmrnaDujD1aQBhA2uG>30&{#FwqpTkYf|1%s3khLtWe zY`l?^7RdB|&j~$8IhpMwx4D>BHcPM^w-cpS8o5gb#xP3{)+Nd2FzQf)#XI*B^It*@ zNQMbBAUaBIZq9bgOrCs(DLOrr2ZE}e?>15%p^D>^R<(b0fyWaMoe;Bbw0<*stQ}A- zPdrcIILa8-5y#V;IU`7H=^i6&+$)>sJApW}4P!FFL!{B0_@8nG)b0$(@I1jR9E@6E zGd6BV21szwJK;21Np>aRwxIAEj&Aku*JxRAZ+X8V%b`irn`6Dj%@_6fElux{@I=%` zK%J>fLhI#997Nc^1@Gp1g^a+v^p;q~8jq$BB_9!J31qtXsF2i6lI9%@L9|Vw(v|;O z>13wQ<$!K$wLgJhyNj@?FB4pjyky_K3`F%}PYmqF#T;5kD)WVJLo{SWxBU(|LnXyg z1_**KfJpBVNWLh-v*MlMp!C z4@lbqxwe;f?n(aSxb@9kEe_#<*bf^)n*rmt8)vm-5;Uu^m$ELWR{V4F2$yXzFU$_q z`eZ=OgMh*NB1^~ckUcYLqiZYqexk{iS!S(bWtgWyyhHoVSCNHpz9ofD$r6k6MpmJI zqFK6y?6ShuTT1X6pB93S$bSAh*Zel~^LXLb(B5h!>x4{)0!UTpfY@ZIV*PWquSZr* zoR3YM0*e{9Uo` zV2N|38+G*nS2~@B#Gpfs+G$;>0``L2YhsAdk~l+TYGQ=T%fPj@q3hAiwu(y(mD_>9HWd(xi*}m?Pd@aLZ-rmF~oG&_cXM|0z=p-#5L|@@* zK%dbJ>YCy@!>=um(r%w8)zJ4UKBo5D{ZSk0Urb5xY#?{O`%%rlB`wa(zg!zsVh68d z4L`EFh2QvoK${FIoXd5wjRRRRW&a~h<%7y&1a2Q8fwkV#8UYR9+^&!LLk~5Yu3Vh# z_(mp-udEu&Dl=M9Jetj&n={#76fV`dP3^b_i?-y(>l#><{w9hc-!cVQE!g`m4Dx3@ zVtc@rMMWoTT|Z#sWfpsB!d=_T-#?q 
z{#B7vQ)`;Jyv-+`$5`^6%cBV=ADNWgIA8h-$<{Jlf~)ST7J_go`sQL1(kKTc652ive8GQB+pxb>T7c$Xj+GkSMVyK z7ewJ4b?nFOCHN(EZX3~}n3=g;4@ne^nbezLyh5gtVs5dSqD^crU?_|ACoGI_Wn3}6 zi7dI`K}=Gy{wbOMI^&qhX%v-J|D>T-HSeXMQPnavgj~pD47<`;%W@1ZSAbXtRHSuZ zt&ZyFh@4mQOZF632gk+Hj-!uQjQjw&?f35LP5dXAngNry7>&*4Ii$ghzc&~-+VK(i z=lGbRHUBcW(2@2PIz~0z-#pj4f649iiRsi$@?uNOKiy-JZ`R8YxD#rVkVIrsA_4IOzYxK1&d=csqH zYXZVMK&Gb3^o-wRfA{=J_=vpeWZ&_#*yfN#k3(RDwN1mbK%xAb(oMyZjKAb@=NFA4 zqa+iKiMO*hf#~-yLtFO{AcX~a(x5*4Y;6Lb4Wxt=)x(|7SAa@dq=%)RE&5o-`}Sa~ zdAGmPZ0isHuNYyCHXjBJ$5iGPhG;&%f;kKg?Koy;fpStNnuG0;xE!4lNlbRXk4w4SutQ+F?XEZyq({7l-G2ABF%?zE&4Z? z#I(qh-cyy1oj-alSDKEUQwtOe)ZJedcn%UAjOG`nnscc6JED%jRv-ifqF$)IXrL3B_JgJj!v>m&Q!KIO)PFE2h>)JkX z1otIjF!EZnt9n6mMJt-+^Ij zFUz??WQ1qadO0KWLvKrXIhUDfm!`G9G*#-r4k{&KQjQ-U;Bkrgh-vbcLV&D|S*I+d z*?{QB6%5c8kDkATQ+-DD!LAz8Ed9MCdaZsml|TGMcekqCy_s{v8FaA6Rh;rF~3xv5CgRMsZdRQ_q_ykIHs*iJW_Ydm zgapiOfa3rA4P!1%7Z(V|KdKNlxfGpkTxkEHV$kH$vh%T~<>m%*=^1hVJpvj3x9L^z zUnmO3u^*sN4YG*yOF$YegN5>uMVySIVkE-`Aw$|1h#^(6$ej(dMM?!PcsX?9J2G)9 zx6KDc47XhGtX0w1FU^AeWPiX=l{p4l91Up1q^+M;iV8pE}V~vuog-Skw#niit3Y( zyO2+VLW_mJ2Pc8mjfB+&jO}EO?GhaB;q7JEZtPIWGXkBaId&(p8C+BspV_d89~|;* z+%=LEJ(Z@W4%X>vR@@qTm%rF^4DLthYK&4=F}l?>HvM{@G)#-Jtup?BWmlzeEL%yh zr&y&^o(HB\n", + "\n", + "rowcount\n", + "\n", + "\n", + "1\n", + "\n", + "" + ], + "text/plain": [ + "+----------+\n", + "| rowcount |\n", + "+----------+\n", + "| 1 |\n", + "+----------+" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 4 + }, + { + "cell_type": "markdown", + "source": "Only one command can be executed from within a single code cell:", + "metadata": {} + }, + { + "cell_type": "code", + "source": "INSERT INTO players (Name, Class, Level, Hitpoints) VALUES (\"Sir Wolf\", \"Cleric\", 2, 20);\n\n-- The following will not be inserted\nINSERT INTO players (Name, Class, Level, Hitpoints) VALUES (\"Sylvain, The Grey\", \"Wizard\", 1, 10);", + "metadata": { + "trusted": true + }, + "outputs": [], + 
"execution_count": 5 + }, + { + "cell_type": "code", + "source": "SELECT Name, Level, Hitpoints FROM players;", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
NameLevelHitpoints
Martin Splitskull340
Sir Wolf220
" + ], + "text/plain": [ + "+-------------------+-------+-----------+\n", + "| Name | Level | Hitpoints |\n", + "+-------------------+-------+-----------+\n", + "| Martin Splitskull | 3 | 40 |\n", + "+-------------------+-------+-----------+\n", + "| Sir Wolf | 2 | 20 |\n", + "+-------------------+-------+-----------+" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 6 + }, + { + "cell_type": "code", + "source": "INSERT INTO players (Name, Class, Level, Hitpoints) VALUES (\"Sylvain, The Grey\", \"Wizard\", 1, 10);", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 7 + }, + { + "cell_type": "code", + "source": "SELECT Name, Level, Hitpoints FROM players;", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
NameLevelHitpoints
Martin Splitskull340
Sir Wolf220
Sylvain, The Grey110
" + ], + "text/plain": [ + "+-------------------+-------+-----------+\n", + "| Name | Level | Hitpoints |\n", + "+-------------------+-------+-----------+\n", + "| Martin Splitskull | 3 | 40 |\n", + "+-------------------+-------+-----------+\n", + "| Sir Wolf | 2 | 20 |\n", + "+-------------------+-------+-----------+\n", + "| Sylvain, The Grey | 1 | 10 |\n", + "+-------------------+-------+-----------+" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 8 + }, + { + "cell_type": "markdown", + "source": "## Querying Tables\n\nA full range of SQL query commands are supported, including aggregation operations:", + "metadata": {} + }, + { + "cell_type": "code", + "source": "SELECT SUM (Level) FROM players", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "source": "Grouping also works:", + "metadata": {} + }, + { + "cell_type": "code", + "source": "SELECT Level, SUM(Hitpoints) AS `Total Hitpoints`\nFROM players\nGROUP BY Level\nORDER BY `Total Hitpoints` DESC;", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
LevelTotal Hitpoints
340
220
110
" + ], + "text/plain": [ + "+-------+-----------------+\n", + "| Level | Total Hitpoints |\n", + "+-------+-----------------+\n", + "| 3 | 40 |\n", + "+-------+-----------------+\n", + "| 2 | 20 |\n", + "+-------+-----------------+\n", + "| 1 | 10 |\n", + "+-------+-----------------+" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 11 + }, + { + "cell_type": "markdown", + "source": "## Charting Using Vega\n\nThe `jupyter-xeus/xeus-sqlite` kernel also bundles Vega charting components.\n\nVega charts can be generated by piping the result of a SQL query into a Vega line magic command.", + "metadata": {} + }, + { + "cell_type": "code", + "source": "%XVEGA_PLOT\n X_FIELD Level\n Y_FIELD Hitpoints\n MARK circle\n WIDTH 100\n HEIGHT 200\n <>\n SELECT Level, Hitpoints FROM players", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
LevelHitpoints
340
220
110
" + ], + "text/plain": [ + "+-------+-----------+\n", + "| Level | Hitpoints |\n", + "+-------+-----------+\n", + "| 3 | 40 |\n", + "+-------+-----------+\n", + "| 2 | 20 |\n", + "+-------+-----------+\n", + "| 1 | 10 |\n", + "+-------+-----------+" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/vnd.vegalite.v3+json": { + "$schema": "https://vega.github.io/schema/vega-lite/v4.json", + "config": { + "axis": { + "grid": true + } + }, + "data": { + "values": [ + { + "Hitpoints": "name", + "Level": "name" + }, + { + "Hitpoints": "40", + "Level": "3" + }, + { + "Hitpoints": "20", + "Level": "2" + }, + { + "Hitpoints": "10", + "Level": "1" + } + ] + }, + "encoding": { + "x": { + "field": "Level", + "type": "quantitative" + }, + "y": { + "field": "Hitpoints", + "type": "quantitative" + } + }, + "height": 200, + "mark": { + "type": "circle" + }, + "width": 100 + }, + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAJMAAAD3CAYAAAAZgGZZAAAAAXNSR0IArs4c6QAAFBRJREFUeF7tnQlsFlUXhk9Ly77+gAUryCIgUIWwVQJKEbAGBEWpLFJZQqrmVwFBi1qgSItIoCAgSqSyCT8KyJZIoICyBatYoSxhKwpYbEGBFixr6Z9zzVe6fT132pnp943vTYhgz9w5896n79yZuTPHJycnJ4fQoIAJCvgAJhNURBdKAcAEEExTADCZJiU6AkxgwDQFAJNpUqIjy2G6c+cOXb58merWrZur9t9//02VKlUiX19fjICDFLAcpnHjxtGhQ4do69at9Oeff9KQIUPIz8+Pzpw5Q2+//TYNHz7cQXI681D2HUmlX/+4rA6ucf1a1Ll1YJEHailMGzdupM8++4zYnRim6dOn09WrVyk2NpbS0tKofv36xC5VuXJlZ46CA46KQUrYfzrfkfTq0KRIoCyD6fTp0/Tqq69SVFQUxcTEKJhGjRpFPXv2pEGDBhHfK+XTXEpKCjVp0sQBsjvzEFZuO0ynUv9xJVd7KLAWDekZVOiALYHpxo0b1L17d4qPj6crV65QdHS0gunFF19UfwYMGKASCQgIoMTERGrUqBHt2bOH9u7dmy9BnmeFhIQ4c5S85Kg2/5xK5/7Mys2WzyK2wsTghIaGUseOHSkjI4NOnDhBERERFBgYSNWrV6cxY8ZQdnY21apVS8HmbiL+0UcfUWRkZJnKzg7rCc5ZVnnkPc1lZWWpKYmtpzneaWpqqoLgwIEDFBcXR6tXr6akpCSaP3++cin+N///ffv2uYUFMN2Tpqxg4gxcE3C+Km/Xquj5EsdZcprLS8ePP/6o5k0M0PXr16l379509OhR9feEhAQKDg4GTBreW5YwudKTcrAcpqJ0OnfuHNWrV4/8/f2LlRHO5BnO5NEwafwiqhDABJh0WRHjABNgEiHRDQBMgEmXFTEOMAEmERLdAMAEmHRZEeMAE2ASIdENAEyASZcVMQ4wASYREt0AwASYdFkR4wATYBIh0Q0
ATIBJlxUxDjABJhES3QDABJh0WRHjABNgEiHRDQBMgEmXFTEOMAEmERLdAMAEmHRZEeMAE2ASIdENAEyASZcVMQ4wASYREt0AwASYdFkR4wATYBIh0Q0ATIBJlxUxDjABJhES3QDABJh0WRHjABNgEiHRDQBMgEmXFTEOMAEmERLdAMAEmHRZEeMAE2ASIdENAEyASZcVMQ4wASYREt0AwASYdFkR4wATYBIh0Q0ATIBJlxUxDjABJhES3QDABJh0WRHjABNgEiHRDQBMgEmXFTEOMAEmERLdAMAEmHIVuHXrlqo1V7CenG4lTMAEmJQCEyZMoB07dlCrVq1U5aYVK1aoeilGKmECJsCkqltyvTkuwsOta9eu9M4776gCPEYqYQImwJSrwOHDh2nx4sX05Zdf0rFjx1RNXiOVMAETYMpVIDk5mT799FN1ilu/fr2q12ukEiZ3FBYWpjtfR5wNChRXyNGSEmFcuHD//v307LPPqsObOHGiqhzOBZ5RCbNkIy7VeitZr8a2knKwBCaumPjwww/TL7/8ourKhYeHU7du3dTfUQnT2AC6oqWBLFmvxraScrAEJk7xgw8+oJkzZ1KNGjWoTZs2tHz5cqpYsSIqYRobv9xoaSBL2K2hzaQcLIOJs+RbAVxJnAs7522ohGloDFWwNJDGezS+hZSDpTAZTzf/Friaw9VcaRnK3R4wASbAZJoCgMk0KeFMgAkwmaYAYDJNSjgTYAJMpikAmEyTEs4EmACTaQoAJtOkhDMBJsBkmgKAyTQp4UyACTCZpgBgMk1KOBNgAkymKQCYTJMSzgSYAJNpCgAm06SEMwEmwGSaAoDJNCnhTIAJMJmmwL8AJv6yiZ+fnwWS5e8SzuRAmPhlytGjR9O3335LvXr1oiNHjhAP9GuvvWYpUIDJgTB17txZvbfFr3m/8cYb1K5dOzp58iRdunTJUocCTA6DiV+irFSpEm3cuJHmzJlD/GUT/o5Aw4YN6eDBg/Too49a5k6AyWEw8eG0aNGCWrZsSRs2bKCIiAj1AYopU6bQtWvXqEqVKoDJMgUcCNOyZcto2LBh6sjYjfjjXX379lWfyrGywZkcCBNfvWVmZqr5EX8SJykpSZ3erL6iA0wOgunmzZvq4xPt27en6Oho5Ubc+GqO3enMmTNq7mRVA0wOgmnGjBkUGRnplhX+dmXBL+maCRZgchBM3333HW3ZsoUWLFhAISEh6su5rhYcHEz9+/c3k51CfQEmB8HkOpRNmzapD3ZZeUorikrA5ECYdu/eTbNmzaJ9+/blG/OUlBSqWrWqZe4EmBwIU+vWrdU3vPnOd/ny5XOPcPv27ZgzWfarlL9j6attdqQh5SB+OS47O1vdApg8ebK6orOzwZkc6EwjRoxQ1QZWrVqV7/uUgYGB5OPjYxlfgMmBMAUEBNCFCxcKQZORkaFuYlrVAJMDYeIHvFzzpGDjeigVKlSwiiW1zKW4+1yW7ThPx9I8wY4ceB+ekIeUQ7FzJl5mUrt2bVWViedOBVvTpk3J19fXMj0Bk0OciWvFsetMmzZNLT3Bac6y3xmtjiVX0OqklEFSDm6diZ2InYHLVPD6JX50UrCNHz8+362CUuZaaHM4k0OcqeDI8sqBnTt3Kqi4zJfOMznehtc81axZM193qIRp/NdOcgXjPRrfQspBvM/Eu2SIeMVA3kk4n/p4Xbi7FhcXRwsXLiR+hsfLV9hleP6FSpj6g3jt+i3ac+gcXbySRdf/zqTuHVtSswf+o9+ByZGlhunu3bvUsWNHNWeKiopSjjRv3jz66aefVNkvBqRgc823XCsxY2JiKD09nfi+FCph6o/wsi3J9FtahtogKytLaf/f/h2odvVK+p2YGFlqmP766y+qU6cOzZ07V71MwG3btm3qLZUffvhBOU9RjcuEcQEeFqFHjx7KxXg7VMLUG90bt+7QjP/dexbqgqlfl+bU9qEAvU5Mjio1TDk5OarMFz+fmz17tvrtiI2Npa+
//prS0tKIb2i6a7wik++eBwUFUXx8PL388suohKk5wDdv36WlO1IKRXcLCqAWgdbdKJbSK3UlzI8//pjGjBmTbz98yps6darbffNDYJ4fsaMNHDhQxXENOlTClIbr3s8dd5pzHdrZs2dp3bp1ajL9/PPPK6dy11xuxqe1Tp065Ybx61KohKkPk+Mm4HzofM+JJ9z8qhNf1vfr148ef/xxt49STp06Rc2aNcunGr/dwsWfe/furZaz8NryhIQEt3Mu3hj3me5JKM1X9BEteaSUg9atAb4a47d587YuXbrQrl27SvQ4BZUwjQ+oNJDGezS+hZSDCBO7El+VNW7cmNasWaPq7E6aNImWLFlCJ06cKORAxlN0vwWcyYHOxPMjXmXJRZu5bd68WZ2u+AEwX+lZ1QCTA2HiCTdPvocPH66Ojl3pgQceoKFDh6p/Dx482JJvDgAmB8LkbnGc61D5nlNYWJjpBgWYHAjTjh07qEGDBupOODd+0/fnn39Wb/ryMhV+Q8Xf3x8wma6Ag2DiS/zjx4/TyJEj1R9+JZwbL5obO3Ys/fbbb/Tggw9aJiGcyUEwFfd6eLVq1Yifv5UrVw4wWaaAg2By50x8iHyF16hRI0tlhDM5CCbXoZw/f14tcNNZEGcmXYDJITDdvn1bXf7zA91FixapFQIFG14PN/NXp/i+pLvPdmQi5eD2DjgvuQ0NDVUTb34mx+uaCjb+oIWVbgVncogz8WHwBLu4xo9ZrGyAyUEwSa9+441eK3+V8vctnWLsyETKodgHveHh4cTrubnxXe7mzZtT27Ztc/Pmxyr8WWerGpzJQc6UFxJ2Kf5cM68YsKsBJsBkGmuAyUEw8dUav+rE7bnnnlMvA/C6blfr06ePpZ9vBkwOggkTcM/4+ggjJU1+TTsdFNORlEOxE3CeYBf19RPX/vjVJStWC7j6hzM5yJnsoL24fQAmwGQag4AJMAEm0xQATKZJCWcCTIDJNAUAk2lSwpkAE2AyTQHAZJqUcCbABJhMUwAwmSYlnAkwASbTFABMpkkJZwJMgMk0BQCTaVLCmQATYDJNAcBkmpRwJsAEmExTADCZJiWcCTABJtMUAEymSQlnAkyAyTQFAJNpUsKZAFM+mFw15/K+g4dKmMZ/36R31oz3aHwLKQexQoHxXf6zBRc7TE5OVkV7+HOG9913nyp26C2VME/+fomSUy7Q+bSL1Kzx/dT1kQZUtVL5kspR6u2kgSz1DjQ6kHKwDKa1a9fS3r17VY06roLJME2fPt0rKmH+lXmdPlm3X8nrKhrYqF4Nejn0UQ3JrQmRBtKavebvVcrBMphcafDpzQXTqFGjvKIS5oFT6bRx74l8MPE/3hncmSqW97Nj3ArtQxpIO5KScrAVJv7wBf8ZMGCAOnaufJCYmKi+2rtnzx7lZAWbFZUPJOGPp2bSzsPphcKGPdmUKvj7Sps7+uelroRZGnXyOpO3VMLEaa7oEfcoZ/KmSpiYgBcGyiNg4iu7unXrquqXqIRZMp+XBrJkvRrbSsrB8jlTUemiEqaxQeRoaSCN92h8CymHMoFJ9zBwB/yeUtJA6mpamjgpB8AkqCsJWJrBMbKtJ+Qh5QCYAJM204BJW6qSXQ6XsnvtzaWB1O6oFIFSDnAmOJM2XoBJWyo4kyQVYJIUgjNpKwSYtKWCM0lSASZJITiTtkKASVsqOJMkFWCSFIIzaSsEmLSlgjNJUgEmSSE4k7ZCgElbKjiTJBVgkhSCM2krBJi0pYIzSVIBJkkhOJO2QoBJWyo4kyQVYJIUgjNpKwSYtKWCM0lSASZJITiTtkKASVsqOJMkFWCSFIIzaSsEmLSlgjNJUgEmSSE4k7ZCgElbKjiTJBVgkhSCM2krBJi0pYIzSVIBJkkhOJO2QoBJWyo4kyQVYJIUgjNpKwSYtKWCM0lSASZJITiTtkKASVsqOJMkFWCSFIIzaSsEmLSlgjNJUgEmSSE4k7ZCgElbKjiTJBVgkhSCM2krBJi
0pYIzSVIBJkkhOJO2Qh4JEyphao9fbqA0kMZ7NL6FlIOtX9v1pkqYLqklAY0PScm28IQ8pBxshclbKmHmHW5JwJKhYXwrT8hDysFWmLylEiZgKtnFiK0wGa2E6e/vT7dv3zb+a4wtLFGAy7yNHDnSbd+2wuQtlTDzquUJlaU4H0/IQ8rBVpi8qRKmCyhJQEssoIhOPSEPKQdbYfLGSpiSgIDpngK2wuTarTdVwgRM92CRtCgTmHR/m7k8fdeuXXXDLYnzhBz4wDwhDykHj4bJEjrQqWUKACbLpP33dewVMN29e1eVsa9SpYrtI3Tnzh26desWVa5c2fZ9u3bIOVy7do1q1qxpew6XL1+mWrVqae3X42FavHgxzZkzhwIDA4lFXbFiBfHNM6tbdnY2HT58mOLj46lcuXI0e/Zsq3dZZP9xcXG0cOFCCg4OpszMTHW/qUWLFpbncuzYMXrppZeoadOmlJWVRUOHDqVBgwYVu1+Phonh4bvgV65coRo1atCbb75J9evXp3fffddyMdkJJk2aRPv376f27duXCUzsiBUqVFCuxK4cExND6enpNG/ePMuPnyFmrQcPHkzbtm2jcePG0cGDB70Xpl9//ZV69uxJKSkp6iBYxAMHDii3sKt98skndOrUqTKBiY/RdZphd+jRoweNHj1adAgztVmwYIFyxvDwcBo/frz3wpScnExhYWF0/PhxdRDLly+nnTt30qJFi8zUq9i+yhomTi4pKYlGjBhBQUFB6hepYsWKth0/n97Xrl2r5oxbt271Xph40s0HwRNwHx+fXHcYO3asbWKWNUzbt2+nIUOG0Ny5c2ngwIG2Hff69eupU6dOdP/996tpBk/CU1NT1b/dNY+eM3HSbdq0IbbaRx55hEJDQ2nKlCn01FNP2SZqWcKUk5Oj5oo8Z+GBtbNNmDBBzdcmT55MR48eVafY8+fPq4sRr4WJHw7zlQS3Pn360MqVK5VL2dUYJp6z8YTU7sZztWbNmuXb7bBhw2jJkiWWp8IARURE0MmTJ6l8+fI0bdo0NW8qrnm8M3HyPPnMyMhQVxdo9irwxx9/UEBAAPn6+oo79gqYxKNAgEcoAJg8YhickQRgcsY4esRRACaPGAZnJAGYnDGOHnEUgMkjhsEZSQAmg+PYr18/2rRpU+7DV4Oba4XzDcqbN2+KD1a1OrMxCDAZFNsF09WrV6lq1aoGt9YL51UKN27coCNHjuht4CFRgMngQBQH065du4jvmH///fcUEhKi/v7VV1/R0qVLaf78+eqRyIwZM2jNmjXqDy8x4UdFvEaL1yjxuq127dqpJS+AyeDAeGO4O5h4zVG1atWoS5cu9PTTT9PEiRPVs8SpU6cqiN577z2KjY1Vi83Y0XhtUPfu3dVzL35w/fnnn9PFixeJ7zg/8cQTgMkb4TCaszuYVq9eTa7X39lleIXo77//rlZHdujQgfz8/JQbtWrVSq0AeOGFF9Tq0ccee4x69eql3Gz37t20efNmev/99wGT0YHxxnh3MH344YfKfXi5SJMmTXIPLTIyUp3K+L+vv/66Ot2x+/DDY36Ni52MHcrVeJkJP8zFac4b6TCYswumqKgo9TSdG7sOz3P4tMYwvfLKKzRz5kz1/7/55hvil04bNmyoYvv27Uu8EuLSpUtUu3Ztat68OX3xxRe0atUqSkxMpA0bNtAzzzwDmAyOi1eGu2AqmDyvPeLF/rxU5cKFC2r+xAvMnnzySRXKa7ASEhLUqY5Pcdz4FkN0dLRaSclt1qxZ9NZbb6k5Fm4NeCUe5ibNb7WcPXuWGjRooJxJp3F8nTp1yvR1Kp08pRjcGpAUws+1FQBM2lIhUFIAMEkK4efaCgAmbakQKCkAmCSF8HNtBf4PeeHBOWN20+UAAAAASUVORK5CYII=" + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + 
], + "execution_count": 12 + }, + { + "cell_type": "markdown", + "source": "## Database Administration\n\nSeveral line magics are defined to support database administration", + "metadata": {} + }, + { + "cell_type": "code", + "source": "%TABLE_EXISTS players", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "The table players exists." + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 15 + }, + { + "cell_type": "code", + "source": "%TABLE_EXISTS npcs", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "The table npcs doesn't exist." + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 16 + }, + { + "cell_type": "code", + "source": "%GET_INFO", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Magic header string: SQLite format 3\n", + "Page size bytes: 4096\n", + "File format write version: 1\n", + "File format read version: 1\n", + "Reserved space bytes: 0\n", + "Max embedded payload fraction 64\n", + "Min embedded payload fraction: 32\n", + "Leaf payload fraction: 32\n", + "File change counter: 4\n", + "Database size pages: 2\n", + "First freelist trunk page: 0\n", + "Total freelist trunk pages: 0\n", + "Schema cookie: 1\n", + "Schema format number: 4\n", + "Default page cache size bytes: 0\n", + "Largest B tree page number: 0\n", + "Database text encoding: 1\n", + "User version: 0\n", + "Incremental vaccum mode: 0\n", + "Application ID: 0\n", + "Version valid for: 4\n", + "SQLite version: 3032003\n" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 17 + }, + { + "cell_type": "markdown", + "source": "## Connecting to a Different Database\n\nCreating a new database will connect the kernel to the new database instance.", + "metadata": {} + }, + { + 
"cell_type": "code", + "source": "%CREATE potato.db ", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 18 + }, + { + "cell_type": "code", + "source": "CREATE TABLE potaters(production INTEGER)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 19 + }, + { + "cell_type": "code", + "source": "INSERT INTO potaters (production) VALUES (7)", + "metadata": { + "trusted": true + }, + "outputs": [], + "execution_count": 20 + }, + { + "cell_type": "code", + "source": "SELECT * FROM potaters", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
production
7
" + ], + "text/plain": [ + "+------------+\n", + "| production |\n", + "+------------+\n", + "| 7 |\n", + "+------------+" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 21 + }, + { + "cell_type": "markdown", + "source": "The original database is lost:", + "metadata": {} + }, + { + "cell_type": "code", + "source": "SELECT Name, Level, Hitpoints FROM players;", + "metadata": { + "trusted": true + }, + "outputs": [ + { + "ename": "Error", + "evalue": "no such table: players", + "output_type": "error", + "traceback": [ + "Error: no such table: players" + ] + } + ], + "execution_count": 23 + } + ] +} \ No newline at end of file diff --git a/DAT375_Week2/w2_.ipynb b/DAT375_Week2/w2_.ipynb new file mode 100644 index 0000000..470f679 --- /dev/null +++ b/DAT375_Week2/w2_.ipynb @@ -0,0 +1,712 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exercise 9.1: Preliminary Data Collection Using SQL Techniques\n", + "This exercise collects preliminary data. We will load the database, list scooter product details, extract product IDs, and store the results in a new table." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Load the sqlda database" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "csharp" + }, + "polyglot_notebook": { + "kernelName": "csharp" + } + }, + "outputs": [], + "source": [ + "#r \"nuget:Microsoft.DotNet.Interactive.PostgreSql, *-*\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "dotnet_interactive": { + "language": "javascript" + }, + "polyglot_notebook": { + "kernelName": "javascript" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [ + { + "ename": "Error", + "evalue": "(1,20): error DNI103: Unrecognized parameter name '--kernel-name'\n(1,51): error DNI103: Unrecognized parameter name '--connection-string'", + "output_type": "error", + "traceback": [ + "(1,20): error DNI103: Unrecognized parameter name '--kernel-name'\n", + "(1,51): error DNI103: Unrecognized parameter name '--connection-string'" + ] + } + ], + "source": [ + "#!connect postgres --kernel-name myPostgresKernel --connection-string \"Host=localhost;Port=54321;Username=postgres;Password=securepassword;Database=sqlda1\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: List the model, base_msrp, and production_start_date for scooter products" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + }, + "vscode": { + "languageId": "polyglot-notebook" + } + }, + "outputs": [ + { + "ename": "Error", + "evalue": "Microsoft.DotNet.Interactive.NoSuitableKernelException: No handler registered on kernel sql-myPostgresKernel for command: SubmitCode: SELECT model, base_msrp, production_start_date ...\n at Microsoft.DotNet.Interactive.Commands.KernelCommand.InvokeAsync(KernelInvocationContext context) in 
D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\Commands\\KernelCommand.cs:line 188\n at Microsoft.DotNet.Interactive.Kernel.HandleAsync(KernelCommand command, KernelInvocationContext context) in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\Kernel.cs:line 365\n at Microsoft.DotNet.Interactive.CompositeKernel.HandleAsync(KernelCommand command, KernelInvocationContext context) in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\CompositeKernel.cs:line 216\n at Microsoft.DotNet.Interactive.KernelCommandPipeline.b__6_0(KernelCommand command, KernelInvocationContext context, KernelPipelineContinuation _) in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\KernelCommandPipeline.cs:line 60\n at Microsoft.DotNet.Interactive.KernelCommandPipeline.<>c__DisplayClass6_1.<b__3>d.MoveNext() in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\KernelCommandPipeline.cs:line 75\n--- End of stack trace from previous location ---\n at Microsoft.DotNet.Interactive.App.KernelExtensions.<>c__DisplayClass6_0.<b__0>d.MoveNext() in D:\\a\\_work\\1\\s\\src\\dotnet-interactive\\KernelExtensions.cs:line 457\n--- End of stack trace from previous location ---\n at Microsoft.DotNet.Interactive.KernelCommandPipeline.<>c__DisplayClass6_1.<b__3>d.MoveNext() in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\KernelCommandPipeline.cs:line 75\n--- End of stack trace from previous location ---\n at Microsoft.DotNet.Interactive.App.KernelExtensionLoader.<>c__DisplayClass0_0.<b__0>d.MoveNext() in D:\\a\\_work\\1\\s\\src\\dotnet-interactive\\KernelExtensionLoader.cs:line 25\n--- End of stack trace from previous location ---\n at Microsoft.DotNet.Interactive.KernelCommandPipeline.<>c__DisplayClass6_1.<b__3>d.MoveNext() in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\KernelCommandPipeline.cs:line 75\n--- End of stack trace from previous location ---\n at Microsoft.DotNet.Interactive.App.KernelExtensions.<>c__DisplayClass5_0.<b__0>d.MoveNext() in 
D:\\a\\_work\\1\\s\\src\\dotnet-interactive\\KernelExtensions.cs:line 388\n--- End of stack trace from previous location ---\n at Microsoft.DotNet.Interactive.KernelCommandPipeline.<>c__DisplayClass6_0.<g__Combine|2>d.MoveNext() in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\KernelCommandPipeline.cs:line 73\n--- End of stack trace from previous location ---\n at Microsoft.DotNet.Interactive.KernelCommandPipeline.<>c__DisplayClass6_0.<g__Combine|2>d.MoveNext() in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\KernelCommandPipeline.cs:line 73\n--- End of stack trace from previous location ---\n at Microsoft.DotNet.Interactive.KernelCommandPipeline.<>c__DisplayClass6_0.<g__Combine|2>d.MoveNext() in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\KernelCommandPipeline.cs:line 73\n--- End of stack trace from previous location ---\n at Microsoft.DotNet.Interactive.KernelCommandPipeline.SendAsync(KernelCommand command, KernelInvocationContext context) in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\KernelCommandPipeline.cs:line 41", + "output_type": "error", + "traceback": [ + "Microsoft.DotNet.Interactive.NoSuitableKernelException: No handler registered on kernel sql-myPostgresKernel for command: SubmitCode: SELECT model, base_msrp, production_start_date ...\n", + " at Microsoft.DotNet.Interactive.Commands.KernelCommand.InvokeAsync(KernelInvocationContext context) in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\Commands\\KernelCommand.cs:line 188\n", + " at Microsoft.DotNet.Interactive.Kernel.HandleAsync(KernelCommand command, KernelInvocationContext context) in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\Kernel.cs:line 365\n", + " at Microsoft.DotNet.Interactive.CompositeKernel.HandleAsync(KernelCommand command, KernelInvocationContext context) in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\CompositeKernel.cs:line 216\n", + " at Microsoft.DotNet.Interactive.KernelCommandPipeline.b__6_0(KernelCommand 
command, KernelInvocationContext context, KernelPipelineContinuation _) in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\KernelCommandPipeline.cs:line 60\n", + " at Microsoft.DotNet.Interactive.KernelCommandPipeline.<>c__DisplayClass6_1.<b__3>d.MoveNext() in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\KernelCommandPipeline.cs:line 75\n", + "--- End of stack trace from previous location ---\n", + " at Microsoft.DotNet.Interactive.App.KernelExtensions.<>c__DisplayClass6_0.<b__0>d.MoveNext() in D:\\a\\_work\\1\\s\\src\\dotnet-interactive\\KernelExtensions.cs:line 457\n", + "--- End of stack trace from previous location ---\n", + " at Microsoft.DotNet.Interactive.KernelCommandPipeline.<>c__DisplayClass6_1.<b__3>d.MoveNext() in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\KernelCommandPipeline.cs:line 75\n", + "--- End of stack trace from previous location ---\n", + " at Microsoft.DotNet.Interactive.App.KernelExtensionLoader.<>c__DisplayClass0_0.<b__0>d.MoveNext() in D:\\a\\_work\\1\\s\\src\\dotnet-interactive\\KernelExtensionLoader.cs:line 25\n", + "--- End of stack trace from previous location ---\n", + " at Microsoft.DotNet.Interactive.KernelCommandPipeline.<>c__DisplayClass6_1.<b__3>d.MoveNext() in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\KernelCommandPipeline.cs:line 75\n", + "--- End of stack trace from previous location ---\n", + " at Microsoft.DotNet.Interactive.App.KernelExtensions.<>c__DisplayClass5_0.<b__0>d.MoveNext() in D:\\a\\_work\\1\\s\\src\\dotnet-interactive\\KernelExtensions.cs:line 388\n", + "--- End of stack trace from previous location ---\n", + " at Microsoft.DotNet.Interactive.KernelCommandPipeline.<>c__DisplayClass6_0.<g__Combine|2>d.MoveNext() in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\KernelCommandPipeline.cs:line 73\n", + "--- End of stack trace from previous location ---\n", + " at Microsoft.DotNet.Interactive.KernelCommandPipeline.<>c__DisplayClass6_0.<g__Combine|2>d.MoveNext() in 
D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\KernelCommandPipeline.cs:line 73\n", + "--- End of stack trace from previous location ---\n", + " at Microsoft.DotNet.Interactive.KernelCommandPipeline.<>c__DisplayClass6_0.<g__Combine|2>d.MoveNext() in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\KernelCommandPipeline.cs:line 73\n", + "--- End of stack trace from previous location ---\n", + " at Microsoft.DotNet.Interactive.KernelCommandPipeline.SendAsync(KernelCommand command, KernelInvocationContext context) in D:\\a\\_work\\1\\s\\src\\Microsoft.DotNet.Interactive\\KernelCommandPipeline.cs:line 41" + ] + } + ], + "source": [ + "SELECT model, base_msrp, production_start_date \n", + "FROM products \n", + "WHERE product_type = 'scooter';" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Extract the model name and product IDs for scooter products" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + } + }, + "outputs": [], + "source": [ + "SELECT model, product_id \n", + "FROM products \n", + "WHERE product_type = 'scooter';" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 4: Insert the above results into a new table called product_names" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + } + }, + "outputs": [], + "source": [ + "Drop table if exists product_names;\n", + "\n", + "SELECT model, product_id \n", + "INTO product_names \n", + "FROM products \n", + "WHERE product_type = 'scooter';" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Exercise 9.2: Extracting the Sales Information\n", + "In this exercise we join sales data with the 
product names and then isolate Bat Scooter sales." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Load the sqlda database" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "psql sqlda" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: List the available fields in the database" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Create a new table (products_sales) by joining sales and product_names on product_id" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + } + }, + "outputs": [], + "source": [ + "Drop table if exists products_sales;\n", + "\n", + "SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id \n", + "INTO products_sales \n", + "FROM sales \n", + "INNER JOIN product_names \n", + " ON sales.product_id = product_names.product_id;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 4: Display the first five rows of products_sales" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + } + }, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM products_sales \n", + "LIMIT 5;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 5: Retrieve Bat Scooter sales ordered by sales_transaction_date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + } + }, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM products_sales \n", + "WHERE model = 'Bat' \n", + 
"ORDER BY sales_transaction_date;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 6: Count the number of Bat Scooter sales records" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + } + }, + "outputs": [], + "source": [ + "SELECT COUNT(model) \n", + "FROM products_sales \n", + "WHERE model = 'Bat';" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 7: Determine the last sale date for the Bat Scooter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + } + }, + "outputs": [], + "source": [ + "SELECT MAX(sales_transaction_date) \n", + "FROM products_sales \n", + "WHERE model = 'Bat';" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 8: Insert Bat Scooter sales records into a new table (bat_sales) ordered by date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + } + }, + "outputs": [], + "source": [ + "Drop TABLE IF EXISTS bat_sales;\n", + "\n", + "SELECT * \n", + "INTO bat_sales \n", + "FROM products_sales \n", + "WHERE model = 'Bat' \n", + "ORDER BY sales_transaction_date;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 9: Remove the time information in bat_sales (convert to date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + } + }, + "outputs": [], + "source": [ + "UPDATE bat_sales 
\n", + "SET sales_transaction_date = DATE(sales_transaction_date);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 10: Display the first five records of bat_sales ordered by date" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + } + }, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM bat_sales \n", + "ORDER BY sales_transaction_date \n", + "LIMIT 5;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 11: Create bat_sales_daily table with daily sales count" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + } + }, + "outputs": [], + "source": [ + "SELECT sales_transaction_date, COUNT(sales_transaction_date) \n", + "INTO bat_sales_daily \n", + "FROM bat_sales \n", + "GROUP BY sales_transaction_date \n", + "ORDER BY sales_transaction_date;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Activity 9.1: Quantifying the Sales Drop\n", + "Here we compute a cumulative sum of daily sales, apply a 7-day lag, and calculate the growth rate (volume)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Load the sqlda database\n", + "\n", + "psql sqlda" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Compute the daily cumulative sum of sales and insert into bat_sales_growth" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + } + }, + "outputs": [], + "source": [ + "DROP TABLE IF EXISTS bat_sales_growth;\n", + "\n", + "SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) AS cumulative_sum\n", + "INTO bat_sales_growth\n", + "FROM bat_sales_daily;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Compute a 7-day lag of the cumulative sum and insert into bat_sales_daily_delay" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + } + }, + "outputs": [], + "source": [ + "DROP TABLE IF EXISTS bat_sales_daily_delay;\n", + "\n", + "SELECT *, lag(cumulative_sum, 7) OVER (ORDER BY sales_transaction_date) AS lag_value\n", + "INTO bat_sales_daily_delay\n", + "FROM bat_sales_growth;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 4: Inspect the first 15 rows of bat_sales_daily_delay" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + } + }, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM bat_sales_daily_delay \n", + "LIMIT 15;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 5: Compute sales growth as a percentage and insert into bat_sales_delay_vol" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + } + }, + "outputs": [], + "source": [ + "DROP TABLE IF EXISTS bat_sales_delay_vol;\n", + "\n", + "SELECT *, (cumulative_sum - lag_value) / lag_value AS volume\n", + "INTO bat_sales_delay_vol\n", + "FROM bat_sales_daily_delay;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 6: Display the first 22 records of bat_sales_delay_vol" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "dotnet_interactive": { + "language": "sql-myPostgresKernel" + }, + "polyglot_notebook": { + "kernelName": "sql-myPostgresKernel" + } + }, + "outputs": [], + "source": [ + "SELECT * \n", + "FROM bat_sales_delay_vol \n", + "LIMIT 22;" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### ----------------------------------------------------------------------------------" + ] + } + ], + "metadata": { + "kernelInfo": { + "defaultKernelName": null, + "items": [ + { + "aliases": [ + "c#", + "cs" + ], + "languageName": "C#", + "name": "csharp" + }, + { + "aliases": [ + "f#", + "fs" + ], + "languageName": "F#", + "name": "fsharp" + }, + { + "languageName": "HTML", + "name": "html" + }, + { + "languageName": "HTTP", + "name": "http" + }, + { + "aliases": [ + "js" + ], + "languageName": "JavaScript", + "name": "javascript" + }, + { + "languageName": "Mermaid", + "name": "mermaid" + }, + { + "aliases": [ + "powershell" + ], + "languageName": "PowerShell", + "name": "pwsh" + }, + { + "name": "value" + } + ] + }, + "language_info": { + "name": "csharp" + }, + "polyglot_notebook": { + "kernelInfo": { + "defaultKernelName": "csharp", + "items": [ + { + "aliases": [], + "languageName": "csharp", + "name": "csharp" + }, + { + "aliases": [ + "f#", + "fs" + ], + "languageName": "F#", + "name": "fsharp" + }, + { + 
"aliases": [], + "languageName": "HTML", + "name": "html" + }, + { + "aliases": [], + "languageName": "HTTP", + "name": "http" + }, + { + "aliases": [ + "js" + ], + "languageName": "JavaScript", + "name": "javascript" + }, + { + "aliases": [], + "languageName": "Mermaid", + "name": "mermaid" + }, + { + "aliases": [ + "powershell" + ], + "languageName": "PowerShell", + "name": "pwsh" + }, + { + "aliases": [], + "languageName": "PostgreSQL", + "name": "sql-myPostgresKernel" + }, + { + "aliases": [], + "name": "value" + } + ] + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/DAT375_Week2/w2_case_study.dib b/DAT375_Week2/w2_case_study.dib new file mode 100644 index 0000000..361d99a --- /dev/null +++ b/DAT375_Week2/w2_case_study.dib @@ -0,0 +1,244 @@ +#!meta + +{"kernelInfo":{"defaultKernelName":"csharp","items":[{"name":"csharp","languageName":"csharp"},{"name":"fsharp","languageName":"F#","aliases":["f#","fs"]},{"name":"html","languageName":"HTML"},{"name":"http","languageName":"HTTP"},{"name":"javascript","languageName":"JavaScript","aliases":["js"]},{"name":"mermaid","languageName":"Mermaid"},{"name":"pwsh","languageName":"PowerShell","aliases":["powershell"]},{"name":"sql-myPostgresKernel","languageName":"PostgreSQL"},{"name":"value"}]}} + +#!markdown + +# Exercise 9.1: Preliminary Data Collection Using SQL Techniques +This exercise collects preliminary data. We will load the database, list scooter product details, extract product IDs, and store the results in a new table. 
+ +#!markdown + +## Step 1: Load the sqlda database + +#!csharp + +#r "nuget: Microsoft.DotNet.Interactive.PostgreSql, 1.0.0-beta.25177.1" + +#!javascript + +#!connect postgres --kernel-name myPostgresKernel --connection-string "Host=localhost;Port=54321;Username=postgres;Password=securepassword;Database=sqlda1" + +#!markdown + +## Step 2: List the model, base_msrp, and production_start_date for scooter products + +#!sql-myPostgresKernel + +SELECT model, base_msrp, production_start_date +FROM products +WHERE product_type = 'scooter'; + +#!markdown + +## Step 3: Extract the model name and product IDs for scooter products + +#!sql-myPostgresKernel + +SELECT model, product_id +FROM products +WHERE product_type = 'scooter'; + +#!markdown + +## Step 4: Insert the above results into a new table called product_names + +#!sql-myPostgresKernel + +Drop table if exists product_names; + +SELECT model, product_id +INTO product_names +FROM products +WHERE product_type = 'scooter'; + +#!markdown + +# Exercise 9.2: Extracting the Sales Information +In this exercise we join sales data with the product names and then isolate Bat Scooter sales. 
+ +#!markdown + +## Step 1: Load the sqlda database + +#!markdown + +psql sqlda + +#!markdown + +## Step 2: List the available fields in the database + +#!markdown + +## Step 3: Create a new table (products_sales) by joining sales and product_names on product_id + +#!sql-myPostgresKernel + +Drop table if exists products_sales; + +SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id +INTO products_sales +FROM sales +INNER JOIN product_names + ON sales.product_id = product_names.product_id; + +#!markdown + +## Step 4: Display the first five rows of products_sales + +#!sql-myPostgresKernel + +SELECT * +FROM products_sales +LIMIT 5; + +#!markdown + +## Step 5: Retrieve Bat Scooter sales ordered by sales_transaction_date + +#!sql-myPostgresKernel + +SELECT * +FROM products_sales +WHERE model = 'Bat' +ORDER BY sales_transaction_date; + +#!markdown + +## Step 6: Count the number of Bat Scooter sales records + +#!sql-myPostgresKernel + +SELECT COUNT(model) +FROM products_sales +WHERE model = 'Bat'; + +#!markdown + +## Step 7: Determine the last sale date for the Bat Scooter + +#!sql-myPostgresKernel + +SELECT MAX(sales_transaction_date) +FROM products_sales +WHERE model = 'Bat'; + +#!markdown + +## Step 8: Insert Bat Scooter sales records into a new table (bat_sales) ordered by date + +#!sql-myPostgresKernel + +Drop TABLE IF EXISTS bat_sales; + +SELECT * +INTO bat_sales +FROM products_sales +WHERE model = 'Bat' +ORDER BY sales_transaction_date; + +#!markdown + +## Step 9: Remove the time information in bat_sales (convert to date) + +#!sql-myPostgresKernel + +UPDATE bat_sales +SET sales_transaction_date = DATE(sales_transaction_date); + +#!markdown + +## Step 10: Display the first five records of bat_sales ordered by date + +#!sql-myPostgresKernel + +SELECT * +FROM bat_sales +ORDER BY sales_transaction_date +LIMIT 5; + +#!markdown + +## Step 11: Create bat_sales_daily table with daily sales count + +#!sql-myPostgresKernel + +Drop TABLE IF 
EXISTS bat_sales_daily; + +SELECT sales_transaction_date, COUNT(sales_transaction_date) +INTO bat_sales_daily +FROM bat_sales +GROUP BY sales_transaction_date +ORDER BY sales_transaction_date; + +#!markdown + +# Activity 9.1: Quantifying the Sales Drop +Here we compute a cumulative sum of daily sales, apply a 7-day lag, and calculate the growth rate (volume). + +#!markdown + +## Step 1: Load the sqlda database + +psql sqlda + +#!markdown + +## Step 2: Compute the daily cumulative sum of sales and insert into bat_sales_growth + +#!sql-myPostgresKernel + +DROP TABLE IF EXISTS bat_sales_growth; + +SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) AS cumulative_sum +INTO bat_sales_growth +FROM bat_sales_daily; + +#!markdown + +## Step 3: Compute a 7-day lag of the cumulative sum and insert into bat_sales_daily_delay + +#!sql-myPostgresKernel + +DROP TABLE IF EXISTS bat_sales_daily_delay; + +SELECT *, lag(cumulative_sum, 7) OVER (ORDER BY sales_transaction_date) AS lag_value +INTO bat_sales_daily_delay +FROM bat_sales_growth; + +#!markdown + +## Step 4: Inspect the first 15 rows of bat_sales_daily_delay + +#!sql-myPostgresKernel + +SELECT * +FROM bat_sales_daily_delay +LIMIT 15; + +#!markdown + +## Step 5: Compute sales growth as a percentage and insert into bat_sales_delay_vol + +#!sql-myPostgresKernel + +DROP TABLE IF EXISTS bat_sales_delay_vol; + +SELECT *, (cumulative_sum - lag_value) / lag_value AS volume +INTO bat_sales_delay_vol +FROM bat_sales_daily_delay; + +#!markdown + +## Step 6: Display the first 22 records of bat_sales_delay_vol + +#!sql-myPostgresKernel + +SELECT * +FROM bat_sales_delay_vol +LIMIT 22; + +#!markdown + +#### ---------------------------------------------------------------------------------- diff --git a/DAT375_Week2/w2_case_study.md b/DAT375_Week2/w2_case_study.md new file mode 100644 index 0000000..bd4cb00 --- /dev/null +++ b/DAT375_Week2/w2_case_study.md @@ -0,0 +1,152 @@ +# Exercise 9.1: Preliminary Data Collection Using SQL
Techniques + +This exercise collects preliminary data. We will load the database, list scooter product details, extract product IDs, and store the results in a new table. + +## Step 1: Load the sqlda database + +# r "nuget:Microsoft.DotNet.Interactive.PostgreSql, *-*" +# !connect postgres --kernel-name myPostgresKernel --connection-string "Host=localhost;Port=54321;Username=postgres;Password=securepassword;Database=sqlda1" + +## Step 2: List the model, base_msrp, and production_start_date for scooter products + +SELECT model, base_msrp, production_start_date +FROM products +WHERE product_type = 'scooter'; + +## Step 3: Extract the model name and product IDs for scooter products + +SELECT model, product_id +FROM products +WHERE product_type = 'scooter'; + +## Step 4: Insert the above results into a new table called product_names + +Drop table if exists product_names; + +SELECT model, product_id +INTO product_names +FROM products +WHERE product_type = 'scooter'; + +# Exercise 9.2: Extracting the Sales Information + +In this exercise we join sales data with the product names and then isolate Bat Scooter sales.
+ +## Step 1: Load the sqlda database + +psql sqlda + +## Step 2: List the available fields in the database + +## Step 3: Create a new table (products_sales) by joining sales and product_names on product_id + +Drop table if exists products_sales; + +SELECT model, customer_id, sales_transaction_date, sales_amount, channel, dealership_id +INTO products_sales +FROM sales +INNER JOIN product_names + ON sales.product_id = product_names.product_id; + +## Step 4: Display the first five rows of products_sales + +SELECT * +FROM products_sales +LIMIT 5; + +## Step 5: Retrieve Bat Scooter sales ordered by sales_transaction_date + +SELECT * +FROM products_sales +WHERE model = 'Bat' +ORDER BY sales_transaction_date; + +## Step 6: Count the number of Bat Scooter sales records + +SELECT COUNT(model) +FROM products_sales +WHERE model = 'Bat'; + +## Step 7: Determine the last sale date for the Bat Scooter + +SELECT MAX(sales_transaction_date) +FROM products_sales +WHERE model = 'Bat'; + +## Step 8: Insert Bat Scooter sales records into a new table (bat_sales) ordered by date + +Drop TABLE IF EXISTS bat_sales; + +SELECT * +INTO bat_sales +FROM products_sales +WHERE model = 'Bat' +ORDER BY sales_transaction_date; + +## Step 9: Remove the time information in bat_sales (convert to date) + +UPDATE bat_sales +SET sales_transaction_date = DATE(sales_transaction_date); + +## Step 10: Display the first five records of bat_sales ordered by date + +SELECT * +FROM bat_sales +ORDER BY sales_transaction_date +LIMIT 5; + +## Step 11: Create bat_sales_daily table with daily sales count + +DROP TABLE IF EXISTS bat_sales_daily; -- SELECT ... INTO fails if the table already exists + +SELECT sales_transaction_date, COUNT(sales_transaction_date) +INTO bat_sales_daily +FROM bat_sales +GROUP BY sales_transaction_date +ORDER BY sales_transaction_date; + +# Activity 9.1: Quantifying the Sales Drop + +Here we compute a cumulative sum of daily sales, apply a 7-day lag, and calculate the growth rate (volume).
+ +## Step 1: Load the sqlda database + +psql sqlda + +## Step 2: Compute the daily cumulative sum of sales and insert into bat_sales_growth + +DROP TABLE IF EXISTS bat_sales_growth; + +SELECT *, sum(count) OVER (ORDER BY sales_transaction_date) AS cumulative_sum +INTO bat_sales_growth +FROM bat_sales_daily; + +## Step 3: Compute a 7-day lag of the cumulative sum and insert into bat_sales_daily_delay + +DROP TABLE IF EXISTS bat_sales_daily_delay; + +SELECT *, lag(cumulative_sum, 7) OVER (ORDER BY sales_transaction_date) AS lag_value +INTO bat_sales_daily_delay +FROM bat_sales_growth; + +## Step 4: Inspect the first 15 rows of bat_sales_daily_delay + +SELECT * +FROM bat_sales_daily_delay +LIMIT 15; + +## Step 5: Compute sales growth as a percentage and insert into bat_sales_delay_vol + +DROP TABLE IF EXISTS bat_sales_delay_vol; + +SELECT *, (cumulative_sum - lag_value) / lag_value AS volume +INTO bat_sales_delay_vol +FROM bat_sales_daily_delay; + +## Step 6: Display the first 22 records of bat_sales_delay_vol + +SELECT * +FROM bat_sales_delay_vol +LIMIT 22; + +#### ---------------------------------------------------------------------------------- diff --git a/week2_case_study.sql b/DAT375_Week2/week2_case_study.sql similarity index 81% rename from week2_case_study.sql rename to DAT375_Week2/week2_case_study.sql index 297e20f..27c3fe8 100644 --- a/week2_case_study.sql +++ b/DAT375_Week2/week2_case_study.sql @@ -1,23 +1,30 @@ -/*markdown -/*markdown -# Exercise 9.1: Preliminary Data Collection Using SQL Techniques + + +# Exercise +9.1: Preliminary Data Collection Using SQL Techniques */ -/*markdown + -- Active: 1742829589420@@127.0.0.1@54321@sqlda -1. Load the `sqlda` database from the accompanying source code located [here](https://github.com/TrainingByPackt/SQL-for-Data-Analytics/tree/master/Datasets). +1. Load the `sqlda` database from the accompanying source code located [here] +( +https: +//github.com/TrainingByPackt/SQL-for-Data-Analytics/tree/master/Datasets). 
*/ */ -/*markdown + -- Load the sqlda database -- Command depends on your database setup */ -/*markdown -2. List the model, `base_msrp` (MSRP: manufacturer's suggested retail price) and `production_start_date` fields within the product table for product types matching `scooter`. + +2. List the model, `base_msrp` +( +MSRP: +manufacturer's suggested retail price) and `production_start_date` fields within the product table for product types matching `scooter`. */ -- Active: 1738444549781@@127.0.0.1@54321@sqlda @@ -25,7 +32,7 @@ SELECT model, base_msrp, production_start_date FROM products WHERE product_type = 'scooter'; -/*markdown + 3. Extract the model name and product IDs for the scooters available within the database. We will need this information to reconcile the product information with the available sales information. */ @@ -35,11 +42,11 @@ FROM products WHERE product_type = 'scooter'; SELECT * FROM product_names; -/*markdown + ## Exercise 9.2: Extracting the Sales Information */ -/*markdown + #### 1. Use an inner join on the `product_id` columns of both the `product_names` table and the `sales` table. From the result of the inner join, select the `model`, `customer_id`, `sales_transaction_date`, `sales_amount`, `channel`, and `dealership_id`, and store the values in a separate table called `product_sales`. */ @@ -54,13 +61,13 @@ SELECT FROM sales INNER JOIN product_names ON sales.product_id = product_names.product_id; -/*markdown + 2. View all rows of the `product_sales` table. */ SELECT * FROM product_sales; -/*markdown + 3. Select all the information from the `product_sales` table that is available for the Bat Scooter and order the sales information by `sales_transaction_date` in ascending order. By selecting the data in this way, we can look at the first few days of the sales records in detail. */ @@ -68,7 +75,7 @@ SELECT * FROM product_sales WHERE model = 'Bat' ORDER BY sales_transaction_date; -/*markdown + 4. 
Count the number of records available by the following query. */ diff --git a/Lesson02/with.pgsql b/DAT375_Week2/with.pgsql similarity index 66% rename from Lesson02/with.pgsql rename to DAT375_Week2/with.pgsql index f7503fb..4e7370e 100644 --- a/Lesson02/with.pgsql +++ b/DAT375_Week2/with.pgsql @@ -5,4 +5,7 @@ SELECT * FROM dealerships SELECT * FROM salespeople INNER JOIN d ON d.dealership_id = salespeople.dealership_id -ORDER BY 1; \ No newline at end of file +ORDER BY 1; + + +SELECT SUM(base_msrp)::FLOAT/COUNT(*) AS avg_base_msrp FROM products; \ No newline at end of file