neurohackweek
diff --git a/‎.DS_Store
6 KB b/‎.DS_Store
6 KB
diff --git a/‎.idea/misc.xml
+4 b/‎.idea/misc.xml
+4
diff --git a/‎.idea/modules.xml
+8 b/‎.idea/modules.xml
+8
diff --git a/‎.idea/pymvpa.iml
+12 b/‎.idea/pymvpa.iml
+12
diff --git a/‎.idea/workspace.xml
+692 b/‎.idea/workspace.xml
+692
diff --git a/‎HCPML_analysis.py
+119 b/‎HCPML_analysis.py
+119
diff --git a/‎HCPML_plt.py
+29 b/‎HCPML_plt.py
+29
diff --git a/‎pltAccBelowChance.py
+61 b/‎pltAccBelowChance.py
+61
diff --git a/‎pltClfAcc.py
+80 b/‎pltClfAcc.py
+80
diff --git a/‎runCV.py
+11 b/‎runCV.py
+11
@@ -0,0 +1,119 @@
+from mvpa2.suite import *
+from joblib import Parallel, delayed
+from HCPML_plt import clfAccHist
+import os
+import platform
+import numpy as np
+import nibabel as nib
+import multiprocessing
+import runCV
+
+
+#console logging + wall-clock start (reported at the very end of the script)
+verbose.level = 2
+script_start_time = time.time()
+
+#what is being analysed
+task      = 'motor' #motor, WM
+clf_name  = 'lfvslh' #lfvslh, multiclass (all 5 movements)
+
+#the external drive is mounted at a different location on the mac laptop
+#than on any other (linux) host, so pick the two root folders by hostname
+if platform.node() == 'Patricks-MacBook-Pro.local':
+    data_root = '/Volumes/maloneHD/Data/HCP_ML/'
+    beta_root = '/Volumes/maloneHD/Data_noSync/HCP_ML/'
+else:
+    data_root = '/media/malone/maloneHD/Data/HCP_ML/'
+    beta_root = '/media/malone/maloneHD/Data_noSync/HCP_ML/'
+
+data_path = os.path.join(data_root, task)           # base directory
+beta_path = os.path.join(beta_root, task, 'betas/')  # beta images
+mvpa_path = os.path.join(data_path,'mvpa',clf_name)  # classification outputs
+parc_path = os.path.join(data_path,'parc')           # parcellations
+
+#analysis parameters
+nsubs    = 950 #number of subjects
+nparc    = 360 #number of parcels/ROIs
+clf_type = 'SVM' #KNN, SVM
+#k-nearest-neighbor parameter: sqrt(nsubs) rounded to the nearest integer.
+#round() returns a float on Python 2, so cast explicitly -- k is a neighbour
+#count and must be an int.
+knn_k    = int(round(np.sqrt(nsubs)))
+cv_type  = 'nfld' #split_half, LOSO (leave-one-subject-out), nfld (n-fold)
+#targets[i] is the label given to the beta image of cope number pe_num[i]
+targets  = ['lf','lh']
+pe_num   = ['2','3']
+#alternative settings for the 5-way (multiclass) problem:
+#targets  = ['lf','lh','rf','rh','t'] #targets to be classified
+#pe_num   = ['2','3','4','5','6'] #parameter estimate numbers corresponding to targets
+
+#define subjects and mask
+#os.listdir() returns entries in arbitrary order and also returns hidden
+#files (e.g. macOS '.DS_Store'); drop those and sort so that "the first
+#nsubs subjects" is the same set on every run and every machine.
+subs      = sorted(s for s in os.listdir(beta_path) if not s.startswith('.'))
+subs      = subs[:nsubs]
+surf_mask = np.ones([1,59412]) #mask for cortical surface nodes, not subcortical/cerebellum volumetric voxels
+#parcellation image: one parcel label per surface node
+msk_path  = os.path.join(parc_path, 'Glasser_360.dtseries.nii')
+msk       = nib.load(msk_path)
+msk_data  = msk.get_data()
+msk_data  = msk_data[0, 0, 0, 0, 0, 0:]  #last dimension contains parcel data
+
+#load beta imgs: one dataset per subject, one sample per entry of pe_num
+ds_all = []
+for index, s in enumerate(subs):
+    #NOTE(review): the folder names below hard-code the motor task even
+    #though `task` is configurable above -- confirm before running with WM
+    tds_beta_path = os.path.join(beta_path, s,
+                                 'MNINonLinear', 'Results', 'tfMRI_Motor',
+                                 'tfMRI_MOTOR_hp200_s2_level2.feat',
+                                 'GrayordinatesStats')
+    pe_paths = [os.path.join(tds_beta_path,
+                             'cope'+p+'.feat','pe1.dtseries.nii')
+                for p in pe_num]
+
+    ds = fmri_dataset(pe_paths,targets=targets,mask=surf_mask)
+
+    #tag every sample with its subject index (used as the CV partition
+    #attribute) and every feature with its parcel label
+    ds.sa['subject'] = np.repeat(index, len(ds))
+    ds.fa['parcel']  = msk_data
+    ds_all.append(ds)
+    #report a 1-based count against the number of subjects actually found,
+    #which can be smaller than nsubs
+    verbose(2, "subject %i of %i loaded" % (index + 1, len(subs)))
+
+fds = vstack(ds_all) #stack datasets
+
+#classifier algorithm
+#compare strings with ==, not `is`: identity of equal strings is an
+#interpreter implementation detail and is not guaranteed
+if clf_type == 'SVM':
+    clf = LinearCSVMC()
+elif clf_type == 'KNN':
+    clf = kNN(k=knn_k, voting='weighted')
+else:
+    #fail here with a clear message instead of a NameError on `clf` below
+    raise ValueError("unknown clf_type %r (expected 'SVM' or 'KNN')" % (clf_type,))
+
+#cross-validation algorithm; every scheme partitions on the 'subject'
+#sample attribute and scores each fold with mean_match_accuracy
+if cv_type == 'split_half':
+    cv = CrossValidation(clf,
+                         HalfPartitioner(count=2,
+                                         selection_strategy='random', attr='subject'),
+                         errorfx=mean_match_accuracy)
+elif cv_type == 'LOSO':
+    cv = CrossValidation(clf,
+                         NFoldPartitioner(attr='subject'),
+                         errorfx=mean_match_accuracy)
+elif cv_type == 'nfld':
+    cv = CrossValidation(clf,
+                         NFoldPartitioner(count=5,
+                                         selection_strategy='random', attr='subject'),
+                         errorfx=mean_match_accuracy)
+else:
+    raise ValueError("unknown cv_type %r (expected 'split_half', 'LOSO' or 'nfld')"
+                     % (cv_type,))
+#run classification: one independent cross-validation per parcel, fanned
+#out over all CPU cores; each job only receives the features (columns) of
+#fds whose parcel label equals p
+parc       = range(1,nparc+1)
+num_cores  = multiprocessing.cpu_count()
+#NOTE(review): the accuracy-map code further down reads cv_results[p-1] as
+#the result for parcel p, i.e. it relies on results being returned in the
+#same order as `parc` -- confirm against the joblib documentation
+cv_results = Parallel(n_jobs=num_cores)(
+    delayed(runCV.runCV)(p, fds[:, fds.fa.parcel == p], clf, cv, nparc)
+    for p in parc)
+
+#save nii accuracy map
+#Re-load the parcellation image and overwrite every node's parcel label
+#with the mean cross-validation result of that parcel, then write the image
+#out under <mvpa_path>/accuracy_maps/.
+msk      = nib.load(msk_path)
+msk_data = msk.get_data()
+msk_data = msk_data[0, 0, 0, 0, 0, 0:]  #last dimension contains parcel data
+#Each element is read (as parcel label i) before it is overwritten, so the
+#in-place replacement is order-safe. Labels are 1-based: parcel i uses
+#cv_results[i-1].
+#NOTE(review): a label of 0 would index cv_results[-1], i.e. silently take
+#the LAST parcel's result -- confirm the parcellation has no 0 labels.
+#NOTE(review): if the image data has an integer dtype the accuracies would
+#be truncated on assignment -- confirm the dtype is floating point.
+for index, i in enumerate(msk_data):
+    msk_data[index] = np.mean(cv_results[int(i)-1])
+#NOTE(review): the reshaped array is only bound to the local name; nib.save
+#below writes `msk`, not `msk_data`. The saved values therefore presumably
+#depend on msk_data being a view of the array held by `msk` that was
+#modified in place above -- confirm for the nibabel version in use.
+msk_data = msk_data.reshape((1, 1, 1, 1, 1, msk_data.size))
+nib.save(msk, os.path.join(mvpa_path,'accuracy_maps',
+                           str(nsubs)+'subs_'+cv_type+'_CV_'+clf_type
+                           +'clfAcc.dtseries.nii'))
+
+#stack the per-parcel CV outputs into one numpy array and write it to
+#<mvpa_path>/cv_results/ (np.save appends the .npy extension)
+cv_results_out = np.asarray([np.asarray(parcel_result) for parcel_result in cv_results])
+results_file   = os.path.join(mvpa_path,'cv_results',
+                              str(nsubs)+'subs_'+cv_type+'_CV_'+clf_type+'clfAcc')
+np.save(results_file, cv_results_out)
+
+#histogram of accuracies, with chance level = 1 / number of classes
+chance = 1.0 / len(targets)
+clfAccHist(nsubs,clf_type,cv_type,chance,mvpa_path)
+
+#total wall-clock time since the top of the script
+elapsed_minutes = (time.time() - script_start_time)/60
+verbose(2, "total script computation time: %.1f minutes" % elapsed_minutes)
@@ -0,0 +1,29 @@
+import numpy as np
+import os
+import matplotlib.pyplot as plt
+
+def clfAccHist(nsubs,clf_type,cv_type,chance,mvpa_path):
+    """Plot and save a histogram of classification accuracies.
+
+    Loads the saved cross-validation results for the given run, averages
+    them along axis 1 (across CV folds), and writes a histogram of the
+    averaged values as a PNG.
+
+    Parameters
+    ----------
+    nsubs : number of subjects; used in the file names and the title
+    clf_type : classifier label; used in the file names and the title
+    cv_type : cross-validation label; used in the file names and the title
+    chance : chance-level accuracy, drawn as a dashed black vertical line
+    mvpa_path : directory holding the 'cv_results' and 'images' sub-folders
+
+    Input : <mvpa_path>/cv_results/<nsubs>subs_<cv_type>_CV_<clf_type>clfAcc.npy
+    Output: <mvpa_path>/images/<nsubs>subs_<cv_type>CV_<clf_type>clfAcc.png
+    Returns None.
+    """
+
+    #load clf results
+    cv_results = np.load(os.path.join(mvpa_path,'cv_results',
+                                      str(nsubs)+'subs_'+cv_type+
+                                      '_CV_'+clf_type+'clfAcc.npy'))
+    #average acc across CV folds
+    acc_mean = np.mean(cv_results,1)
+    #average/std acc across parcels (rounded for display in the title)
+    pmean    = (np.mean(acc_mean)).round(2)
+    pstd     = (np.std(acc_mean)).round(2)
+
+    #plot acc histogram
+    fig = plt.figure(figsize=(8,6))
+    try:
+        plt.hist(acc_mean)
+        plt.ylabel('Num parcels')
+        plt.xlabel('Accuracy')
+        #fixed axes so histograms from different runs are directly comparable
+        plt.axis([0, 1, 0, 140])
+        plt.axvline(chance, color='k', linestyle='dashed', linewidth=1)
+        plt.title(str(nsubs)+' subs,'+cv_type+' CV, '+clf_type+
+                  ' clf: mean='+str(pmean)+' std='+str(pstd))
+
+        #red dashed line marks the mean accuracy
+        plt.axvline(pmean, color='r', linestyle='dashed', linewidth=1)
+        plt.savefig(os.path.join(mvpa_path,'images',str(nsubs)+'subs_'+cv_type+'CV_'+clf_type+'clfAcc.png'),dpi=400)
+    finally:
+        #pyplot keeps every figure alive until it is closed; release it so
+        #repeated calls do not accumulate open figures
+        plt.close(fig)
@@ -0,0 +1,61 @@
+import numpy as np
+import os
+import matplotlib.pyplot as plt
+
+samp_sizes = [10,20,50,100,200,500,1080]
+mvpa_path  = '/Volumes/maloneHD/Data/HCP_ML/motor/mvpa/lfvslh/'
+cv_type    = 'LOSO'
+clf_types  = ['SVM','KNN'] #column order of nBelowChnce
+chance     = 0.5 #threshold below which an ROI counts as below chance
+
+#nBelowChnce[i,j]: number of ROIs whose fold-averaged accuracy is below
+#chance for sample size samp_sizes[i] and classifier clf_types[j]
+nBelowChnce = np.empty([len(samp_sizes),len(clf_types)])
+for i, s in enumerate(samp_sizes):
+    for j, clf_type in enumerate(clf_types):
+        #load results saved by the analysis script for this run
+        cv_results = np.load(os.path.join(mvpa_path,'cv_results',
+                                          str(s)+'subs_'+cv_type+
+                                          '_CV_'+clf_type+'clfAcc.npy'))
+        #average acc across CV folds
+        acc_mean = np.mean(cv_results,1)
+
+        #np.sum counts over every element and returns a scalar, whatever
+        #the dimensionality of acc_mean
+        nBelowChnce[i,j] = np.sum(acc_mean<chance)
+
+
+plt.figure(figsize=(12, 9))
+ax = plt.subplot(111)
+
+# hide the top and right frame lines
+for side in ("top", "right"):
+    ax.spines[side].set_visible(False)
+
+# keep tick marks on the bottom and left axes only
+ax.get_xaxis().tick_bottom()
+ax.get_yaxis().tick_left()
+
+plt.ylim(0, 100)
+
+# make the x tick labels large enough to read
+plt.xticks(range(0, 1100, 100), fontsize=14)
+
+plt.ylabel("Num ROI with below-chance clf acc", fontsize=16)
+plt.xlabel("Sample size (nsubs)", fontsize=16)
+
+# one line per classifier: column 0 of nBelowChnce is SVM, column 1 is KNN
+for col, (line_color, clf_label) in enumerate([("#3F5D7D", 'SVM'),
+                                               ("#2cf7b2", 'KNN')]):
+    plt.plot(samp_sizes, nBelowChnce[:,col], color=line_color, lw=2, label=clf_label)
+
+plt.legend(loc=1)
+
+plt.savefig(os.path.join(mvpa_path,'images','accBelowChance.png'),dpi=200)
@@ -0,0 +1,80 @@
+import numpy as np
+import os
+import matplotlib.pyplot as plt
+
+samp_sizes = [10,20,50,100,200,500,1080]
+mvpa_path  = '/Volumes/maloneHD/Data/HCP_ML/motor/mvpa/lfvslh/'
+cv_type    = 'LOSO'
+clf_types  = ['SVM','KNN'] #column order of the two arrays below
+
+#accBySampSize[i,j] / semBySampSize[i,j]: mean and standard error of the
+#fold-averaged accuracies for sample size samp_sizes[i], classifier clf_types[j]
+accBySampSize = np.empty([len(samp_sizes),len(clf_types)])
+semBySampSize = np.empty([len(samp_sizes),len(clf_types)])
+for i, s in enumerate(samp_sizes):
+    for j, clf_type in enumerate(clf_types):
+        #load results saved by the analysis script for this run
+        cv_results = np.load(os.path.join(mvpa_path,'cv_results',
+                                          str(s)+'subs_'+cv_type+
+                                          '_CV_'+clf_type+'clfAcc.npy'))
+        #average acc across CV folds
+        acc_mean = np.mean(cv_results,1)
+
+        #Keep full precision: the plot spans only 0.50-0.65, so rounding to
+        #2 decimals would visibly quantise the curve and distort the SEM.
+        accBySampSize[i,j] = np.mean(acc_mean)
+        #SEM = std / sqrt(n), with n taken from the data instead of a
+        #hard-coded parcel count
+        semBySampSize[i,j] = np.std(acc_mean)/np.sqrt(acc_mean.size)
+
+
+plt.figure(figsize=(12, 9))
+
+# remove plot frame lines
+ax = plt.subplot(111)
+ax.spines["top"].set_visible(False)
+ax.spines["right"].set_visible(False)
+
+# ensure that the axis ticks only show up on the bottom and left of the plot
+ax.get_xaxis().tick_bottom()
+ax.get_yaxis().tick_left()
+
+plt.ylim(0.5, 0.65)
+
+# make the x tick labels large enough to read
+plt.xticks(range(0, 1100, 100), fontsize=14)
+
+plt.ylabel("Accuracy", fontsize=16)
+plt.xlabel("Sample size (nsubs)", fontsize=16)
+
+# shaded mean +/- SEM band per classifier (column 0 = SVM, column 1 = KNN)
+# SVM error bars
+plt.fill_between(samp_sizes, accBySampSize[:,0] - semBySampSize[:,0],
+                 accBySampSize[:, 0] + semBySampSize[:, 0], color="#3F5D7D")
+
+# KNN error bars
+plt.fill_between(samp_sizes, accBySampSize[:,1] - semBySampSize[:,1],
+                 accBySampSize[:, 1] + semBySampSize[:, 1], color="#2cf7b2")
+
+# plot the means as a white line in between the error bars.
+plt.plot(samp_sizes, accBySampSize[:,0], color="white", lw=2, label='SVM')
+plt.plot(samp_sizes, accBySampSize[:,1], color="white", lw=2, label='KNN')
+
+leg = plt.legend(loc=3)
+
+# The plotted lines are white, so the legend entries would be invisible:
+# recolor each legend handle to the color of its error band.
+# Newer Matplotlib exposes the handles as `legend_handles`; older releases
+# only have `legendHandles`, so support both.
+handles = getattr(leg, 'legend_handles', None)
+if handles is None:
+    handles = leg.legendHandles
+hl_dict = {handle.get_label(): handle for handle in handles}
+hl_dict['SVM'].set_color(color="#3F5D7D")
+hl_dict['KNN'].set_color(color="#2cf7b2")
+
+plt.savefig(os.path.join(mvpa_path,'images','accBySampSize.png'),dpi=400)
@@ -0,0 +1,11 @@
+from mvpa2.suite import *
+
+def runCV(p,sub_fds,clf,cv,nparc):
+    """Run the cross-validation ``cv`` on one parcel's dataset.
+
+    Logs how long the call took and which parcel was processed.
+
+    p       : parcel number, used only in the progress message
+    sub_fds : dataset passed to ``cv``
+    clf     : not used inside this function
+    cv      : callable applied to ``sub_fds``
+    nparc   : total number of parcels, used only in the progress message
+
+    Returns whatever ``cv(sub_fds)`` returns.
+    """
+
+    #enable output to console
+    verbose.level = 2
+
+    started = time.time()
+    cv_out  = cv(sub_fds)
+    elapsed = time.time() - started
+
+    verbose(2, "classification computation time: %.1f seconds" % elapsed)
+    verbose(2, "parcel " + str(p) + " of " + str(nparc))
+    return cv_out