Skip to content

Commit 0f1edd3

Browse files
committed
initial commit
0 parents  commit 0f1edd3

10 files changed

+1016
-0
lines changed

.DS_Store

6 KB
Binary file not shown.

.idea/misc.xml

+4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/modules.xml

+8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/pymvpa.iml

+12
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.idea/workspace.xml

+692
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

HCPML_analysis.py

+119
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
from mvpa2.suite import *
2+
from joblib import Parallel, delayed
3+
from HCPML_plt import clfAccHist
4+
import os
5+
import platform
6+
import numpy as np
7+
import nibabel as nib
8+
import multiprocessing
9+
import runCV
10+
11+
12+
#enable output to console
13+
verbose.level = 2
14+
15+
script_start_time = time.time()
16+
17+
#define paths
18+
task = 'motor' #motor, WM
19+
clf_name = 'lfvslh' #lfvslh, multiclass (all 5 movements)
20+
if platform.node() == 'Patricks-MacBook-Pro.local':
21+
data_path = os.path.join('/Volumes/maloneHD/Data/HCP_ML/', task) # base directory (mac)
22+
beta_path = os.path.join('/Volumes/maloneHD/Data_noSync/HCP_ML/', task, 'betas/') # beta images
23+
else:
24+
data_path = os.path.join('/media/malone/maloneHD/Data/HCP_ML/', task) # base directory (linux)
25+
beta_path = os.path.join('/media/malone/maloneHD/Data_noSync/HCP_ML/', task, 'betas/') #beta images
26+
27+
mvpa_path = os.path.join(data_path,'mvpa',clf_name)
28+
parc_path = os.path.join(data_path,'parc') #parcellations
29+
30+
#analysis parameters
31+
nsubs = 950 #number of subjects
32+
nparc = 360 #number of parcels/ROIs
33+
clf_type = 'SVM' #KNN, SVM
34+
knn_k = round(np.sqrt(nsubs)) #k-nearest-neighbor parameter
35+
cv_type = 'nfld' #split_half, LOSO (leave-one-subject-out), nfld (n-fold)
36+
targets = ['lf','lh']
37+
pe_num = ['2','3']
38+
#targets = ['lf','lh','rf','rh','t'] #targets to be classified
39+
#pe_num = ['2','3','4','5','6'] #parameter estimate numbers corresponding to targets
40+
41+
#define subjects and mask
42+
subs = os.listdir(beta_path)
43+
subs = subs[:nsubs]
44+
surf_mask = np.ones([1,59412]) #mask for cortical surface nodes, not subcortical/cerebellum volumetric voxels
45+
msk_path = os.path.join(parc_path, 'Glasser_360.dtseries.nii')
46+
msk = nib.load(msk_path)
47+
msk_data = msk.get_data()
48+
msk_data = msk_data[0, 0, 0, 0, 0, 0:] #last dimension contains parcel data
49+
50+
#load beta imgs
51+
ds_all = []
52+
for index, s in enumerate(subs):
53+
tds_beta_path = os.path.join(beta_path, s,
54+
'MNINonLinear', 'Results', 'tfMRI_Motor',
55+
'tfMRI_MOTOR_hp200_s2_level2.feat',
56+
'GrayordinatesStats')
57+
pe_paths = []
58+
for p in pe_num:
59+
pe_paths.append(os.path.join(tds_beta_path,
60+
'cope'+p+'.feat','pe1.dtseries.nii'))
61+
62+
ds = fmri_dataset(pe_paths,targets=targets,mask=surf_mask)
63+
64+
ds.sa['subject'] = np.repeat(index, len(ds))
65+
ds.fa['parcel'] = msk_data
66+
ds_all.append(ds)
67+
verbose(2, "subject %i of %i loaded" % (index, nsubs))
68+
69+
fds = vstack(ds_all) #stack datasets
70+
71+
#classifier algorithm
72+
if clf_type is 'SVM':
73+
clf = LinearCSVMC()
74+
elif clf_type is 'KNN':
75+
clf = kNN(k=knn_k, voting='weighted')
76+
#cross-validation algorithm
77+
if cv_type is 'split_half':
78+
cv = CrossValidation(clf,
79+
HalfPartitioner(count=2,
80+
selection_strategy='random', attr='subject'),
81+
errorfx=mean_match_accuracy)
82+
elif cv_type is 'LOSO':
83+
cv = CrossValidation(clf,
84+
NFoldPartitioner(attr='subject'),
85+
errorfx=mean_match_accuracy)
86+
elif cv_type is 'nfld':
87+
cv = CrossValidation(clf,
88+
NFoldPartitioner(count=5,
89+
selection_strategy='random', attr='subject'),
90+
errorfx=mean_match_accuracy)
91+
#run classification
92+
parc = range(1,nparc+1)
93+
cv_results = [0 for x in parc]
94+
num_cores = multiprocessing.cpu_count()
95+
cv_results = Parallel(n_jobs=num_cores)(delayed(runCV.runCV)
96+
(p,fds[:, fds.fa.parcel == p],clf,cv,nparc) for p in parc)
97+
98+
#save nii accuracy map
99+
msk = nib.load(msk_path)
100+
msk_data = msk.get_data()
101+
msk_data = msk_data[0, 0, 0, 0, 0, 0:] #last dimension contains parcel data
102+
for index, i in enumerate(msk_data):
103+
msk_data[index] = np.mean(cv_results[int(i)-1])
104+
msk_data = msk_data.reshape((1, 1, 1, 1, 1, msk_data.size))
105+
nib.save(msk, os.path.join(mvpa_path,'accuracy_maps',
106+
str(nsubs)+'subs_'+cv_type+'_CV_'+clf_type
107+
+'clfAcc.dtseries.nii'))
108+
109+
#convert clf results to numpy array and save
110+
cv_results_out = [np.asarray(cv_results[index]) for index, i in enumerate(cv_results)]
111+
cv_results_out = np.asarray(cv_results_out)
112+
np.save(os.path.join(mvpa_path,'cv_results',str(nsubs)+'subs_'+cv_type+'_CV_'+clf_type+'clfAcc'),
113+
cv_results_out)
114+
115+
#generate clf accuracy histogram
116+
chance = float(1)/float(len(targets))
117+
clfAccHist(nsubs,clf_type,cv_type,chance,mvpa_path)
118+
119+
verbose(2, "total script computation time: %.1f minutes" % ((time.time() - script_start_time)/60))

HCPML_plt.py

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import numpy as np
2+
import os
3+
import matplotlib.pyplot as plt
4+
5+
def clfAccHist(nsubs,clf_type,cv_type,chance,mvpa_path):
6+
7+
#load clf results
8+
cv_results = np.load(os.path.join(mvpa_path,'cv_results',
9+
str(nsubs)+'subs_'+cv_type+
10+
'_CV_'+clf_type+'clfAcc.npy'))
11+
#average acc across CV folds
12+
acc_mean = np.mean(cv_results,1)
13+
#average/std acc across parcels
14+
pmean = (np.mean(acc_mean)).round(2)
15+
pstd = (np.std(acc_mean)).round(2)
16+
17+
#plot acc histogram
18+
plt.figure(figsize=(8,6))
19+
plt.hist(acc_mean)
20+
plt.ylabel('Num parcels')
21+
plt.xlabel('Accuracy')
22+
plt.axis([0, 1, 0, 140])
23+
plt.axvline(chance, color='k', linestyle='dashed', linewidth=1)
24+
plt.title(str(nsubs)+' subs,'+cv_type+' CV, '+clf_type+
25+
' clf: mean='+str(pmean)+' std='+str(pstd))
26+
27+
plt.axvline(pmean, color='r', linestyle='dashed', linewidth=1)
28+
plt.savefig(os.path.join(mvpa_path,'images',str(nsubs)+'subs_'+cv_type+'CV_'+clf_type+'clfAcc.png'),dpi=400)
29+
return

pltAccBelowChance.py

+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import numpy as np
2+
import os
3+
import matplotlib.pyplot as plt
4+
5+
samp_sizes = [10,20,50,100,200,500,1080]
6+
mvpa_path = '/Volumes/maloneHD/Data/HCP_ML/motor/mvpa/lfvslh/'
7+
cv_type = 'LOSO'
8+
9+
nBelowChnce = np.empty([len(samp_sizes),2])
10+
for i, s in enumerate(samp_sizes):
11+
#load SVM results
12+
cv_results = np.load(os.path.join(mvpa_path,'cv_results',
13+
str(s)+'subs_'+cv_type+
14+
'_CV_SVMclfAcc.npy'))
15+
#average acc across CV folds
16+
acc_mean = np.mean(cv_results,1)
17+
#average/std acc across parcels
18+
#pmean = (np.mean(acc_mean)).round(2)
19+
#pstd = (np.std(acc_mean)).round(2)
20+
21+
nBelowChnce[i,0] = sum(acc_mean<0.5)
22+
23+
#load KNN results
24+
cv_results = np.load(os.path.join(mvpa_path,'cv_results',
25+
str(s)+'subs_'+cv_type+
26+
'_CV_KNNclfAcc.npy'))
27+
#average acc across CV folds
28+
acc_mean = np.mean(cv_results,1)
29+
#average/std acc across parcels
30+
#pmean = (np.mean(acc_mean)).round(2)
31+
#pstd = (np.std(acc_mean)).round(2)
32+
33+
nBelowChnce[i,1] = sum(acc_mean<0.5)
34+
35+
36+
plt.figure(figsize=(12, 9))
37+
38+
# remove plot frame lines
39+
ax = plt.subplot(111)
40+
ax.spines["top"].set_visible(False)
41+
ax.spines["right"].set_visible(False)
42+
#
43+
# ensure that the axis ticks only show up on the bottom and left of the plot
44+
ax.get_xaxis().tick_bottom()
45+
ax.get_yaxis().tick_left()
46+
47+
plt.ylim(0, 100)
48+
49+
# make xticks larger enough to read
50+
plt.xticks(range(0, 1100, 100), fontsize=14)
51+
52+
plt.ylabel("Num ROI with below-chance clf acc", fontsize=16)
53+
plt.xlabel("Sample size (nsubs)", fontsize=16)
54+
55+
# plot the means as a white line in between the error bars.
56+
plt.plot(samp_sizes, nBelowChnce[:,0], color="#3F5D7D", lw=2, label='SVM')
57+
plt.plot(samp_sizes, nBelowChnce[:,1], color="#2cf7b2", lw=2, label='KNN')
58+
59+
plt.legend(loc=1)
60+
61+
plt.savefig(os.path.join(mvpa_path,'images','accBelowChance.png'),dpi=200)

pltClfAcc.py

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import numpy as np
2+
import os
3+
import matplotlib.pyplot as plt
4+
5+
samp_sizes = [10,20,50,100,200,500,1080]
6+
mvpa_path = '/Volumes/maloneHD/Data/HCP_ML/motor/mvpa/lfvslh/'
7+
cv_type = 'LOSO'
8+
9+
accBySampSize = np.empty([len(samp_sizes),2])
10+
semBySampSize = np.empty([len(samp_sizes),2])
11+
for i, s in enumerate(samp_sizes):
12+
#load SVM results
13+
cv_results = np.load(os.path.join(mvpa_path,'cv_results',
14+
str(s)+'subs_'+cv_type+
15+
'_CV_SVMclfAcc.npy'))
16+
#average acc across CV folds
17+
acc_mean = np.mean(cv_results,1)
18+
#average/std acc across parcels
19+
pmean = (np.mean(acc_mean)).round(2)
20+
pstd = (np.std(acc_mean)).round(2)
21+
22+
accBySampSize[i,0] = pmean
23+
semBySampSize[i, 0] = pstd/np.sqrt(360)
24+
25+
#load KNN results
26+
cv_results = np.load(os.path.join(mvpa_path,'cv_results',
27+
str(s)+'subs_'+cv_type+
28+
'_CV_KNNclfAcc.npy'))
29+
#average acc across CV folds
30+
acc_mean = np.mean(cv_results,1)
31+
#average/std acc across parcels
32+
pmean = (np.mean(acc_mean)).round(2)
33+
pstd = (np.std(acc_mean)).round(2)
34+
35+
accBySampSize[i,1] = pmean
36+
semBySampSize[i, 1] = pstd/np.sqrt(360)
37+
38+
39+
plt.figure(figsize=(12, 9))
40+
41+
# remove plot frame lines
42+
ax = plt.subplot(111)
43+
ax.spines["top"].set_visible(False)
44+
ax.spines["right"].set_visible(False)
45+
#
46+
# ensure that the axis ticks only show up on the bottom and left of the plot
47+
ax.get_xaxis().tick_bottom()
48+
ax.get_yaxis().tick_left()
49+
50+
plt.ylim(0.5, 0.65)
51+
52+
# make xticks larger enough to read
53+
plt.xticks(range(0, 1100, 100), fontsize=14)
54+
55+
plt.ylabel("Accuracy", fontsize=16)
56+
plt.xlabel("Sample size (nsubs)", fontsize=16)
57+
58+
# matplotlib's fill_between() call to create error bars.
59+
# SVM error bars
60+
plt.fill_between(samp_sizes, accBySampSize[:,0] - semBySampSize[:,0],
61+
accBySampSize[:, 0] + semBySampSize[:, 0], color="#3F5D7D")
62+
63+
# KNN error bars
64+
plt.fill_between(samp_sizes, accBySampSize[:,1] - semBySampSize[:,1],
65+
accBySampSize[:, 1] + semBySampSize[:, 1], color="#2cf7b2")
66+
67+
# plot the means as a white line in between the error bars.
68+
plt.plot(samp_sizes, accBySampSize[:,0], color="white", lw=2, label='SVM')
69+
plt.plot(samp_sizes, accBySampSize[:,1], color="white", lw=2, label='KNN')
70+
71+
plt.legend(loc=3)
72+
73+
#change legend color to color of error bar
74+
ax = plt.gca()
75+
leg = ax.get_legend()
76+
hl_dict = {handle.get_label(): handle for handle in leg.legendHandles}
77+
hl_dict['SVM'].set_color(color="#3F5D7D")
78+
hl_dict['KNN'].set_color(color="#2cf7b2")
79+
80+
plt.savefig(os.path.join(mvpa_path,'images','accBySampSize.png'),dpi=400)

runCV.py

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
from mvpa2.suite import *
2+
3+
def runCV(p,sub_fds,clf,cv,nparc):
4+
5+
#enable output to console
6+
verbose.level = 2
7+
clf_start_time = time.time()
8+
cv_out = cv(sub_fds)
9+
verbose(2, "classification computation time: %.1f seconds" % (time.time() - clf_start_time))
10+
verbose(2, "parcel " + str(p) + " of " + str(nparc))
11+
return cv_out

0 commit comments

Comments
 (0)