-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathrun_test.py
109 lines (88 loc) · 4.27 KB
/
run_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# @author Nikhil Bhagawt
# @date 20 Sept 2018
import pandas as pd
import sys
import os
import argparse
import re
from lib.preproc_checks import *
from lib.minc_wrap import *
from lib.outlier_checks import *
# Hardcoded location of the nist_mni preprocessing pipeline checkout; added to
# sys.path so its modules are importable.  NOTE(review): site-specific path —
# will fail silently on other machines; consider an env var or CLI flag.
preproc_pipeline_dir = '/data/ipl/scratch03/nikhil/MR_preproc_dash/code/nist_mni_pipelines/'
if preproc_pipeline_dir not in sys.path:
    sys.path.append(preproc_pipeline_dir)
# Environment init scripts for the MINC toolkit (sourced by downstream shell calls):
#   source /opt/minc/1.9.16/minc-toolkit-config.sh  -> Mac laptop
#   source /ipl/quarantine/experimental/2013-02-15/init.sh  -> BIC systems
local_env = '/ipl/quarantine/experimental/2013-02-15/init.sh'
# Directory containing helper shell scripts used by check_output_files.
script_dir = './scripts/'
def main():
    """Run preprocessing sanity checks over a dataset directory tree.

    Parses ``--data_dir`` and ``--save_path`` from the command line, then for
    every subject subdirectory (matching the ``<num>_S_<num>`` naming pattern)
    loads the subject pickle, checks that all expected per-timepoint output
    directories and files exist, collects registration parameters, flags
    registration outliers, and finally writes:

    * ``<save_path>.csv`` — per-subject/timepoint check summary
    * ``<save_path>.pkl`` — registration-parameter dataframe with outlier flags

    Returns:
        int: 0 on completion (passed to ``sys.exit`` by the entry guard).
    """
    # argparse
    parser = argparse.ArgumentParser(description='Code for preproc checks on dir-tree and output files')
    parser.add_argument('--data_dir', required=True, help='local dataset path')
    parser.add_argument('--save_path', required=True, help='path for summary csv/dataframe')
    args = parser.parse_args()

    # Req params
    data_dir = args.data_dir
    save_path = args.save_path

    # List of all subject subdirectory names (first level under data_dir)
    subject_names = next(os.walk(data_dir))[1]
    # Subject naming convention, e.g. "123_S_4567" (ADNI-style IDs — presumably; verify)
    pattern = re.compile("([0-9]*_S_[0-9]*)")

    # Expected output directories per timepoint (first-level output subdirs)
    output_dirs = ['clp','clp2','stx','stx2','vbm','cls','vol','lng'] # In each timepoint

    # Registration dirs and parameters (for outlier detection)
    reg_dirs = ['stx','stx2']
    reg_cols = ['x_center', 'y_center', 'z_center', 'x_translation', 'y_translation', 'z_translation', 'x_rotation', 'y_rotation', 'z_rotation',
                'x_scale', 'y_scale', 'z_scale', 'x_shear', 'y_shear', 'z_shear']

    # List of files expected in each subdir
    task_file_names_dict = {}
    task_file_names_dict['clp'] = ['clp','den','nuc'] #_t1
    task_file_names_dict['clp2'] = ['clp2'] #_t1
    task_file_names_dict['cls'] = ['cls','lob']
    task_file_names_dict['stx'] = ['stx','nsstx'] #_t1
    task_file_names_dict['stx2'] = ['stx2'] #_t1
    task_file_names_dict['vol'] = ['vol'] # text file
    task_file_names_dict['vbm'] = ['vbm_imp_csf','vbm_imp_gm','vbm_imp_wm']
    task_file_names_dict['lng'] = ['lng'] #_t1

    # Accumulate per-subject frames in lists and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0 and was O(n^2) anyway.
    df_list = []
    reg_param_flat_list = []
    reg_param_list_subject = []

    # iterate thru all subjects
    for subject_name in subject_names:
        if not pattern.match(subject_name):
            print('\ndirectory name {} does not match subject naming convention'.format(subject_name))
            continue
        # os.path.join tolerates data_dir with or without a trailing slash
        # (the original string concatenation required the trailing slash).
        subject_dir = os.path.join(data_dir, subject_name) + '/'
        pipeline_data_pickle = pd.read_pickle(os.path.join(subject_dir, '{}.pickle'.format(subject_name)))
        df = parse_pickle(pipeline_data_pickle, output_dirs)
        df, missing_tp, missing_dir = check_output_dirs(df, output_dirs, subject_dir)
        df, missing_file, reg_param_flat_tp, reg_param_list_tp = check_output_files(local_env, df, task_file_names_dict, subject_dir, script_dir)
        reg_param_flat_list.append(reg_param_flat_tp)
        reg_param_list_subject = reg_param_list_subject + reg_param_list_tp

        print('')
        print('---------------------------------------------------------------')
        print('subject: {}'.format(df['subject_idx'].values[0]))
        print('missing timepoints (# {}): \n{}'.format(len(missing_tp),missing_tp))
        print('')
        print('missing dirs (# {}): \n{}'.format(len(missing_dir), missing_dir))
        print('')
        print('missing files(# {}): \n{}'.format(len(missing_file),missing_file))
        df_list.append(df)

    # Guard: pd.concat([]) raises on an empty list (no matching subjects).
    df_preproc = pd.concat(df_list) if df_list else pd.DataFrame()
    reg_param_flat_subject = pd.concat(reg_param_flat_list) if reg_param_flat_list else pd.DataFrame()

    print('')
    print('Length of df_preproc: {}'.format(len(df_preproc)))
    print('number of subjects: {}'.format(len(set(df_preproc['subject_idx'].values))))
    print('saving summary csv at {}'.format(save_path + '.csv'))
    print('saving reg_param dataframe at {}'.format(save_path + '.pkl'))
    print('')

    # Find outliers
    od_df = reg_param_flat_subject.copy()
    od_df = find_reg_outliers(od_df, reg_dirs, reg_cols)

    # Save output
    df_preproc.to_csv(save_path + '.csv')
    od_df.to_pickle(save_path + '.pkl')
    return 0
# Script entry point: propagate main()'s return code to the shell
# (None exits as 0).
if __name__ == '__main__':
    rc = main()
    sys.exit(rc)