From 1a156cdf739f488fbb473762381e6704fbdfec42 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Mon, 2 Dec 2024 19:03:08 -0500 Subject: [PATCH 1/8] add basic typing --- python/lib/bidsreader.py | 37 ++++----- python/lib/config.py | 0 python/lib/eeg.py | 26 +++--- python/lib/mri.py | 27 +++---- python/scripts/bids_import.py | 144 +++++++++++----------------------- 5 files changed, 83 insertions(+), 151 deletions(-) create mode 100644 python/lib/config.py diff --git a/python/lib/bidsreader.py b/python/lib/bidsreader.py index f63105186..5a71e30fb 100755 --- a/python/lib/bidsreader.py +++ b/python/lib/bidsreader.py @@ -3,6 +3,7 @@ import json import re import sys +from typing import Any from bids import BIDSLayout @@ -35,16 +36,13 @@ class BidsReader: bids_reader = BidsReader(bids_dir) """ - def __init__(self, bids_dir, verbose, validate = True): + def __init__(self, bids_dir: str, verbose: bool, validate: bool = True): """ Constructor method for the BidsReader class. - :param bids_dir: path to the BIDS structure to read - :type bids_dir: str - :param verbose : boolean to print verbose information - :type verbose : bool + :param bids_dir : path to the BIDS structure to read + :param verbose : boolean to print verbose information :param validate : boolean to validate the BIDS dataset - :type validate : bool """ self.verbose = verbose @@ -73,7 +71,7 @@ def __init__(self, bids_dir, verbose, validate = True): # load BIDS modality information self.cand_session_modalities_list = self.load_modalities_from_bids() - def load_bids_data(self, validate): + def load_bids_data(self, validate: bool): """ Loads the BIDS study using the BIDSLayout function (part of the pybids package) and return the object. @@ -114,13 +112,12 @@ def load_bids_data(self, validate): return bids_layout - def load_candidates_from_bids(self): + def load_candidates_from_bids(self) -> list[dict[str, Any]]: """ Loads the list of candidates from the BIDS study. 
List of participants and their information will be stored in participants_info. :return: list of dictionaries with participant information from BIDS - :rtype: list """ if self.verbose: @@ -149,7 +146,7 @@ def load_candidates_from_bids(self): return participants_info - def candidates_list_validation(self, participants_info): + def candidates_list_validation(self, participants_info: list[dict[str, Any]]): """ Validates whether the subjects listed in participants.tsv match the list of participant directory. If there is a mismatch, will exit with @@ -186,14 +183,13 @@ def candidates_list_validation(self, participants_info): if self.verbose: print('\t=> Passed validation of the list of participants\n') - def load_sessions_from_bids(self): + def load_sessions_from_bids(self) -> dict[str, list[str]]: """ Grep the list of sessions for each candidate directly from the BIDS structure. :return: dictionary with the list of sessions and candidates found in the BIDS structure - :rtype: dict """ if self.verbose: @@ -216,13 +212,12 @@ def load_sessions_from_bids(self): return cand_sessions - def load_modalities_from_bids(self): + def load_modalities_from_bids(self) -> list[dict[str, Any]]: """ Grep the list of modalities available for each session and candidate directly from the BIDS structure. :return: dictionary for candidate and session with list of modalities - :rtype: dict """ if self.verbose: @@ -253,20 +248,16 @@ def load_modalities_from_bids(self): return cand_session_modalities_list @staticmethod - def grep_file(files_list, match_pattern, derivative_pattern=None): + def grep_file(files_list: list[str], match_pattern: str, derivative_pattern: str | None = None) -> str | None: """ Grep a unique file based on a match pattern and returns it. 
- :param files_list : list of files to look into - :type files_list : list - :param match_pattern : pattern to use to find the file - :type match_pattern : str - :param derivative_pattern: derivative pattern to use if the file we look for - is a derivative file - :type derivative_pattern: str + :param files_list : list of files to look into + :param match_pattern : pattern to use to find the file + :param derivative_pattern : derivative pattern to use if the file we look for + is a derivative file :return: name of the first file that matches the pattern - :rtype: str """ for filename in files_list: diff --git a/python/lib/config.py b/python/lib/config.py new file mode 100644 index 000000000..e69de29bb diff --git a/python/lib/eeg.py b/python/lib/eeg.py index 0bc2b57fb..10ad3b25b 100755 --- a/python/lib/eeg.py +++ b/python/lib/eeg.py @@ -4,10 +4,13 @@ import json import os import sys +from typing import Any, Literal import lib.exitcode import lib.utilities as utilities +from lib.bidsreader import BidsReader from lib.candidate import Candidate +from lib.database import Database from lib.database_lib.config import Config from lib.database_lib.physiological_event_archive import PhysiologicalEventArchive from lib.database_lib.physiological_event_file import PhysiologicalEventFile @@ -76,37 +79,28 @@ class Eeg: db.disconnect() """ - def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, - verbose, data_dir, default_visit_label, loris_bids_eeg_rel_dir, - loris_bids_root_dir, dataset_tag_dict, dataset_type): + def __init__( + self, bids_reader: BidsReader, bids_sub_id: str, bids_ses_id: str | None, bids_modality: str, db: Database, + verbose: bool, data_dir: str, default_visit_label: str, loris_bids_eeg_rel_dir: str, + loris_bids_root_dir: str | None, dataset_tag_dict: dict[Any, Any], + dataset_type: Literal['raw', 'derivative'] | None, + ): """ Constructor method for the Eeg class. 
- :param bids_reader : dictionary with BIDS reader information - :type bids_reader : dict + :param bids_reader : The BIDS reader object :param bids_sub_id : BIDS subject ID (that will be used as PSCID) - :type bids_sub_id : str :param bids_ses_id : BIDS session ID (that will be used for the visit label) - :type bids_ses_id : str :param bids_modality: BIDS modality (a.k.a. EEG) - :tyoe bids_modality: str :param db : Database class object - :type db : object :param verbose : whether to be verbose - :type verbose : bool :param data_dir : LORIS data directory path (usually /data/PROJECT/data) - :type data_dir : str :param default_visit_label : default visit label to be used if no BIDS session are present in the BIDS structure - :type default_visit_label : str :param loris_bids_eeg_rel_dir: LORIS BIDS EEG relative dir path to data_dir - :type loris_bids_eeg_rel_dir: str :param loris_bids_root_dir : LORIS BIDS root directory path - :type loris_bids_root_dir : str :param dataset_tag_dict : Dict of dataset-inherited HED tags - :type dataset_tag_dict : dict :param dataset_type : raw | derivative. 
Type of the dataset - :type dataset_type : string """ # config diff --git a/python/lib/mri.py b/python/lib/mri.py index f7e8a5acd..29c47cc47 100644 --- a/python/lib/mri.py +++ b/python/lib/mri.py @@ -6,9 +6,13 @@ import re import sys +from bids.layout import BIDSFile + import lib.exitcode import lib.utilities as utilities +from lib.bidsreader import BidsReader from lib.candidate import Candidate +from lib.database import Database from lib.imaging import Imaging from lib.scanstsv import ScansTSV from lib.session import Session @@ -70,10 +74,11 @@ class Mri: db.disconnect() """ - def __init__(self, bids_reader, bids_sub_id, bids_ses_id, bids_modality, db, - verbose, data_dir, default_visit_label, - loris_bids_mri_rel_dir, loris_bids_root_dir): - + def __init__( + self, bids_reader: BidsReader, bids_sub_id: str, bids_ses_id: str | None, bids_modality: str, db: Database, + verbose: bool, data_dir: str, default_visit_label: str, loris_bids_mri_rel_dir: str, + loris_bids_root_dir : str | None, + ): # enumerate the different suffixes supported by BIDS per modality type self.possible_suffix_per_modality = { 'anat' : [ @@ -190,12 +195,11 @@ def get_loris_session_id(self): return loris_vl_info['ID'] - def grep_nifti_files(self): + def grep_nifti_files(self) -> list[BIDSFile]: """ Returns the list of NIfTI files found for the modality. :return: list of NIfTI files found for the modality - :rtype: list """ # grep all the possible suffixes for the modality @@ -209,18 +213,15 @@ def grep_nifti_files(self): # return the list of found NIfTI files return nii_files_list - def grep_bids_files(self, bids_type, extension): + def grep_bids_files(self, bids_type: str, extension: str) -> list[BIDSFile]: """ Greps the BIDS files and their layout information from the BIDSLayout and return that list. :param bids_type: the BIDS type to use to grep files (T1w, T2w, bold, dwi...) - :type bids_type: str :param extension: extension of the file to look for (nii.gz, json...) 
- :type extension: str :return: list of files from the BIDS layout - :rtype: list """ if self.bids_ses_id: @@ -239,25 +240,23 @@ def grep_bids_files(self, bids_type, extension): suffix = bids_type ) - def register_raw_file(self, nifti_file): + def register_raw_file(self, nifti_file: BIDSFile): """ Registers raw MRI files and related files into the files and parameter_file tables. :param nifti_file: NIfTI file object - :type nifti_file: pybids NIfTI file object """ # insert the NIfTI file self.fetch_and_insert_nifti_file(nifti_file) - def fetch_and_insert_nifti_file(self, nifti_file, derivatives=None): + def fetch_and_insert_nifti_file(self, nifti_file: BIDSFile, derivatives=None): """ Gather NIfTI file information to insert into the files and parameter_file tables. Once all the information has been gathered, it will call imaging.insert_imaging_file that will perform the insertion into the files and parameter_file tables. :param nifti_file : NIfTI file object - :type nifti_file : pybids NIfTI file object :param derivatives: whether the file to be registered is a derivative file :type derivatives: bool diff --git a/python/scripts/bids_import.py b/python/scripts/bids_import.py index f43a46fe2..5b4d0d254 100755 --- a/python/scripts/bids_import.py +++ b/python/scripts/bids_import.py @@ -7,6 +7,7 @@ import os import re import sys +from typing import Any, Literal import lib.exitcode import lib.physiological @@ -101,7 +102,7 @@ def main(): print(usage) sys.exit(lib.exitcode.MISSING_ARG) - if type and type not in ('raw', 'derivative'): + if type not in (None, 'raw', 'derivative'): print("--type must be one of 'raw', 'derivative'") print(usage) sys.exit(lib.exitcode.MISSING_ARG) @@ -130,21 +131,17 @@ def main(): ) -def input_error_checking(profile, bids_dir, usage): +def input_error_checking(profile: str, bids_dir: str, usage: str) -> Any: """ Checks whether the required inputs are set and that paths are valid. 
If the path to the config_file file valid, then it will import the file as a module so the database connection information can be used to connect. :param profile : path to the profile file with MySQL credentials - :type profile : str :param bids_dir: path to the BIDS directory to parse and insert into LORIS - :type bids_dir: str :param usage : script usage to be displayed when encountering an error - :type usage : st :return: config_file module with database credentials (config_file.mysql) - :rtype: module """ if not profile: @@ -182,33 +179,22 @@ def input_error_checking(profile, bids_dir, usage): def read_and_insert_bids( - bids_dir, data_dir, verbose, createcand, createvisit, - idsvalidation, nobidsvalidation, type, nocopy, db + bids_dir: str, data_dir: str, verbose: bool, createcand: bool, createvisit: bool, + idsvalidation: bool, nobidsvalidation: bool, type: Literal['raw', 'derivative'] | None, nocopy: bool, db: Database, ): """ Read the provided BIDS structure and import it into the database. :param bids_dir : path to the BIDS directory - :type bids_dir : str :param data_dir : data_dir config value - :type data_dir : string :param verbose : flag for more printing if set - :type verbose : bool :param createcand : allow database candidate creation if it did not exist already - :type createcand : bool :param createvisit : allow database visit creation if it did not exist already - :type createvisit : bool :param idsvalidation : allow pscid/candid validation in the BIDS directory name - :type idsvalidation : bool :param nobidsvalidation : disable bids dataset validation - :type nobidsvalidation : bool - :param type : raw | derivative. 
Type of the dataset - :type type : string + :param type : Type of the dataset :param nocopy : disable bids dataset copy in assembly_bids - :type nocopy : bool :param db : db object - :type db : object - """ # grep config settings from the Config module @@ -377,16 +363,13 @@ def read_and_insert_bids( db.disconnect() -def validateids(bids_dir, db, verbose): +def validateids(bids_dir: str, db: Database, verbose: bool): """ Validate that pscid and candid matches :param bids_dir : path to the BIDS directory - :type bids_dir : str :param db : database handler object - :type db : object - :param verbose : flag for more printing if set - :type verbose : bool + :param verbose : flag for more printing if set """ bids_folder = bids_dir.rstrip('/').split('/')[-1] @@ -405,21 +388,17 @@ def validateids(bids_dir, db, verbose): sys.exit(lib.exitcode.CANDIDATE_MISMATCH) -def create_loris_bids_directory(bids_reader, data_dir, verbose): +def create_loris_bids_directory(bids_reader: BidsReader, data_dir: str, verbose: bool) -> str: """ Creates the LORIS BIDS import root directory (with name and BIDS version) and copy over the dataset_description.json, README and participants.tsv files. 
- :param bids_reader: BIDS information handler object - :type bids_reader: object - :param data_dir : path of the LORIS data directory - :type data_dir : str - :param verbose : if true, prints out information while executing - :type verbose : bool + :param bids_reader : BIDS information handler object + :param data_dir : path of the LORIS data directory + :param verbose : if true, prints out information while executing :return: path to the LORIS BIDS import root directory - :rtype: str """ # making sure that there is a final / in bids_dir @@ -470,26 +449,21 @@ def create_loris_bids_directory(bids_reader, data_dir, verbose): return loris_bids_dirname -def grep_or_create_candidate_db_info(bids_reader, bids_id, db, createcand, verbose): +def grep_or_create_candidate_db_info( + bids_reader: BidsReader, bids_id: str, db: Database, createcand: bool, verbose: bool +) -> dict[str, Any]: """ Greps (or creates if candidate does not exist and createcand is true) the BIDS candidate in the LORIS candidate's table and return a list of candidates with their related fields from the database. :param bids_reader : BIDS information handler object - :type bids_reader : object :param bids_id : bids_id to be used (CandID or PSCID) - :type bids_id : str :param db : database handler object - :type db : object :param createcand : if true, creates the candidate in LORIS - :type createcand : bool :param verbose : if true, prints out information while executing - :type verbose : bool - :return: list of candidate's dictionaries. 
One entry in the list holds - a dictionary with field's values from the candidate table - :rtype: list + :return: The dictionary of the candidate database record """ candidate = Candidate(verbose=verbose, cand_id=bids_id) @@ -515,39 +489,27 @@ def grep_or_create_candidate_db_info(bids_reader, bids_id, db, createcand, verbo def grep_or_create_session_db_info( - bids_id, cand_id, visit_label, - db, createvisit, verbose, loris_bids_dir, - center_id, project_id, cohort_id, nocopy): + bids_id: str, cand_id: int, visit_label: str, db: Database, createvisit: bool, verbose: bool, + loris_bids_dir: str, center_id: int, project_id: int, cohort_id: int, nocopy: bool +) -> dict[str, Any]: """ Greps (or creates if session does not exist and createvisit is true) the BIDS session in the LORIS session's table and return a list of sessions with their related fields from the database. - :parma bids_id : BIDS ID of the session - :type bids_id : str - :param cand_id : CandID to use to create the session - :type cand_id : int - :param visit_label : Visit label to use to create the session - :type visit_label : str - :param db : database handler object - :type db : object - :param createvisit : if true, creates the session in LORIS - :type createvisit : bool - :param verbose : if true, prints out information while executing - :type verbose : bool - :param loris_bids_dir: LORIS BIDS import root directory to copy data - :type loris_bids_dir: str - :param center_id : CenterID to use to create the session - :type center_id : int - :param project_id : ProjectID to use to create the session - :type project_id : int - :param cohort_id : CohortID to use to create the session - :type cohort_id : int - :param nocopy : if true, skip the assembly_bids dataset copy - :type nocopy : bool + :param bids_id : BIDS ID of the session + :param cand_id : CandID to use to create the session + :param visit_label : Visit label to use to create the session + :param db : database handler object + :param 
createvisit : if true, creates the session in LORIS + :param verbose : if true, prints out information while executing + :param loris_bids_dir : LORIS BIDS import root directory to copy data + :param center_id : CenterID to use to create the session + :param project_id : ProjectID to use to create the session + :param cohort_id : CohortID to use to create the session + :param nocopy : if true, skip the assembly_bids dataset copy :return: session information grepped from LORIS for cand_id and visit_label - :rtype: dict """ session = Session(db, verbose, cand_id, visit_label, center_id, project_id, cohort_id) @@ -567,44 +529,30 @@ def grep_or_create_session_db_info( return loris_vl_info -def grep_candidate_sessions_info(bids_ses, bids_id, cand_id, loris_bids_dir, - createvisit, verbose, db, default_vl, - center_id, project_id, cohort_id, nocopy): +def grep_candidate_sessions_info( + bids_ses: list[str], bids_id: str, cand_id: int, loris_bids_dir: str, createvisit: bool, verbose: bool, + db: Database, default_vl: str, center_id: int, project_id: int, cohort_id: int, nocopy: bool, +) -> list[dict[str, Any]]: """ Greps all session info dictionaries for a given candidate and aggregates them into a list, with one entry per session. If the session does not exist in LORIS and that createvisit is true, it will create the session first. 
- :param bids_ses : list of BIDS sessions to grep info or insert - :type bids_ses : list - :param bids_id : BIDS ID of the candidate - :type bids_id : str - :param cand_id : candidate's CandID - :type cand_id : int - :param loris_bids_dir: LORIS BIDS import root directory to copy data - :type loris_bids_dir: str - :param createvisit : if true, creates the visits in LORIS - :type createvisit : bool - :param verbose : if true, prints out information while executing - :type verbose : bool - :param db : database handler object - :type db : object - :param default_vl : default visit label from the Config module - :type default_vl : str - :param center_id : center ID associated to the candidate and visit - :type center_id : int - :param project_id : project ID associated to the candidate and visit - :type project_id : int - :param cohort_id : cohort ID associated to the candidate and visit - :type cohort_id : int - :param nocopy : if true, skip the assembly_bids dataset copy - :type nocopy : bool - - + :param bids_ses : list of BIDS sessions to grep info or insert + :param bids_id : BIDS ID of the candidate + :param cand_id : candidate's CandID + :param loris_bids_dir : LORIS BIDS import root directory to copy data + :param createvisit : if true, creates the visits in LORIS + :param verbose : if true, prints out information while executing + :param db : database handler object + :param default_vl : default visit label from the Config module + :param center_id : center ID associated to the candidate and visit + :param project_id : project ID associated to the candidate and visit + :param cohort_id : cohort ID associated to the candidate and visit + :param nocopy : if true, skip the assembly_bids dataset copy :return: list of all session's dictionaries for a given candidate - :rtype: list """ loris_sessions_info = [] From 109dce47b189d6be94d08d00761cfd14879d5619 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Mon, 2 Dec 2024 19:50:15 -0500 Subject: [PATCH 2/8] bids 
session dataclass --- python/lib/bidsreader.py | 37 +++++++++++++++++++++++------------ python/scripts/bids_import.py | 33 ++++++++++++++++++++----------- 2 files changed, 47 insertions(+), 23 deletions(-) diff --git a/python/lib/bidsreader.py b/python/lib/bidsreader.py index 5a71e30fb..247efc643 100755 --- a/python/lib/bidsreader.py +++ b/python/lib/bidsreader.py @@ -3,6 +3,7 @@ import json import re import sys +from dataclasses import dataclass from typing import Any from bids import BIDSLayout @@ -22,6 +23,18 @@ __license__ = "GPLv3" +@dataclass +class BidsSessionInfo: + """ + Information about a BIDS session, that is, the label of the subject and the session, and the + modalities of this session. + """ + + subject_label: str + session_label: str | None + modalities: list[str] + + class BidsReader: """ This class reads a BIDS structure into a data dictionary using BIDS grabbids. @@ -212,7 +225,7 @@ def load_sessions_from_bids(self) -> dict[str, list[str]]: return cand_sessions - def load_modalities_from_bids(self) -> list[dict[str, Any]]: + def load_modalities_from_bids(self) -> list[BidsSessionInfo]: """ Grep the list of modalities available for each session and candidate directly from the BIDS structure. 
@@ -223,24 +236,24 @@ def load_modalities_from_bids(self) -> list[dict[str, Any]]: if self.verbose: print('Grepping the different modalities from the BIDS layout...') - cand_session_modalities_list = [] + cand_session_modalities_list: list[BidsSessionInfo] = [] for subject, visit_list in self.cand_sessions_list.items(): if visit_list: for visit in visit_list: modalities = self.bids_layout.get_datatype(subject=subject, session=visit) - cand_session_modalities_list.append({ - 'bids_sub_id': subject, - 'bids_ses_id': visit, - 'modalities' : modalities - }) + cand_session_modalities_list.append(BidsSessionInfo( + subject_label = subject, + session_label = visit, + modalities = modalities, + )) else: modalities = self.bids_layout.get_datatype(subject=subject) - cand_session_modalities_list.append({ - 'bids_sub_id': subject, - 'bids_ses_id': None, - 'modalities' : modalities - }) + cand_session_modalities_list.append(BidsSessionInfo( + subject_label = subject, + session_label = None, + modalities = modalities, + )) if self.verbose: print('\t=> Done grepping the different modalities from the BIDS layout\n') diff --git a/python/scripts/bids_import.py b/python/scripts/bids_import.py index 5b4d0d254..50859e1a4 100755 --- a/python/scripts/bids_import.py +++ b/python/scripts/bids_import.py @@ -26,6 +26,11 @@ sys.path.append('/home/user/python') +bids_eeg_modalities = ['eeg', 'ieeg'] + +bids_mri_modalities = ['anat', 'dwi', 'fmap', 'func'] + + # to limit the traceback when raising exceptions. 
# sys.tracebacklimit = 0 @@ -319,21 +324,27 @@ def read_and_insert_bids( ) # read list of modalities per session / candidate and register data - for row in bids_reader.cand_session_modalities_list: - bids_session = row['bids_ses_id'] - visit_label = bids_session if bids_session else default_bids_vl - loris_bids_visit_rel_dir = 'sub-' + row['bids_sub_id'] + '/' + 'ses-' + visit_label + for bids_sub_dir_info in bids_reader.cand_session_modalities_list: + if bids_sub_dir_info.session_label is not None: + visit_label = bids_sub_dir_info.session_label + else: + visit_label = default_bids_vl + + loris_bids_visit_rel_dir = os.path.join( + f'sub-{bids_sub_dir_info.subject_label}', + f'ses-{visit_label}', + ) - for modality in row['modalities']: + for modality in bids_sub_dir_info.modalities: loris_bids_modality_rel_dir = loris_bids_visit_rel_dir + '/' + modality + '/' if not nocopy: lib.utilities.create_dir(loris_bids_root_dir + loris_bids_modality_rel_dir, verbose) - if modality == 'eeg' or modality == 'ieeg': + if modality in bids_eeg_modalities: Eeg( bids_reader = bids_reader, - bids_sub_id = row['bids_sub_id'], - bids_ses_id = row['bids_ses_id'], + bids_sub_id = bids_sub_dir_info.subject_label, + bids_ses_id = bids_sub_dir_info.session_label, bids_modality = modality, db = db, verbose = verbose, @@ -345,11 +356,11 @@ def read_and_insert_bids( dataset_type = type ) - elif modality in ['anat', 'dwi', 'fmap', 'func']: + elif modality in bids_mri_modalities: Mri( bids_reader = bids_reader, - bids_sub_id = row['bids_sub_id'], - bids_ses_id = row['bids_ses_id'], + bids_sub_id = bids_sub_dir_info.subject_label, + bids_ses_id = bids_sub_dir_info.session_label, bids_modality = modality, db = db, verbose = verbose, From 7b6391458f2d893bc4d35890c34578ec16a29f0c Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Fri, 27 Dec 2024 09:57:54 -0500 Subject: [PATCH 3/8] add typed utilities function --- pyproject.toml | 1 + python/lib/util.py | 44 
++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 python/lib/util.py diff --git a/pyproject.toml b/pyproject.toml index 6b5a79fa3..47c9a2d20 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ include = [ "python/lib/get_subject_session.py", "python/lib/logging.py", "python/lib/make_env.py", + "python/lib/util.py", "python/lib/validate_subject_info.py", ] typeCheckingMode = "strict" diff --git a/python/lib/util.py b/python/lib/util.py new file mode 100644 index 000000000..0c176f66a --- /dev/null +++ b/python/lib/util.py @@ -0,0 +1,44 @@ +from collections.abc import Callable, Iterable, Iterator +from typing import TypeVar + +T = TypeVar('T') + + +def find(predicate: Callable[[T], bool], iterable: Iterable[T]) -> T | None: + """ + Find the first element in an iterable that satisfies a predicate, or return `None` if no match + is found. + """ + + for item in iterable: + if predicate(item): + return item + + return None + + +T = TypeVar('T') # type: ignore +U = TypeVar('U') + + +def filter_map(function: Callable[[T], U | None], iterable: Iterable[T]) -> Iterator[U]: + """ + Apply a function to each element of an iterator and yields the results that are not `None`. + """ + + for item in iterable: + result = function(item) + if result is not None: + yield result + + +def try_parse_int(value: str) -> int | None: + """ + Parse a string into an integer (base 10), or return `None` if the string does not correspond + to an integer. 
+ """ + + try: + return int(value) + except ValueError: + return None From a668c3b5032d575b739ea719945dcf624aba5c51 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Mon, 2 Dec 2024 20:06:07 -0500 Subject: [PATCH 4/8] bids participants dataclass --- pyproject.toml | 1 + python/lib/bids/participant.py | 74 ++++++++++++++++++++++++++++++++++ python/lib/bidsreader.py | 50 +++++++++-------------- python/lib/candidate.py | 51 ++++++++--------------- python/lib/eeg.py | 8 ++-- python/lib/mri.py | 8 ++-- python/scripts/bids_import.py | 12 +++--- 7 files changed, 126 insertions(+), 78 deletions(-) create mode 100644 python/lib/bids/participant.py diff --git a/pyproject.toml b/pyproject.toml index 47c9a2d20..74148566f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ select = ["E", "F", "I", "N", "UP", "W"] [tool.pyright] include = [ "python/tests", + "python/lib/bids", "python/lib/db", "python/lib/exception", "python/lib/config_file.py", diff --git a/python/lib/bids/participant.py b/python/lib/bids/participant.py new file mode 100644 index 000000000..62439ad5a --- /dev/null +++ b/python/lib/bids/participant.py @@ -0,0 +1,74 @@ +from dataclasses import dataclass + +import dateutil.parser +from bids import BIDSLayout + +import lib.utilities as utilities + + +@dataclass +class BidsParticipant: + """ + Information about a BIDS participant represented in an entry in the `participants.tsv` file of + a BIDS dataset. + """ + + id: str + birth_date: str | None = None + sex: str | None = None + age: str | None = None + site: str | None = None + cohort: str | None = None + project: str | None = None + # FIXME: Both "cohort" and "subproject" are used in scripts, this may be a bug. + subproject: str | None = None + + +def read_bids_participants_file(bids_layout: BIDSLayout) -> list[BidsParticipant] | None: + """ + Find, read and parse the `participants.tsv` file of a BIDS dataset. Return the BIDS participant + entries if a file is found, or `None` otherwise. 
+ """ + + # Find the `participants.tsv` file in the BIDS dataset. + bids_participants_file_path = None + for bids_file_path in bids_layout.get(suffix='participants', return_type='filename'): # type: ignore + if 'participants.tsv' in bids_file_path: + bids_participants_file_path = bids_file_path # type: ignore + break + + # If no `participants.tsv` file is found, return `None`. + if bids_participants_file_path is None: + return None + + # Parse the BIDS participant entries from the `participants.tsv` file. + bids_participant_rows = utilities.read_tsv_file(bids_participants_file_path) # type: ignore + return list(map(read_bids_participant_row, bids_participant_rows)) # type: ignore + + +def read_bids_participant_row(row: dict[str, str]) -> BidsParticipant: + """ + Get a BIDS participant entry from a parsed TSV line of a `participants.tsv` file. + """ + + # Get the participant ID by removing the `sub-` prefix if it is present. + participant_id = row['participant_id'].replace('sub-', '') + + # Get the participant date of birth from one of the possible date of birth fields. + birth_date = None + for birth_date_name in ['date_of_birth', 'birth_date', 'dob']: + if birth_date_name in row: + birth_date = dateutil.parser.parse(row[birth_date_name]).strftime('%Y-%m-%d') + break + + # Create the BIDS participant object. 
+ return BidsParticipant( + id = participant_id, + birth_date = birth_date, + sex = row.get('sex'), + age = row.get('age'), + site = row.get('site'), + cohort = row.get('cohort'), + project = row.get('project'), + subproject = row.get('subproject'), + ) diff --git a/python/lib/bidsreader.py b/python/lib/bidsreader.py index 247efc643..5156b3509 100755 --- a/python/lib/bidsreader.py +++ b/python/lib/bidsreader.py @@ -4,12 +4,11 @@ import re import sys from dataclasses import dataclass -from typing import Any from bids import BIDSLayout import lib.exitcode -import lib.utilities as utilities +from lib.bids.participant import BidsParticipant, read_bids_participants_file # import bids # BIDSLayoutIndexer is required for PyBIDS >= 0.12.1 @@ -76,7 +75,7 @@ def __init__(self, bids_dir: str, verbose: bool, validate: bool = True): print("WARNING: Cannot read dataset_description.json") # load BIDS candidates information - self.participants_info = self.load_candidates_from_bids() + self.bids_participants = self.load_candidates_from_bids() # load BIDS sessions information self.cand_sessions_list = self.load_sessions_from_bids() @@ -125,10 +124,10 @@ def load_bids_data(self, validate: bool): return bids_layout - def load_candidates_from_bids(self) -> list[dict[str, Any]]: + def load_candidates_from_bids(self) -> list[BidsParticipant]: """ Loads the list of candidates from the BIDS study. List of - participants and their information will be stored in participants_info. + participants and their information will be stored in bids_participants. 
:return: list of dictionaries with participant information from BIDS """ @@ -136,30 +135,23 @@ def load_candidates_from_bids(self) -> list[dict[str, Any]]: if self.verbose: print('Grepping candidates from the BIDS layout...') - # grep the participant.tsv file and parse it - participants_info = None - for file in self.bids_layout.get(suffix='participants', return_type='filename'): - # note file[0] returns the path to participants.tsv - if 'participants.tsv' in file: - participants_info = utilities.read_tsv_file(file) - else: - continue + bids_participants = read_bids_participants_file(self.bids_layout) - if participants_info: - self.candidates_list_validation(participants_info) + if bids_participants: + self.candidates_list_validation(bids_participants) else: bids_subjects = self.bids_layout.get_subjects() - participants_info = [{'participant_id': sub_id} for sub_id in bids_subjects] + bids_participants = [BidsParticipant(sub_id) for sub_id in bids_subjects] if self.verbose: print('\t=> List of participants found:') - for participant in participants_info: - print('\t\t' + participant['participant_id']) + for bids_participant in bids_participants: + print('\t\t' + bids_participant.id) print('\n') - return participants_info + return bids_participants - def candidates_list_validation(self, participants_info: list[dict[str, Any]]): + def candidates_list_validation(self, bids_participants: list[BidsParticipant]): """ Validates whether the subjects listed in participants.tsv match the list of participant directory. 
If there is a mismatch, will exit with @@ -175,18 +167,16 @@ def candidates_list_validation(self, participants_info: list[dict[str, Any]]): "participants.tsv and raw data found in the BIDS " "directory") - # check that all subjects listed in participants_info are also in + # check that all subjects listed in bids_participants are also in # subjects array and vice versa - for row in participants_info: - # remove the "sub-" in front of the subject ID if present - row['participant_id'] = row['participant_id'].replace('sub-', '') - if row['participant_id'] not in subjects: + for bids_participant in bids_participants: + if bids_participant.id not in subjects: print(mismatch_message) - print(row['participant_id'] + 'is missing from the BIDS Layout') + print(bids_participant.id + 'is missing from the BIDS Layout') print('List of subjects parsed by the BIDS layout: ' + ', '.join(subjects)) sys.exit(lib.exitcode.BIDS_CANDIDATE_MISMATCH) # remove the subject from the list of subjects - subjects.remove(row['participant_id']) + subjects.remove(bids_participant.id) # check that no subjects are left in subjects array if subjects: @@ -210,9 +200,9 @@ def load_sessions_from_bids(self) -> dict[str, list[str]]: cand_sessions = {} - for row in self.participants_info: - ses = self.bids_layout.get_sessions(subject=row['participant_id']) - cand_sessions[row['participant_id']] = ses + for bids_participant in self.bids_participants: + ses = self.bids_layout.get_sessions(subject=bids_participant.id) + cand_sessions[bids_participant.id] = ses if self.verbose: print('\t=> List of sessions found:\n') diff --git a/python/lib/candidate.py b/python/lib/candidate.py index 370433815..f3bdf53b8 100644 --- a/python/lib/candidate.py +++ b/python/lib/candidate.py @@ -3,9 +3,8 @@ import random import sys -from dateutil.parser import parse - import lib.exitcode +from lib.bids.participant import BidsParticipant __license__ = "GPLv3" @@ -59,16 +58,15 @@ def __init__(self, verbose, psc_id=None, 
cand_id=None, sex=None, dob=None): self.center_id = None self.project_id = None - def create_candidate(self, db, participants_info): + def create_candidate(self, db, bids_participants: list[BidsParticipant]): """ Creates a candidate using BIDS information provided in the - participants_info's list. + bids_participants's list. :param db : database handler object :type db : object - :param participants_info: list of dictionary with participants + :param bids_participants: list of dictionary with participants information from BIDS - :type participants_info: list :return: dictionary with candidate info from the candidate's table :rtype: dict @@ -81,25 +79,26 @@ def create_candidate(self, db, participants_info): if not self.cand_id: self.cand_id = self.generate_cand_id(db) - for row in participants_info: - if not row['participant_id'] == self.psc_id: + for bids_participant in bids_participants: + if bids_participant.id != self.psc_id: continue - self.grep_bids_dob(row) - if 'sex' in row: - self.map_sex(row['sex']) - if 'age' in row: - self.age = row['age'] + + self.dob = bids_participant.date_of_birth + if bids_participant.sex is not None: + self.map_sex(bids_participant.sex) + if bids_participant.age is not None: + self.age = bids_participant.age # three steps to find site: # 1. try matching full name from 'site' column in participants.tsv in db # 2. try extracting alias from pscid # 3. 
try finding previous site in candidate table - if 'site' in row and row['site'].lower() not in ("null", ""): + if bids_participant.site is not None and bids_participant.site.lower() not in ('', 'null'): # search site id in psc table by its full name site_info = db.pselect( "SELECT CenterID FROM psc WHERE Name = %s", - [row['site'], ] + [bids_participant.site, ] ) if len(site_info) > 0: self.center_id = site_info[0]['CenterID'] @@ -108,7 +107,7 @@ def create_candidate(self, db, participants_info): # search site id in psc table by its alias extracted from pscid db_sites = db.pselect("SELECT CenterID, Alias FROM psc") for site in db_sites: - if site['Alias'] in row['participant_id']: + if site['Alias'] in bids_participant.id: self.center_id = site['CenterID'] if self.center_id is None: @@ -124,11 +123,11 @@ def create_candidate(self, db, participants_info): # 1. find full name in 'project' column in participants.tsv # 2. find previous in candidate table - if 'project' in row and row['project'].lower() not in ("null", ""): + if bids_participant.project is not None and bids_participant.project.lower() not in ('', 'null'): # search project id in Project table by its full name project_info = db.pselect( "SELECT ProjectID FROM Project WHERE Name = %s", - [row['project'], ] + [bids_participant.project, ] ) if len(project_info) > 0: self.project_id = project_info[0]['ProjectID'] @@ -220,22 +219,6 @@ def map_sex(self, sex): if sex.lower() in ('f', 'female'): self.sex = 'Female' - def grep_bids_dob(self, subject_info): - """ - Greps the date of birth from the BIDS structure and add it to self.dob which - will be inserted into the DoB field of the candidate table - - :param subject_info: dictionary with all information present in the BIDS - participants.tsv file for a given candidate - :type subject_info: dict - """ - - dob_names = ['date_of_birth', 'birth_date', 'dob'] - for name in dob_names: - if name in subject_info: - dob = parse(subject_info[name]) - self.dob = 
dob.strftime('%Y-%m-%d') - @staticmethod def generate_cand_id(db): """ diff --git a/python/lib/eeg.py b/python/lib/eeg.py index 10ad3b25b..d1f4235fa 100755 --- a/python/lib/eeg.py +++ b/python/lib/eeg.py @@ -140,13 +140,13 @@ def __init__( self.hed_union = self.db.pselect(query=hed_query, args=()) self.cohort_id = None - for row in bids_reader.participants_info: - if not row['participant_id'] == self.psc_id: + for bids_participant in bids_reader.bids_participants: + if bids_participant.id != self.psc_id: continue - if 'cohort' in row: + if bids_participant.cohort is not None: cohort_info = db.pselect( "SELECT CohortID FROM cohort WHERE title = %s", - [row['cohort'], ] + [bids_participant.cohort, ] ) if len(cohort_info) > 0: self.cohort_id = cohort_info[0]['CohortID'] diff --git a/python/lib/mri.py b/python/lib/mri.py index 29c47cc47..3f7ea5d23 100644 --- a/python/lib/mri.py +++ b/python/lib/mri.py @@ -123,13 +123,13 @@ def __init__( self.center_id = self.loris_cand_info['RegistrationCenterID'] self.project_id = self.loris_cand_info['RegistrationProjectID'] self.cohort_id = None - for row in bids_reader.participants_info: - if not row['participant_id'] == self.psc_id: + for bids_participant in bids_reader.bids_participants: + if bids_participant.id != self.psc_id: continue - if 'cohort' in row: + if bids_participant.cohort is not None: cohort_info = db.pselect( "SELECT CohortID FROM cohort WHERE title = %s", - [row['cohort'], ] + [bids_participant.cohort, ] ) if len(cohort_info) > 0: self.cohort_id = cohort_info[0]['CohortID'] diff --git a/python/scripts/bids_import.py b/python/scripts/bids_import.py index 50859e1a4..7ecc20f65 100755 --- a/python/scripts/bids_import.py +++ b/python/scripts/bids_import.py @@ -215,7 +215,7 @@ def read_and_insert_bids( bids_reader = BidsReader(bids_dir, verbose, False) else: bids_reader = BidsReader(bids_dir, verbose) - if not bids_reader.participants_info \ + if not bids_reader.bids_participants \ or not bids_reader.cand_sessions_list 
\ or not bids_reader.cand_session_modalities_list: message = '\n\tERROR: could not properly parse the following' \ @@ -234,10 +234,10 @@ def read_and_insert_bids( single_project_id = None # loop through subjects - for bids_subject_info in bids_reader.participants_info: + for bids_participant in bids_reader.bids_participants: # greps BIDS information for the candidate - bids_id = bids_subject_info['participant_id'] + bids_id = bids_participant.id bids_sessions = bids_reader.cand_sessions_list[bids_id] # greps BIDS candidate's info from LORIS (creates the candidate if it @@ -257,9 +257,9 @@ def read_and_insert_bids( cohort_id = None # TODO: change subproject -> cohort in participants.tsv? - if 'subproject' in bids_subject_info: + if bids_participant.subproject is not None: # TODO: change subproject -> cohort in participants.tsv? - cohort = bids_subject_info['subproject'] + cohort = bids_participant.subproject cohort_info = db.pselect( "SELECT CohortID FROM cohort WHERE title = %s", [cohort, ] @@ -486,7 +486,7 @@ def grep_or_create_candidate_db_info( if not loris_cand_info and createcand: loris_cand_info = candidate.create_candidate( - db, bids_reader.participants_info + db, bids_reader.bids_participants ) if not loris_cand_info: print("Creating candidate failed. 
Cannot importing the files.\n") From 8ec002c090e2c35e803f5c4cf609c4b5005e5477 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Tue, 3 Dec 2024 14:20:26 -0500 Subject: [PATCH 5/8] factorize combination iteration --- python/lib/bidsreader.py | 11 +++++ python/scripts/bids_import.py | 76 +++++++++++++++++------------------ 2 files changed, 48 insertions(+), 39 deletions(-) diff --git a/python/lib/bidsreader.py b/python/lib/bidsreader.py index 5156b3509..e927357d1 100755 --- a/python/lib/bidsreader.py +++ b/python/lib/bidsreader.py @@ -3,6 +3,7 @@ import json import re import sys +from collections.abc import Generator from dataclasses import dataclass from bids import BIDSLayout @@ -250,6 +251,16 @@ def load_modalities_from_bids(self) -> list[BidsSessionInfo]: return cand_session_modalities_list + def iter_modality_combinations(self) -> Generator[tuple[str, str | None, str], None, None]: + """ + Iterate over the different subject / session / modality combinations present in the BIDS + dataset. 
+ """ + + for cand_session_modalities in self.cand_session_modalities_list: + for modality in cand_session_modalities.modalities: + yield cand_session_modalities.subject_label, cand_session_modalities.session_label, modality + @staticmethod def grep_file(files_list: list[str], match_pattern: str, derivative_pattern: str | None = None) -> str | None: """ diff --git a/python/scripts/bids_import.py b/python/scripts/bids_import.py index 7ecc20f65..598c78421 100755 --- a/python/scripts/bids_import.py +++ b/python/scripts/bids_import.py @@ -324,51 +324,49 @@ def read_and_insert_bids( ) # read list of modalities per session / candidate and register data - for bids_sub_dir_info in bids_reader.cand_session_modalities_list: - if bids_sub_dir_info.session_label is not None: - visit_label = bids_sub_dir_info.session_label + for subject_label, session_label, modality in bids_reader.iter_modality_combinations(): + if session_label is not None: + visit_label = session_label else: visit_label = default_bids_vl - loris_bids_visit_rel_dir = os.path.join( - f'sub-{bids_sub_dir_info.subject_label}', + loris_bids_modality_rel_dir = os.path.join( + f'sub-{subject_label}', f'ses-{visit_label}', + modality, ) - for modality in bids_sub_dir_info.modalities: - loris_bids_modality_rel_dir = loris_bids_visit_rel_dir + '/' + modality + '/' - if not nocopy: - lib.utilities.create_dir(loris_bids_root_dir + loris_bids_modality_rel_dir, verbose) - - if modality in bids_eeg_modalities: - Eeg( - bids_reader = bids_reader, - bids_sub_id = bids_sub_dir_info.subject_label, - bids_ses_id = bids_sub_dir_info.session_label, - bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_eeg_rel_dir = loris_bids_modality_rel_dir, - loris_bids_root_dir = loris_bids_root_dir, - dataset_tag_dict = dataset_tag_dict, - dataset_type = type - ) - - elif modality in bids_mri_modalities: - Mri( - bids_reader = bids_reader, - bids_sub_id = 
bids_sub_dir_info.subject_label, - bids_ses_id = bids_sub_dir_info.session_label, - bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_mri_rel_dir = loris_bids_modality_rel_dir, - loris_bids_root_dir = loris_bids_root_dir - ) + if not nocopy: + lib.utilities.create_dir(loris_bids_root_dir + loris_bids_modality_rel_dir, verbose) + + if modality in bids_eeg_modalities: + Eeg( + bids_reader = bids_reader, + bids_sub_id = subject_label, + bids_ses_id = session_label, + bids_modality = modality, + db = db, + verbose = verbose, + data_dir = data_dir, + default_visit_label = default_bids_vl, + loris_bids_eeg_rel_dir = loris_bids_modality_rel_dir, + loris_bids_root_dir = loris_bids_root_dir, + dataset_tag_dict = dataset_tag_dict, + dataset_type = type + ) + elif modality in bids_mri_modalities: + Mri( + bids_reader = bids_reader, + bids_sub_id = subject_label, + bids_ses_id = session_label, + bids_modality = modality, + db = db, + verbose = verbose, + data_dir = data_dir, + default_visit_label = default_bids_vl, + loris_bids_mri_rel_dir = loris_bids_modality_rel_dir, + loris_bids_root_dir = loris_bids_root_dir + ) # disconnect from the database db.disconnect() From d9f344d96c7c7721e061d8d3f5252fdc977936ad Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Tue, 3 Dec 2024 15:19:07 -0500 Subject: [PATCH 6/8] fix layout ignore --- python/lib/bidsreader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lib/bidsreader.py b/python/lib/bidsreader.py index e927357d1..e00ef1bce 100755 --- a/python/lib/bidsreader.py +++ b/python/lib/bidsreader.py @@ -95,7 +95,7 @@ def load_bids_data(self, validate: bool): if self.verbose: print('Loading the BIDS dataset with BIDS layout library...\n') - exclude_arr = ['/code/', '/sourcedata/', '/log/', '.git/'] + exclude_arr = ['code', 'sourcedata', 'log', '.git'] force_arr = [re.compile(r"_annotations\.(tsv|json)$")] # BIDSLayoutIndexer is 
required for PyBIDS >= 0.12.1 From 8134558c90fd4aabc63a0bcd1f94ee83f1550b77 Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Tue, 3 Dec 2024 14:48:28 -0500 Subject: [PATCH 7/8] skip files already inserted --- python/scripts/bids_import.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/python/scripts/bids_import.py b/python/scripts/bids_import.py index 598c78421..9e1000196 100755 --- a/python/scripts/bids_import.py +++ b/python/scripts/bids_import.py @@ -9,6 +9,9 @@ import sys from typing import Any, Literal +from bids import BIDSLayout +from bids.layout import BIDSFile + import lib.exitcode import lib.physiological import lib.utilities @@ -323,6 +326,9 @@ def read_and_insert_bids( hed_union=hed_union ) + # TODO: What if `loris_bids_root_dir` is `None` (nocopy) ? + loris_bids = BIDSLayout(loris_bids_root_dir) + # read list of modalities per session / candidate and register data for subject_label, session_label, modality in bids_reader.iter_modality_combinations(): if session_label is not None: @@ -330,6 +336,22 @@ def read_and_insert_bids( else: visit_label = default_bids_vl + loris_bids_modality_files: list[BIDSFile] = loris_bids.get( # type: ignore + subject=subject_label, + session=visit_label, + suffix=modality, + ) + + if loris_bids_modality_files != []: + print( + 'Files already inserted in LORIS, skipping:\n' + f'- Subject: {subject_label}\n' + f'- Session: {session_label}\n' + f'- Modality: {modality}' + ) + + continue + loris_bids_modality_rel_dir = os.path.join( f'sub-{subject_label}', f'ses-{visit_label}', From f27561a7118b743fcf3f830cc4fc4fbcbd959cda Mon Sep 17 00:00:00 2001 From: Maxime Mulder Date: Fri, 27 Dec 2024 05:15:47 -0500 Subject: [PATCH 8/8] wip --- pyproject.toml | 2 +- python/lib/bidsreader.py | 2 +- python/lib/candidate.py | 4 +- python/lib/db/models/cohort.py | 15 + python/lib/db/queries/candidate.py | 12 +- python/lib/db/queries/cohort.py | 16 + python/lib/db/queries/project.py | 19 +- 
python/lib/db/queries/site.py | 10 + python/lib/import_bids_dataset/database.py | 258 +++++++ .../lib/import_bids_dataset/dataset_files.py | 45 ++ .../participant.py | 55 +- python/lib/session.py | 5 +- python/scripts/bids_import.py | 659 ++++++------------ 13 files changed, 629 insertions(+), 473 deletions(-) create mode 100644 python/lib/db/models/cohort.py create mode 100644 python/lib/db/queries/cohort.py create mode 100644 python/lib/import_bids_dataset/database.py create mode 100644 python/lib/import_bids_dataset/dataset_files.py rename python/lib/{bids => import_bids_dataset}/participant.py (51%) diff --git a/pyproject.toml b/pyproject.toml index 74148566f..42320f8be 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,13 +15,13 @@ select = ["E", "F", "I", "N", "UP", "W"] [tool.pyright] include = [ "python/tests", - "python/lib/bids", "python/lib/db", "python/lib/exception", "python/lib/config_file.py", "python/lib/env.py", "python/lib/file_system.py", "python/lib/get_subject_session.py", + "python/lib/import_bids_dataset", "python/lib/logging.py", "python/lib/make_env.py", "python/lib/util.py", diff --git a/python/lib/bidsreader.py b/python/lib/bidsreader.py index e00ef1bce..89221dcf4 100755 --- a/python/lib/bidsreader.py +++ b/python/lib/bidsreader.py @@ -9,7 +9,7 @@ from bids import BIDSLayout import lib.exitcode -from lib.bids.participant import BidsParticipant, read_bids_participants_file +from lib.import_bids_dataset.participant import BidsParticipant, read_bids_participants_file # import bids # BIDSLayoutIndexer is required for PyBIDS >= 0.12.1 diff --git a/python/lib/candidate.py b/python/lib/candidate.py index f3bdf53b8..cc1de62dc 100644 --- a/python/lib/candidate.py +++ b/python/lib/candidate.py @@ -4,7 +4,7 @@ import sys import lib.exitcode -from lib.bids.participant import BidsParticipant +from lib.import_bids_dataset.participant import BidsParticipant __license__ = "GPLv3" @@ -83,7 +83,7 @@ def create_candidate(self, db, bids_participants: 
list[BidsParticipant]): if bids_participant.id != self.psc_id: continue - self.dob = bids_participant.date_of_birth + self.dob = bids_participant.birth_date if bids_participant.sex is not None: self.map_sex(bids_participant.sex) if bids_participant.age is not None: diff --git a/python/lib/db/models/cohort.py b/python/lib/db/models/cohort.py new file mode 100644 index 000000000..66b323a24 --- /dev/null +++ b/python/lib/db/models/cohort.py @@ -0,0 +1,15 @@ +from typing import Optional + +from sqlalchemy.orm import Mapped, mapped_column + +from lib.db.base import Base + + +class DbCohort(Base): + __tablename__ = 'cohort' + + id : Mapped[int] = mapped_column('CohortID', primary_key=True) + name : Mapped[str] = mapped_column('title') + use_edc : Mapped[Optional[bool]] = mapped_column('useEDC') + window_difference : Mapped[Optional[str]] = mapped_column('WindowDifference') + recruitment_target : Mapped[Optional[int]] = mapped_column('RecruitmentTarget') diff --git a/python/lib/db/queries/candidate.py b/python/lib/db/queries/candidate.py index 27a8bee5c..6f9a77ea4 100644 --- a/python/lib/db/queries/candidate.py +++ b/python/lib/db/queries/candidate.py @@ -6,9 +6,17 @@ def try_get_candidate_with_cand_id(db: Database, cand_id: int): """ - Get a candidate from the database using its CandID, or return `None` if no candidate is - found. + Get a candidate from the database using its CandID, or return `None` if no candidate is found. """ query = select(DbCandidate).where(DbCandidate.cand_id == cand_id) return db.execute(query).scalar_one_or_none() + + +def try_get_candidate_with_psc_id(db: Database, psc_id: str): + """ + Get a candidate from the database using its PSCID, or return `None` if no candidate is found. 
+ """ + + query = select(DbCandidate).where(DbCandidate.psc_id == psc_id) + return db.execute(query).scalar_one_or_none() diff --git a/python/lib/db/queries/cohort.py b/python/lib/db/queries/cohort.py new file mode 100644 index 000000000..35fd599d3 --- /dev/null +++ b/python/lib/db/queries/cohort.py @@ -0,0 +1,16 @@ +from typing import Optional + +from sqlalchemy import select +from sqlalchemy.orm import Session as Database + +from lib.db.models.cohort import DbCohort + + +def try_get_cohort_with_name(db: Database, name: str) -> Optional[DbCohort]: + """ + Try to get a cohort from the database using its name, or return `None` if no cohort is found. + """ + + return db.execute(select(DbCohort) + .where(DbCohort.name == name) + ).scalar_one_or_none() diff --git a/python/lib/db/queries/project.py b/python/lib/db/queries/project.py index deeb04b6c..ac19f802d 100644 --- a/python/lib/db/queries/project.py +++ b/python/lib/db/queries/project.py @@ -1,10 +1,27 @@ +from typing import Optional + from sqlalchemy import select from sqlalchemy.orm import Session as Database +from lib.db.models.project import DbProject from lib.db.models.project_cohort import DbProjectCohort -def try_get_project_cohort_with_project_id_cohort_id(db: Database, project_id: int, cohort_id: int): +def try_get_project_with_name(db: Database, name: str) -> Optional[DbProject]: + """ + Try to get a project from the database using its name, or return `None` if no project is found. + """ + + return db.execute(select(DbProject) + .where(DbProject.name == name) + ).scalar_one_or_none() + + +def try_get_project_cohort_with_project_id_cohort_id( + db: Database, + project_id: int, + cohort_id: int, +) -> Optional[DbProjectCohort]: """ Get a project cohort relation from the database using its project ID and candidate ID, or return `None` if no relation is found. 
diff --git a/python/lib/db/queries/site.py b/python/lib/db/queries/site.py index 2c6834a73..6a2413ad9 100644 --- a/python/lib/db/queries/site.py +++ b/python/lib/db/queries/site.py @@ -23,6 +23,16 @@ def try_get_site_with_cand_id_visit_label(db: Database, cand_id: int, visit_labe ).scalar_one_or_none() +def try_get_site_with_name(db: Database, name: str) -> Optional[DbSite]: + """ + Get a site from the database using a site name, or return `None` if no site is found. + """ + + return db.execute(select(DbSite) + .where(DbSite.name == name) + ).scalar_one_or_none() + + def get_all_sites(db: Database) -> Sequence[DbSite]: """ Get a sequence of all sites from the database. diff --git a/python/lib/import_bids_dataset/database.py b/python/lib/import_bids_dataset/database.py new file mode 100644 index 000000000..d7e7ed871 --- /dev/null +++ b/python/lib/import_bids_dataset/database.py @@ -0,0 +1,258 @@ +import random +from typing import cast + +from sqlalchemy.orm import Session as Database + +import lib.exitcode +from lib.bidsreader import BidsReader +from lib.db.models.candidate import DbCandidate +from lib.db.models.cohort import DbCohort +from lib.db.models.session import DbSession +from lib.db.queries.candidate import try_get_candidate_with_cand_id, try_get_candidate_with_psc_id +from lib.db.queries.cohort import try_get_cohort_with_name +from lib.db.queries.config import get_config_with_setting_name +from lib.db.queries.project import try_get_project_with_name +from lib.db.queries.session import try_get_session_with_cand_id_visit_label +from lib.db.queries.site import get_all_sites, try_get_site_with_name +from lib.env import Env +from lib.import_bids_dataset.participant import BidsParticipant +from lib.logging import log, log_error_exit +from lib.util import try_parse_int + + +def check_or_create_bids_candidates_and_sessions( + env: Env, + bids_reader: BidsReader, + create_candidate: bool, + create_session: bool, +) -> int: + """ + Check that the candidates and 
sessions of a BIDS dataset exist in the LORIS database, or create + them using the information of the BIDS dataset if the relevant arguments are passed. Exit the + program with an error if a candidate or session cannot be created. Return the project ID of a + candidate. + """ + + # Since there should always be participants, 0 will be overwritten. + single_project_id = 0 + + for bids_participant in bids_reader.bids_participants: + + candidate = check_or_create_bids_candidate(env, bids_participant, create_candidate) + + single_project_id = candidate.registration_project_id + + cohort = None + if bids_participant.cohort is not None: + cohort = try_get_cohort_with_name(env.db, bids_participant.cohort) + + bids_sessions = bids_reader.cand_sessions_list[bids_participant.id] + + if bids_sessions == []: + default_visit_label = cast(str, get_config_with_setting_name(env.db, 'default_bids_vl').value) + check_or_create_bids_session(env, candidate, cohort, default_visit_label, create_session) + else: + for bids_session in bids_sessions: + check_or_create_bids_session(env, candidate, cohort, bids_session, create_session) + + env.db.commit() + + return single_project_id + + +def check_or_create_bids_candidate(env: Env, bids_participant: BidsParticipant, create_candidate: bool) -> DbCandidate: + """ + Check that the candidate of a BIDS `participants.tsv` record exists in the LORIS database, or + create them using the information of that record if the relevant argument is passed. Exit the + program with an error if the candidate cannot be created. + """ + + cand_id = try_parse_int(bids_participant.id) + if cand_id is not None: + candidate = try_get_candidate_with_cand_id(env.db, cand_id) + if candidate is not None: + return candidate + + candidate = try_get_candidate_with_psc_id(env.db, bids_participant.id) + if candidate is not None: + return candidate + + if not create_candidate: + log_error_exit( + env, + f"Candidate '{bids_participant.id}' not found. 
You can retry with the --createcandidate option.", + lib.exitcode.CANDIDATE_NOT_FOUND, + ) + + return create_bids_candidate(env, bids_participant) + + +def create_bids_candidate(env: Env, bids_participant: BidsParticipant) -> DbCandidate: + """ + Create a candidate using the information of a `participants.tsv` record, or exit the program + with an error if the candidate cannot be created. + """ + + psc_id = bids_participant.id + cand_id = generate_new_cand_id(env.db) + + # TODO: Convert to `Optional[date]` + birth_date = bids_participant.birth_date + + if bids_participant.sex is not None: + # TODO: Check that the sex exists in the database + sex = get_standard_sex_name(bids_participant.sex) + else: + sex = None + + site = None + if bids_participant.site is not None: + site = try_get_site_with_name(env.db, bids_participant.site) + + # If no site was found, try to extract it to match the PSCID with a site alias. + if site is None: + all_sites = get_all_sites(env.db) + for all_site in all_sites: + if all_site.alias in psc_id: + site = all_site + + project = None + if bids_participant.project is not None: + project = try_get_project_with_name(env.db, bids_participant.project) + + if site is None: + log_error_exit( + env, + ( + f"Could not find a site for candidate '{psc_id}'.\n" + "Please check that your psc table contains a site with an" + " alias matching the BIDS participant_id or a name matching the site mentioned in" + " participants.tsv's site column." + ), + lib.exitcode.PROJECT_CUSTOMIZATION_FAILURE, + ) + + if project is None: + log_error_exit( + env, + ( + f"Could not find a project for candidate '{cand_id}'\n." 
+ "Please check that your project table contains a project with a" + " name matching the participants.tsv's project column" + ), + lib.exitcode.PROJECT_CUSTOMIZATION_FAILURE, + ) + + log( + env, + ( + "Creating candidate with:\n" + f"PSCID = {psc_id}\n" + f"CandID = {cand_id}\n" + f"SiteID = {site.id}\n" + f"ProjectID = {project.id}" + ) + ) + + candidate = DbCandidate( + cand_id = cand_id, + psc_id = psc_id, + date_of_birth = birth_date, + sex = sex, + registration_site_id = site.id, + registration_projec_id = project.id, + ) + + env.db.add(candidate) + env.db.flush() + + return candidate + + +def check_or_create_bids_session( + env: Env, + candidate: DbCandidate, + cohort: DbCohort | None, + visit_label: str, + create_session: bool, +) -> DbSession: + """ + Check that a session of a BIDS dataset exists in the LORIS database, or create it using + information previously obtained from that dataset if the relevant argument is passed. Exit the + program with an error if the session cannot be created. + """ + + session = try_get_session_with_cand_id_visit_label(env.db, candidate.cand_id, visit_label) + if session is not None: + return session + + if not create_session: + log_error_exit( + env, + f"No session found for candidate {candidate.cand_id} and visit label '{visit_label}'." + ) + + if cohort is None: + log_error_exit( + env, + f"No cohort found for candidate {candidate.cand_id}, cannot create session.", + ) + + return create_bids_session(env, candidate, cohort, visit_label) + + +def create_bids_session(env: Env, candidate: DbCandidate, cohort: DbCohort, visit_label: str) -> DbSession: + """ + Create a session using information previously obtained from a BIDS dataset, or exit the program + with an error if the session cannot be created. 
+ """ + + log( + env, + ( + "Creating visit with:\n" + f"CandID = {candidate.cand_id}\n" + f"Visit label = {visit_label}" + ) + ) + + session = DbSession( + candidate_id = candidate.id, + visit_label = visit_label, + current_stage = 'Not Started', + site_id = candidate.registration_site_id, + project_id = candidate.registration_project_id, + cohort_id = cohort.id, + ) + + env.db.add(session) + env.db.flush() + + return session + + +# TODO: Move this function to a more appropriate place. +def generate_new_cand_id(db: Database) -> int: + """ + Generate a new random CandID that is not already in the database. + """ + + while True: + cand_id = random.randint(100000, 999999) + candidate = try_get_candidate_with_cand_id(db, cand_id) + if candidate is None: + return cand_id + + +def get_standard_sex_name(sex: str) -> str: + """ + Convert a sex name to its standard value. + """ + + if sex.lower() in ['m', 'male']: + return 'Male' + + if sex.lower() in ['f', 'female']: + return 'Female' + + return sex diff --git a/python/lib/import_bids_dataset/dataset_files.py b/python/lib/import_bids_dataset/dataset_files.py new file mode 100644 index 000000000..984bd1e0b --- /dev/null +++ b/python/lib/import_bids_dataset/dataset_files.py @@ -0,0 +1,45 @@ +import os + +from bids import BIDSLayout + +import lib.utilities +from lib.env import Env +from lib.import_bids_dataset.participant import BidsParticipant, write_bids_participants_file + + +def add_dataset_files(env: Env, source_bids_dir_path: str, loris_bids_dir_path: str, verbose: bool): + """ + Add the non-acquisition files of a LORIS BIDS directory, based on the content of this directory + and the source directory it is imported from. 
+ """ + + copy_static_dataset_files(source_bids_dir_path, loris_bids_dir_path, verbose) + + generate_participants_file(env, loris_bids_dir_path) + + +def copy_static_dataset_files(source_bids_dir_path: str, loris_bids_dir_path: str, verbose: bool): + """ + Copy the static files of the source BIDS dataset to the LORIS BIDS dataset. + """ + + for file_name in ['README', 'dataset_description.json']: + source_file_path = os.path.join(source_bids_dir_path, file_name) + if not os.path.isfile(source_file_path): + continue + + loris_file_path = os.path.join(loris_bids_dir_path, file_name) + + lib.utilities.copy_file(source_file_path, loris_file_path, verbose) # type: ignore + + +def generate_participants_file(env: Env, bids_dir_path: str): + """ + Generate the `participants.tsv` file of a BIDS dataset using the information present in the + directory and the LORIS database. + """ + + bids_layout = BIDSLayout(bids_dir_path) + bids_subjects: list[str] = bids_layout.get_subjects() # type: ignore + bids_participants = list(map(BidsParticipant, bids_subjects)) + write_bids_participants_file(bids_participants, bids_dir_path) diff --git a/python/lib/bids/participant.py b/python/lib/import_bids_dataset/participant.py similarity index 51% rename from python/lib/bids/participant.py rename to python/lib/import_bids_dataset/participant.py index 62439ad5a..0dfbb47d6 100644 --- a/python/lib/bids/participant.py +++ b/python/lib/import_bids_dataset/participant.py @@ -1,9 +1,13 @@ +import csv +import os +import re from dataclasses import dataclass import dateutil.parser from bids import BIDSLayout import lib.utilities as utilities +from lib.db.models.candidate import DbCandidate @dataclass @@ -20,8 +24,6 @@ class BidsParticipant: site: str | None = None cohort: str | None = None project: str | None = None - # FIXME: Both "cohort" and "subproject" are used in scripts, this may be a bug. 
- subproject: str | None = None def read_bids_participants_file(bids_layout: BIDSLayout) -> list[BidsParticipant] | None: @@ -48,17 +50,24 @@ def read_bids_participants_file(bids_layout: BIDSLayout) -> list[BidsParticipant def read_bids_participant_row(row: dict[str, str]) -> BidsParticipant: """ - Get a BIDS participant entry from a parsed TSV line of a `participants.tsv` file. + Get a BIDS participant entry from a `participants.tsv` line. """ - # Get the participant ID by removing the `sub-` prefix if it is present. - participant_id = row['participant_id'].replace('sub-', '') + # Get the participant ID, removing the `sub-` prefix if it is present. + participant_id = re.sub(r'^sub-', '', row['participant_id']) # Get the participant date of birth from one of the possible date of birth fields. birth_date = None - for birth_date_name in ['date_of_birth', 'birth_date', 'dob']: - if birth_date_name in row: - birth_date = dateutil.parser.parse(row[birth_date_name]).strftime('%Y-%m-%d') + for birth_date_field_ame in ['date_of_birth', 'birth_date', 'dob']: + if birth_date_field_ame in row: + birth_date = dateutil.parser.parse(row[birth_date_field_ame]).strftime('%Y-%m-%d') + break + + # Get the cohort name from one of the possible cohort fields. + cohort = None + for cohort_field_name in ['cohort', 'subproject']: + if cohort_field_name in row: + cohort = row[cohort_field_name] + break # Create the BIDS participant object. 
@@ -68,7 +77,33 @@ def read_bids_participant_row(row: dict[str, str]) -> BidsParticipant: sex = row.get('sex'), age = row.get('age'), site = row.get('site'), - cohort = row.get('cohort'), project = row.get('project'), - subproject = row.get('subproject'), + cohort = cohort, + ) + + +def write_bids_participants_file(bids_participants: list[BidsParticipant], bids_dir_path: str): + participants_file_path = os.path.join(bids_dir_path, 'participants.tsv') + with open(participants_file_path, 'w') as participants_file: + writer = csv.writer(participants_file, delimiter='\t') + writer.writerow(['participant_id']) + for bids_participant in bids_participants: + writer.writerow([bids_participant.id]) + + +def get_bids_participant_from_candidate(candidate: DbCandidate) -> BidsParticipant: + """ + Generate a BIDS participant entry from a database candidate. + """ + + # Stringify the candidate date of birth if there is one. + birth_date = candidate.date_of_birth.strftime('%Y-%m-%d') if candidate.date_of_birth is not None else None + + # Create the BIDS participant object corresponding to the database candidate. + return BidsParticipant( + id = candidate.psc_id, + birth_date = birth_date, + sex = candidate.sex, + site = candidate.registration_site.name, + project = candidate.registration_project.name, ) diff --git a/python/lib/session.py b/python/lib/session.py index f86eb274e..b62889293 100644 --- a/python/lib/session.py +++ b/python/lib/session.py @@ -122,7 +122,10 @@ def get_session_info_from_loris(self): """ # TODO refactor bids_import pipeline to use same functions as dcm2bids below. 
To be done in different PR though loris_session_info = self.db.pselect( - "SELECT * FROM session WHERE CandID = %s AND Visit_label = %s", + ( + "SELECT * FROM session JOIN candidate ON session.CandidateID = candidate.ID" + " WHERE CandID = %s AND Visit_label = %s" + ), (self.cand_id, self.visit_label) ) diff --git a/python/scripts/bids_import.py b/python/scripts/bids_import.py index 9e1000196..f1c7520bb 100755 --- a/python/scripts/bids_import.py +++ b/python/scripts/bids_import.py @@ -2,26 +2,29 @@ """Script to import BIDS structure into LORIS.""" -import getopt import json import os import re import sys +from dataclasses import dataclass from typing import Any, Literal -from bids import BIDSLayout -from bids.layout import BIDSFile - import lib.exitcode import lib.physiological import lib.utilities from lib.bidsreader import BidsReader -from lib.candidate import Candidate from lib.database import Database -from lib.database_lib.config import Config +from lib.db.queries.candidate import try_get_candidate_with_cand_id +from lib.db.queries.config import get_config_with_setting_name from lib.eeg import Eeg +from lib.env import Env +from lib.import_bids_dataset.database import check_or_create_bids_candidates_and_sessions +from lib.import_bids_dataset.dataset_files import add_dataset_files +from lib.logging import log, log_error_exit, log_warning +from lib.lorisgetopt import LorisGetOpt +from lib.make_env import make_env from lib.mri import Mri -from lib.session import Session +from lib.util import try_parse_int __license__ = "GPLv3" @@ -34,257 +37,160 @@ bids_mri_modalities = ['anat', 'dwi', 'fmap', 'func'] +@dataclass +class Args: + source_bids_dir_path: str + type: Literal[None, 'raw', 'derivative'] + ids_validation: bool + bids_validation: bool + create_candidate: bool + create_session: bool + copy: bool + verbose: bool + + def __init__(self, options_dict: dict[str, Any]): + self.source_bids_dir_path = os.path.normpath(options_dict['directory']['value']) + self.type 
= options_dict['type']['value'] + self.ids_validation = options_dict['idsvalidation']['value'] + self.bids_validation = not options_dict['nobidsvalidation']['value'] + self.create_candidate = options_dict['createcandidate']['value'] + self.create_session = options_dict['createsession']['value'] + self.copy = not options_dict['nocopy']['value'] + self.verbose = options_dict['verbose']['value'] + + # to limit the traceback when raising exceptions. # sys.tracebacklimit = 0 def main(): - bids_dir = '' - verbose = False - createcand = False - createvisit = False - idsvalidation = False - nobidsvalidation = False - type = None - profile = '' - nocopy = False - - long_options = [ - "help", "profile=", "directory=", - "createcandidate", "createsession", "idsvalidation", - "nobidsvalidation", "nocopy", "type=", - "verbose" - ] - usage = ( - '\n' - 'usage : bids_import -d -p \n\n' - 'options: \n' - '\t-p, --profile : name of the python database config file in dicom-archive/.loris-mri\n' - '\t-d, --directory : BIDS directory to parse & insert into LORIS\n' - 'If directory is within $data_dir/assembly_bids, no copy will be performed' - '\t-c, --createcandidate : to create BIDS candidates in LORIS (optional)\n' - '\t-s, --createsession : to create BIDS sessions in LORIS (optional)\n' - '\t-i, --idsvalidation : to validate BIDS directory for a matching pscid/candid pair (optional)\n' - '\t-b, --nobidsvalidation : to disable BIDS validation for BIDS compliance\n' - '\t-a, --nocopy : to disable dataset copy in data assembly_bids\n' - '\t-t, --type : raw | derivative. Specify the dataset type.' 
- 'If not set, the pipeline will look for both raw and derivative files.\n' - 'Required if no dataset_description.json is found.\n' - '\t-v, --verbose : be verbose\n' + usage = ( + "\n" + "usage : bids_import -d -p \n" + "\n" + "options: \n" + "\t-p, --profile : name of the python database config file in dicom-archive/.loris-mri\n" + "\t-d, --directory : BIDS directory to parse & insert into LORIS\n" + "\t If directory is within $data_dir/assembly_bids, no copy will be performed\n" + "\t-c, --createcandidate : to create BIDS candidates in LORIS (optional)\n" + "\t-s, --createsession : to create BIDS sessions in LORIS (optional)\n" + "\t-i, --idsvalidation : to validate BIDS directory for a matching pscid/candid pair (optional)\n" + "\t-b, --nobidsvalidation : to disable BIDS validation for BIDS compliance\n" + "\t-a, --nocopy : to disable dataset copy in data assembly_bids\n" + "\t-t, --type : raw | derivative. Specify the dataset type.\n" + "\t If not set, the pipeline will look for both raw and derivative files.\n" + "\t Required if no dataset_description.json is found.\n" + "\t-v, --verbose : be verbose\n" ) - try: - opts, _ = getopt.getopt(sys.argv[1:], 'hp:d:csinat:v', long_options) - except getopt.GetoptError: - print(usage) - sys.exit(lib.exitcode.GETOPT_FAILURE) - - for opt, arg in opts: - if opt in ('-h', '--help'): - print(usage) - sys.exit() - elif opt in ('-p', '--profile'): - profile = os.environ['LORIS_CONFIG'] + "/.loris_mri/" + arg - elif opt in ('-d', '--directory'): - bids_dir = arg - elif opt in ('-v', '--verbose'): - verbose = True - elif opt in ('-c', '--createcandidate'): - createcand = True - elif opt in ('-s', '--createsession'): - createvisit = True - elif opt in ('-i', '--idsvalidation'): - idsvalidation = True - elif opt in ('-n', '--nobidsvalidation'): - nobidsvalidation = True - elif opt in ('-a', '--nocopy'): - nocopy = True - elif opt in ('-t', '--type'): - type = arg - - # input error checking and load config_file file - config_file 
= input_error_checking(profile, bids_dir, usage) - - dataset_json = bids_dir + "/dataset_description.json" - if not os.path.isfile(dataset_json) and not type: - print('No dataset_description.json found. Please run with the --type option.') - print(usage) - sys.exit(lib.exitcode.MISSING_ARG) - - if type not in (None, 'raw', 'derivative'): - print("--type must be one of 'raw', 'derivative'") - print(usage) - sys.exit(lib.exitcode.MISSING_ARG) - - # database connection - db = Database(config_file.mysql, verbose) - db.connect() - - config_obj = Config(db, verbose) - data_dir = config_obj.get_config('dataDirBasepath') - # making sure that there is a final / in data_dir - data_dir = data_dir if data_dir.endswith('/') else data_dir + "/" + options_dict = { + "profile": { + "value": None, "required": True, "expect_arg": True, "short_opt": "p", "is_path": False + }, + "directory": { + "value": None, "required": True, "expect_arg": True, "short_opt": "d", "is_path": True + }, + "createcandidate": { + "value": False, "required": False, "expect_arg": False, "short_opt": "cc", "is_path": False + }, + "createsession": { + "value": False, "required": False, "expect_arg": False, "short_opt": "cc", "is_path": False + }, + "idsvalidation": { + "value": False, "required": False, "expect_arg": False, "short_opt": "iv", "is_path": False + }, + "nobidsvalidation": { + "value": False, "required": False, "expect_arg": False, "short_opt": "nv", "is_path": False + }, + "nocopy": { + "value": False, "required": False, "expect_arg": False, "short_opt": "nc", "is_path": False + }, + "type": { + "value": None, "required": False, "expect_arg": True, "short_opt": "t", "is_path": False + }, + "verbose": { + "value": False, "required": False, "expect_arg": False, "short_opt": "v", "is_path": False + }, + "help": { + "value": False, "required": False, "expect_arg": False, "short_opt": "h", "is_path": False + }, + } + + # Get the CLI arguments and initiate the environment. 
+ + loris_getopt_obj = LorisGetOpt(usage, options_dict, os.path.basename(__file__[:-3])) + + env = make_env(loris_getopt_obj) + + # Check the CLI arguments. + + type = loris_getopt_obj.options_dict['type']['value'] + directory = loris_getopt_obj.options_dict['directory']['value'] + + dataset_description_path = os.path.join(directory, 'dataset_description.json') + if not os.path.isfile(dataset_description_path) and type is None: + log_error_exit( + env, + "No 'dataset_description.json' file found. Please run with the --type option.", + lib.exitcode.MISSING_ARG, + ) + + if type not in [None, 'raw', 'derivative']: + log_error_exit( + env, + f"--type must be one of 'raw', 'derivative'\n{usage}", + lib.exitcode.MISSING_ARG, + ) + + args = Args(loris_getopt_obj.options_dict) # read and insert BIDS data read_and_insert_bids( - bids_dir, - data_dir, - verbose, - createcand, - createvisit, - idsvalidation, - nobidsvalidation, - type, - nocopy, - db + env, + args, + loris_getopt_obj.db, ) -def input_error_checking(profile: str, bids_dir: str, usage: str) -> Any: - """ - Checks whether the required inputs are set and that paths are valid. If - the path to the config_file file valid, then it will import the file as a - module so the database connection information can be used to connect. 
- - :param profile : path to the profile file with MySQL credentials - :param bids_dir: path to the BIDS directory to parse and insert into LORIS - :param usage : script usage to be displayed when encountering an error - - :return: config_file module with database credentials (config_file.mysql) - """ - - if not profile: - message = '\n\tERROR: you must specify a profile file using -p or ' \ - '--profile option' - print(message) - print(usage) - sys.exit(lib.exitcode.MISSING_ARG) - - if not bids_dir: - message = '\n\tERROR: you must specify a BIDS directory using -d or ' \ - '--directory option' - print(message) - print(usage) - sys.exit(lib.exitcode.MISSING_ARG) - - if os.path.isfile(profile): - sys.path.append(os.path.dirname(profile)) - config_file = __import__(os.path.basename(profile[:-3])) - else: - message = '\n\tERROR: you must specify a valid profile file.\n' + \ - profile + ' does not exist!' - print(message) - print(usage) - sys.exit(lib.exitcode.INVALID_PATH) - - if not os.path.isdir(bids_dir): - message = '\n\tERROR: you must specify a valid BIDS directory.\n' + \ - bids_dir + ' does not exist!' - print(message) - print(usage) - sys.exit(lib.exitcode.INVALID_PATH) - - return config_file - - -def read_and_insert_bids( - bids_dir: str, data_dir: str, verbose: bool, createcand: bool, createvisit: bool, - idsvalidation: bool, nobidsvalidation: bool, type: Literal['raw', 'derivative'] | None, nocopy: bool, db: Database, -): +def read_and_insert_bids(env: Env, args: Args, legacy_db: Database): """ Read the provided BIDS structure and import it into the database. 
- - :param bids_dir : path to the BIDS directory - :param data_dir : data_dir config value - :param verbose : flag for more printing if set - :param createcand : allow database candidate creation if it did not exist already - :param createvisit : allow database visit creation if it did not exist already - :param idsvalidation : allow pscid/candid validation in the BIDS directory name - :param nobidsvalidation : disable bids dataset validation - :param type : Type of the dataset - :param nocopy : disable bids dataset copy in assembly_bids - :param db : db object """ - # grep config settings from the Config module - config_obj = Config(db, verbose) - default_bids_vl = config_obj.get_config('default_bids_vl') - # Validate that pscid and candid matches - if idsvalidation: - validateids(bids_dir, db, verbose) + if args.ids_validation: + validateids(env, args.source_bids_dir_path) # load the BIDS directory - if nobidsvalidation: - bids_reader = BidsReader(bids_dir, verbose, False) - else: - bids_reader = BidsReader(bids_dir, verbose) - if not bids_reader.bids_participants \ - or not bids_reader.cand_sessions_list \ - or not bids_reader.cand_session_modalities_list: - message = '\n\tERROR: could not properly parse the following' \ - 'BIDS directory:' + bids_dir + '\n' - print(message) - sys.exit(lib.exitcode.UNREADABLE_FILE) - - loris_bids_root_dir = None - if not nocopy: - # create the LORIS_BIDS directory in data_dir based on Name and BIDS version - loris_bids_root_dir = create_loris_bids_directory( - bids_reader, data_dir, verbose + bids_reader = BidsReader(args.source_bids_dir_path, args.verbose, args.bids_validation) + + if ( + bids_reader.bids_participants == [] + or bids_reader.cand_sessions_list == {} + or bids_reader.cand_session_modalities_list == [] + ): + log_error_exit( + env, + f"Could not properly parse the following BIDS directory: {args.source_bids_dir_path}.", + lib.exitcode.UNREADABLE_FILE, ) - # Assumption all same project (for project-wide tags) - 
single_project_id = None + loris_data_dir_path = get_config_with_setting_name(env.db, 'dataDirBasepath').value - # loop through subjects - for bids_participant in bids_reader.bids_participants: - - # greps BIDS information for the candidate - bids_id = bids_participant.id - bids_sessions = bids_reader.cand_sessions_list[bids_id] + loris_bids_dir_path = None + if args.copy: + # create the LORIS_BIDS directory in data_dir based on Name and BIDS version + loris_bids_dir_path = create_loris_bids_directory(bids_reader, loris_data_dir_path, args.verbose) - # greps BIDS candidate's info from LORIS (creates the candidate if it - # does not exist yet in LORIS and the createcand flag is set to true) - loris_cand_info = grep_or_create_candidate_db_info( - bids_reader, bids_id, db, createcand, verbose - ) + # Assumption all same project (for project-wide tags) + single_project_id = None - if not nocopy: - # create the candidate's directory in the LORIS BIDS import directory - lib.utilities.create_dir(loris_bids_root_dir + "sub-" + bids_id, verbose) - - cand_id = loris_cand_info['CandID'] - center_id = loris_cand_info['RegistrationCenterID'] - project_id = loris_cand_info['RegistrationProjectID'] - single_project_id = project_id - - cohort_id = None - # TODO: change subproject -> cohort in participants.tsv? - if bids_participant.subproject is not None: - # TODO: change subproject -> cohort in participants.tsv? - cohort = bids_participant.subproject - cohort_info = db.pselect( - "SELECT CohortID FROM cohort WHERE title = %s", - [cohort, ] - ) - if len(cohort_info) > 0: - cohort_id = cohort_info[0]['CohortID'] - - # greps BIDS session's info for the candidate from LORIS (creates the - # session if it does not exist yet in LORIS and the createvisit is set - # to true. 
If no visit in BIDS structure, then use default visit_label - # stored in the Config module) - grep_candidate_sessions_info( - bids_sessions, bids_id, cand_id, loris_bids_root_dir, - createvisit, verbose, db, default_bids_vl, - center_id, project_id, cohort_id, nocopy - ) + check_or_create_bids_candidates_and_sessions(env, bids_reader, args.create_candidate, args.create_session) # Import root-level (dataset-wide) events.json # Assumption: Single project for project-wide tags bids_layout = bids_reader.bids_layout root_event_metadata_file = bids_layout.get_nearest( - bids_dir, + loris_bids_dir_path, return_type='tuple', strict=False, extension='json', @@ -294,28 +200,22 @@ def read_and_insert_bids( dataset_tag_dict = {} if not root_event_metadata_file: - message = '\nWARNING: no events metadata files (event.json) in ' \ - 'root directory' - print(message) + log_warning(env, "no events metadata files (event.json) in root directory") else: # copy the event file to the LORIS BIDS import directory - copy_file = str.replace( - root_event_metadata_file.path, - bids_layout.root, - "" - ) - event_metadata_path = loris_bids_root_dir + copy_file.lstrip('/') - lib.utilities.copy_file(root_event_metadata_file.path, event_metadata_path, verbose) + copy_file = str.replace(root_event_metadata_file.path, bids_layout.root, '') + event_metadata_path = os.path.join(loris_bids_dir_path, copy_file) + lib.utilities.copy_file(root_event_metadata_file.path, event_metadata_path, args.verbose) # TODO: Move hed_query = 'SELECT * FROM hed_schema_nodes WHERE 1' - hed_union = db.pselect(query=hed_query, args=()) + hed_union = legacy_db.pselect(query=hed_query, args=()) # load json data with open(root_event_metadata_file.path) as metadata_file: event_metadata = json.load(metadata_file) blake2 = lib.utilities.compute_blake2b_hash(root_event_metadata_file.path) - physio = lib.physiological.Physiological(db, verbose) + physio = lib.physiological.Physiological(legacy_db, args.verbose) _, 
dataset_tag_dict = physio.insert_event_metadata( event_metadata=event_metadata, event_metadata_file=event_metadata_path, @@ -326,40 +226,37 @@ def read_and_insert_bids( hed_union=hed_union ) - # TODO: What if `loris_bids_root_dir` is `None` (nocopy) ? - loris_bids = BIDSLayout(loris_bids_root_dir) + default_visit_label = get_config_with_setting_name(env.db, 'default_bids_vl').value # read list of modalities per session / candidate and register data for subject_label, session_label, modality in bids_reader.iter_modality_combinations(): if session_label is not None: visit_label = session_label else: - visit_label = default_bids_vl + visit_label = default_visit_label - loris_bids_modality_files: list[BIDSFile] = loris_bids.get( # type: ignore - subject=subject_label, - session=visit_label, - suffix=modality, + loris_modality_dir_rel_path = os.path.join( + f'sub-{subject_label}', + f'ses-{visit_label}', + modality, ) - if loris_bids_modality_files != []: - print( - 'Files already inserted in LORIS, skipping:\n' - f'- Subject: {subject_label}\n' - f'- Session: {session_label}\n' - f'- Modality: {modality}' + loris_modality_dir_path = os.path.join(loris_bids_dir_path, loris_modality_dir_rel_path) + if os.path.exists(loris_modality_dir_path): + log( + env, + ( + "Files already inserted in LORIS, skipping:\n" + f"- Subject: {subject_label}\n" + f"- Session: {visit_label}\n" + f"- Modality: {modality}" + ) ) continue - loris_bids_modality_rel_dir = os.path.join( - f'sub-{subject_label}', - f'ses-{visit_label}', - modality, - ) - - if not nocopy: - lib.utilities.create_dir(loris_bids_root_dir + loris_bids_modality_rel_dir, verbose) + if args.copy: + lib.utilities.create_dir(loris_modality_dir_path, args.verbose) if modality in bids_eeg_modalities: Eeg( @@ -367,14 +264,14 @@ def read_and_insert_bids( bids_sub_id = subject_label, bids_ses_id = session_label, bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = 
default_bids_vl, - loris_bids_eeg_rel_dir = loris_bids_modality_rel_dir, - loris_bids_root_dir = loris_bids_root_dir, + db = legacy_db, + verbose = args.verbose, + data_dir = loris_data_dir_path, + default_visit_label = default_visit_label, + loris_bids_eeg_rel_dir = loris_modality_dir_rel_path, + loris_bids_root_dir = loris_bids_dir_path, dataset_tag_dict = dataset_tag_dict, - dataset_type = type + dataset_type = args.type ) elif modality in bids_mri_modalities: Mri( @@ -382,19 +279,19 @@ def read_and_insert_bids( bids_sub_id = subject_label, bids_ses_id = session_label, bids_modality = modality, - db = db, - verbose = verbose, - data_dir = data_dir, - default_visit_label = default_bids_vl, - loris_bids_mri_rel_dir = loris_bids_modality_rel_dir, - loris_bids_root_dir = loris_bids_root_dir + db = legacy_db, + verbose = args.verbose, + data_dir = loris_data_dir_path, + default_visit_label = default_visit_label, + loris_bids_mri_rel_dir = loris_modality_dir_rel_path, + loris_bids_root_dir = loris_bids_dir_path ) - # disconnect from the database - db.disconnect() + if args.copy: + add_dataset_files(env, args.source_bids_dir_path, loris_bids_dir_path, args.verbose) -def validateids(bids_dir: str, db: Database, verbose: bool): +def validateids(env: Env, bids_dir: str): """ Validate that pscid and candid matches @@ -403,23 +300,36 @@ def validateids(bids_dir: str, db: Database, verbose: bool): :param verbose : flag for more printing if set """ - bids_folder = bids_dir.rstrip('/').split('/')[-1] + bids_folder = bids_dir.split('/')[-1] bids_folder_parts = bids_folder.split('_') psc_id = bids_folder_parts[0] cand_id = bids_folder_parts[1] - candidate = Candidate(verbose, cand_id=cand_id) - loris_cand_info = candidate.get_candidate_info_from_loris(db) + cand_id = try_parse_int(cand_id) + if cand_id is None: + log_error_exit( + env, + f"{cand_id} is not a valid CandID.", + lib.exitcode.CANDIDATE_NOT_FOUND, + ) + + candidate = try_get_candidate_with_cand_id(env.db, cand_id) - 
if not loris_cand_info: - print("ERROR: could not find a candidate with cand_id " + cand_id + ".") - sys.exit(lib.exitcode.CANDID_NOT_FOUND) - if loris_cand_info['PSCID'] != psc_id: - print("ERROR: cand_id " + cand_id + " and psc_id " + psc_id + " do not match.") - sys.exit(lib.exitcode.CANDIDATE_MISMATCH) + if candidate is None: + log_error_exit( + env, + f"Could not find a candidate with CandID {cand_id}.", + lib.exitcode.CANDID_NOT_FOUND, + ) + if candidate.psc_id != psc_id: + log_error_exit( + env, + f"CandID {cand_id} and PSCID {psc_id} do not match in the database." + ) -def create_loris_bids_directory(bids_reader: BidsReader, data_dir: str, verbose: bool) -> str: + +def create_loris_bids_directory(bids_reader: BidsReader, loris_data_dir: str, verbose: bool) -> str: """ Creates the LORIS BIDS import root directory (with name and BIDS version) and copy over the dataset_description.json, README and participants.tsv @@ -432,180 +342,19 @@ def create_loris_bids_directory(bids_reader: BidsReader, data_dir: str, verbose: :return: path to the LORIS BIDS import root directory """ - # making sure that there is a final / in bids_dir - bids_dir = bids_reader.bids_dir - bids_dir = bids_dir if bids_dir.endswith('/') else bids_dir + "/" - # determine the root directory of the LORIS BIDS and create it if does not exist - name = re.sub("[^0-9a-zA-Z]+", "_", bids_reader.dataset_name) # get name of the dataset - version = re.sub(r"[^0-9a-zA-Z\.]+", "_", bids_reader.bids_version) # get BIDSVersion of the dataset + dataset_name = re.sub(r'[^0-9a-zA-Z]+', '_', bids_reader.dataset_name) # get name of the dataset + dataset_version = re.sub(r'[^0-9a-zA-Z\.]+', '_', bids_reader.bids_version) # get BIDSVersion of the dataset # the LORIS BIDS directory will be in data_dir/BIDS/ and named with the # concatenation of the dataset name and the BIDS version - loris_bids_dirname = lib.utilities.create_dir( - data_dir + "bids_imports/" + name + "_BIDSVersion_" + version + "/", + 
loris_bids_dir_path = lib.utilities.create_dir( + os.path.join(loris_data_dir, 'bids_imports', f'{dataset_name}_BIDSVersion_{dataset_version}'), verbose ) - # copy the dataset JSON file to the new directory - lib.utilities.copy_file( - bids_dir + "dataset_description.json", - loris_bids_dirname + "dataset_description.json", - verbose - ) - - # copy the README file to the new directory - if os.path.isfile(bids_dir + "README"): - lib.utilities.copy_file( - bids_dir + "README", - loris_bids_dirname + "README", - verbose - ) - - # copy the participant.tsv file to the new directory - if os.path.exists(loris_bids_dirname + "participants.tsv"): - lib.utilities.append_to_tsv_file( - bids_dir + "participants.tsv", - loris_bids_dirname + "participants.tsv", - "participant_id", - verbose - ) - else: - lib.utilities.copy_file( - bids_dir + "participants.tsv", - loris_bids_dirname + "participants.tsv", - verbose - ) - - return loris_bids_dirname - - -def grep_or_create_candidate_db_info( - bids_reader: BidsReader, bids_id: str, db: Database, createcand: bool, verbose: bool -) -> dict[str, Any]: - """ - Greps (or creates if candidate does not exist and createcand is true) the - BIDS candidate in the LORIS candidate's table and return a list of - candidates with their related fields from the database. 
- - :param bids_reader : BIDS information handler object - :param bids_id : bids_id to be used (CandID or PSCID) - :param db : database handler object - :param createcand : if true, creates the candidate in LORIS - :param verbose : if true, prints out information while executing - - :return: The dictionary of the candidate database record - """ - - candidate = Candidate(verbose=verbose, cand_id=bids_id) - loris_cand_info = candidate.get_candidate_info_from_loris(db) - - if not loris_cand_info: - candidate = Candidate(verbose, psc_id=bids_id) - loris_cand_info = candidate.get_candidate_info_from_loris(db) - - if not loris_cand_info and createcand: - loris_cand_info = candidate.create_candidate( - db, bids_reader.bids_participants - ) - if not loris_cand_info: - print("Creating candidate failed. Cannot importing the files.\n") - sys.exit(lib.exitcode.CANDIDATE_CREATION_FAILURE) - - if not loris_cand_info: - print("Candidate " + bids_id + " not found. You can retry with the --createcandidate option.\n") - sys.exit(lib.exitcode.CANDIDATE_NOT_FOUND) - - return loris_cand_info - - -def grep_or_create_session_db_info( - bids_id: str, cand_id: int, visit_label: str, db: Database, createvisit: bool, verbose: bool, - loris_bids_dir: str, center_id: int, project_id: int, cohort_id: int, nocopy: bool -) -> dict[str, Any]: - """ - Greps (or creates if session does not exist and createvisit is true) the - BIDS session in the LORIS session's table and return a list of - sessions with their related fields from the database. 
- - :parma bids_id : BIDS ID of the session - :param cand_id : CandID to use to create the session - :param visit_label : Visit label to use to create the session - :param db : database handler object - :param createvisit : if true, creates the session in LORIS - :param verbose : if true, prints out information while executing - :param loris_bids_dir : LORIS BIDS import root directory to copy data - :param center_id : CenterID to use to create the session - :param project_id : ProjectID to use to create the session - :param cohort_id : CohortID to use to create the session - :param nocopy : if true, skip the assembly_bids dataset copy - - :return: session information grepped from LORIS for cand_id and visit_label - """ - - session = Session(db, verbose, cand_id, visit_label, center_id, project_id, cohort_id) - loris_vl_info = session.get_session_info_from_loris() - - if not loris_vl_info and createvisit: - loris_vl_info = session.create_session() - - if not nocopy: - # create the visit directory for in the candidate folder of the LORIS - # BIDS import directory - lib.utilities.create_dir( - loris_bids_dir + "sub-" + bids_id + "/ses-" + visit_label, - verbose - ) - - return loris_vl_info - - -def grep_candidate_sessions_info( - bids_ses: list[str], bids_id: str, cand_id: int, loris_bids_dir: str, createvisit: bool, verbose: bool, - db: Database, default_vl: str, center_id: int, project_id: int, cohort_id: int, nocopy: bool, -) -> list[dict[str, Any]]: - """ - Greps all session info dictionaries for a given candidate and aggregates - them into a list, with one entry per session. If the session does not - exist in LORIS and that createvisit is true, it will create the session - first. 
- - :param bids_ses : list of BIDS sessions to grep info or insert - :param bids_id : BIDS ID of the candidate - :param cand_id : candidate's CandID - :param loris_bids_dir : LORIS BIDS import root directory to copy data - :param createvisit : if true, creates the visits in LORIS - :param verbose : if true, prints out information while executing - :param db : database handler object - :param default_vl : default visit label from the Config module - :param center_id : center ID associated to the candidate and visit - :param project_id : project ID associated to the candidate and visit - :param cohort_id : cohort ID associated to the candidate and visit - :param nocopy : if true, skip the assembly_bids dataset copy - - :return: list of all session's dictionaries for a given candidate - """ - - loris_sessions_info = [] - - if not bids_ses: - loris_ses_info = grep_or_create_session_db_info( - bids_id, cand_id, default_vl, db, - createvisit, verbose, loris_bids_dir, - center_id, project_id, cohort_id, nocopy - ) - loris_sessions_info.append(loris_ses_info) - else: - for visit_label in bids_ses: - loris_ses_info = grep_or_create_session_db_info( - bids_id, cand_id, visit_label, db, - createvisit, verbose, loris_bids_dir, - center_id, project_id, cohort_id, nocopy - ) - loris_sessions_info.append(loris_ses_info) - - return loris_sessions_info + return loris_bids_dir_path -if __name__ == "__main__": +if __name__ == '__main__': main()