Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Incremental BIDS import #1211

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ include = [
"python/lib/env.py",
"python/lib/file_system.py",
"python/lib/get_subject_session.py",
"python/lib/import_bids_dataset",
"python/lib/logging.py",
"python/lib/make_env.py",
"python/lib/util.py",
"python/lib/validate_subject_info.py",
]
typeCheckingMode = "strict"
Expand Down
129 changes: 67 additions & 62 deletions python/lib/bidsreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
import json
import re
import sys
from collections.abc import Generator
from dataclasses import dataclass

from bids import BIDSLayout

import lib.exitcode
import lib.utilities as utilities
from lib.import_bids_dataset.participant import BidsParticipant, read_bids_participants_file

# import bids
# BIDSLayoutIndexer is required for PyBIDS >= 0.12.1
Expand All @@ -21,6 +23,18 @@
__license__ = "GPLv3"


@dataclass
class BidsSessionInfo:
"""
Information about a BIDS session, that is, the label of the subject and the session, and the
modalities of this session.
"""

subject_label: str
session_label: str | None
modalities: list[str]


class BidsReader:
"""
This class reads a BIDS structure into a data dictionary using BIDS grabbids.
Expand All @@ -35,16 +49,13 @@ class BidsReader:
bids_reader = BidsReader(bids_dir)
"""

def __init__(self, bids_dir, verbose, validate = True):
def __init__(self, bids_dir: str, verbose: bool, validate: bool = True):
"""
Constructor method for the BidsReader class.

:param bids_dir: path to the BIDS structure to read
:type bids_dir: str
:param verbose : boolean to print verbose information
:type verbose : bool
:param bids_dir : path to the BIDS structure to read
:param verbose : boolean to print verbose information
:param validate : boolean to validate the BIDS dataset
:type validate : bool
"""

self.verbose = verbose
Expand All @@ -65,15 +76,15 @@ def __init__(self, bids_dir, verbose, validate = True):
print("WARNING: Cannot read dataset_description.json")

# load BIDS candidates information
self.participants_info = self.load_candidates_from_bids()
self.bids_participants = self.load_candidates_from_bids()

# load BIDS sessions information
self.cand_sessions_list = self.load_sessions_from_bids()

# load BIDS modality information
self.cand_session_modalities_list = self.load_modalities_from_bids()

def load_bids_data(self, validate):
def load_bids_data(self, validate: bool):
"""
Loads the BIDS study using the BIDSLayout function (part of the pybids
package) and return the object.
Expand All @@ -84,7 +95,7 @@ def load_bids_data(self, validate):
if self.verbose:
print('Loading the BIDS dataset with BIDS layout library...\n')

exclude_arr = ['/code/', '/sourcedata/', '/log/', '.git/']
exclude_arr = ['code', 'sourcedata', 'log', '.git']
force_arr = [re.compile(r"_annotations\.(tsv|json)$")]

# BIDSLayoutIndexer is required for PyBIDS >= 0.12.1
Expand Down Expand Up @@ -114,42 +125,34 @@ def load_bids_data(self, validate):

return bids_layout

def load_candidates_from_bids(self):
def load_candidates_from_bids(self) -> list[BidsParticipant]:
"""
Loads the list of candidates from the BIDS study. List of
participants and their information will be stored in participants_info.
participants and their information will be stored in bids_participants.

:return: list of dictionaries with participant information from BIDS
:rtype: list
"""

if self.verbose:
print('Grepping candidates from the BIDS layout...')

# grep the participant.tsv file and parse it
participants_info = None
for file in self.bids_layout.get(suffix='participants', return_type='filename'):
# note file[0] returns the path to participants.tsv
if 'participants.tsv' in file:
participants_info = utilities.read_tsv_file(file)
else:
continue
bids_participants = read_bids_participants_file(self.bids_layout)

if participants_info:
self.candidates_list_validation(participants_info)
if bids_participants:
self.candidates_list_validation(bids_participants)
else:
bids_subjects = self.bids_layout.get_subjects()
participants_info = [{'participant_id': sub_id} for sub_id in bids_subjects]
bids_participants = [BidsParticipant(sub_id) for sub_id in bids_subjects]

if self.verbose:
print('\t=> List of participants found:')
for participant in participants_info:
print('\t\t' + participant['participant_id'])
for bids_participant in bids_participants:
print('\t\t' + bids_participant.id)
print('\n')

return participants_info
return bids_participants

def candidates_list_validation(self, participants_info):
def candidates_list_validation(self, bids_participants: list[BidsParticipant]):
"""
Validates whether the subjects listed in participants.tsv match the
list of participant directory. If there is a mismatch, will exit with
Expand All @@ -165,18 +168,16 @@ def candidates_list_validation(self, participants_info):
"participants.tsv and raw data found in the BIDS "
"directory")

# check that all subjects listed in participants_info are also in
# check that all subjects listed in bids_participants are also in
# subjects array and vice versa
for row in participants_info:
# remove the "sub-" in front of the subject ID if present
row['participant_id'] = row['participant_id'].replace('sub-', '')
if row['participant_id'] not in subjects:
for bids_participant in bids_participants:
if bids_participant.id not in subjects:
print(mismatch_message)
print(row['participant_id'] + 'is missing from the BIDS Layout')
print(bids_participant.id + 'is missing from the BIDS Layout')
print('List of subjects parsed by the BIDS layout: ' + ', '.join(subjects))
sys.exit(lib.exitcode.BIDS_CANDIDATE_MISMATCH)
# remove the subject from the list of subjects
subjects.remove(row['participant_id'])
subjects.remove(bids_participant.id)

# check that no subjects are left in subjects array
if subjects:
Expand All @@ -186,24 +187,23 @@ def candidates_list_validation(self, participants_info):
if self.verbose:
print('\t=> Passed validation of the list of participants\n')

def load_sessions_from_bids(self):
def load_sessions_from_bids(self) -> dict[str, list[str]]:
"""
Grep the list of sessions for each candidate directly from the BIDS
structure.

:return: dictionary with the list of sessions and candidates found in the
BIDS structure
:rtype: dict
"""

if self.verbose:
print('Grepping list of sessions from the BIDS layout...')

cand_sessions = {}

for row in self.participants_info:
ses = self.bids_layout.get_sessions(subject=row['participant_id'])
cand_sessions[row['participant_id']] = ses
for bids_participant in self.bids_participants:
ses = self.bids_layout.get_sessions(subject=bids_participant.id)
cand_sessions[bids_participant.id] = ses

if self.verbose:
print('\t=> List of sessions found:\n')
Expand All @@ -216,57 +216,62 @@ def load_sessions_from_bids(self):

return cand_sessions

def load_modalities_from_bids(self):
def load_modalities_from_bids(self) -> list[BidsSessionInfo]:
"""
Grep the list of modalities available for each session and candidate directly
from the BIDS structure.

:return: list with one entry per candidate and session, each holding its list of modalities
:rtype: dict
"""

if self.verbose:
print('Grepping the different modalities from the BIDS layout...')

cand_session_modalities_list = []
cand_session_modalities_list: list[BidsSessionInfo] = []

for subject, visit_list in self.cand_sessions_list.items():
if visit_list:
for visit in visit_list:
modalities = self.bids_layout.get_datatype(subject=subject, session=visit)
cand_session_modalities_list.append({
'bids_sub_id': subject,
'bids_ses_id': visit,
'modalities' : modalities
})
cand_session_modalities_list.append(BidsSessionInfo(
subject_label = subject,
session_label = visit,
modalities = modalities,
))
else:
modalities = self.bids_layout.get_datatype(subject=subject)
cand_session_modalities_list.append({
'bids_sub_id': subject,
'bids_ses_id': None,
'modalities' : modalities
})
cand_session_modalities_list.append(BidsSessionInfo(
subject_label = subject,
session_label = None,
modalities = modalities,
))

if self.verbose:
print('\t=> Done grepping the different modalities from the BIDS layout\n')

return cand_session_modalities_list

def iter_modality_combinations(self) -> Generator[tuple[str, str | None, str], None, None]:
"""
Iterate over the different subject / session / modality combinations present in the BIDS
dataset.
"""

for cand_session_modalities in self.cand_session_modalities_list:
for modality in cand_session_modalities.modalities:
yield cand_session_modalities.subject_label, cand_session_modalities.session_label, modality

@staticmethod
def grep_file(files_list, match_pattern, derivative_pattern=None):
def grep_file(files_list: list[str], match_pattern: str, derivative_pattern: str | None = None) -> str | None:
"""
Grep a unique file based on a match pattern and returns it.

:param files_list : list of files to look into
:type files_list : list
:param match_pattern : pattern to use to find the file
:type match_pattern : str
:param derivative_pattern: derivative pattern to use if the file we look for
is a derivative file
:type derivative_pattern: str
:param files_list : list of files to look into
:param match_pattern : pattern to use to find the file
:param derivative_pattern : derivative pattern to use if the file we look for
is a derivative file

:return: name of the first file that matches the pattern
:rtype: str
"""

for filename in files_list:
Expand Down
51 changes: 17 additions & 34 deletions python/lib/candidate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,8 @@
import random
import sys

from dateutil.parser import parse

import lib.exitcode
from lib.import_bids_dataset.participant import BidsParticipant

__license__ = "GPLv3"

Expand Down Expand Up @@ -59,16 +58,15 @@ def __init__(self, verbose, psc_id=None, cand_id=None, sex=None, dob=None):
self.center_id = None
self.project_id = None

def create_candidate(self, db, participants_info):
def create_candidate(self, db, bids_participants: list[BidsParticipant]):
"""
Creates a candidate using BIDS information provided in the
participants_info's list.
bids_participants list.

:param db : database handler object
:type db : object
:param participants_info: list of dictionary with participants
:param bids_participants: list of BIDS participants with their
information from BIDS
:type participants_info: list

:return: dictionary with candidate info from the candidate's table
:rtype: dict
Expand All @@ -81,25 +79,26 @@ def create_candidate(self, db, participants_info):
if not self.cand_id:
self.cand_id = self.generate_cand_id(db)

for row in participants_info:
if not row['participant_id'] == self.psc_id:
for bids_participant in bids_participants:
if bids_participant.id != self.psc_id:
continue
self.grep_bids_dob(row)
if 'sex' in row:
self.map_sex(row['sex'])
if 'age' in row:
self.age = row['age']

self.dob = bids_participant.birth_date
if bids_participant.sex is not None:
self.map_sex(bids_participant.sex)
if bids_participant.age is not None:
self.age = bids_participant.age

# three steps to find site:
# 1. try matching full name from 'site' column in participants.tsv in db
# 2. try extracting alias from pscid
# 3. try finding previous site in candidate table

if 'site' in row and row['site'].lower() not in ("null", ""):
if bids_participant.site is not None and bids_participant.site.lower() not in ('', 'null'):
# search site id in psc table by its full name
site_info = db.pselect(
"SELECT CenterID FROM psc WHERE Name = %s",
[row['site'], ]
[bids_participant.site, ]
)
if len(site_info) > 0:
self.center_id = site_info[0]['CenterID']
Expand All @@ -108,7 +107,7 @@ def create_candidate(self, db, participants_info):
# search site id in psc table by its alias extracted from pscid
db_sites = db.pselect("SELECT CenterID, Alias FROM psc")
for site in db_sites:
if site['Alias'] in row['participant_id']:
if site['Alias'] in bids_participant.id:
self.center_id = site['CenterID']

if self.center_id is None:
Expand All @@ -124,11 +123,11 @@ def create_candidate(self, db, participants_info):
# 1. find full name in 'project' column in participants.tsv
# 2. find previous in candidate table

if 'project' in row and row['project'].lower() not in ("null", ""):
if bids_participant.project is not None and bids_participant.project.lower() not in ('', 'null'):
# search project id in Project table by its full name
project_info = db.pselect(
"SELECT ProjectID FROM Project WHERE Name = %s",
[row['project'], ]
[bids_participant.project, ]
)
if len(project_info) > 0:
self.project_id = project_info[0]['ProjectID']
Expand Down Expand Up @@ -220,22 +219,6 @@ def map_sex(self, sex):
if sex.lower() in ('f', 'female'):
self.sex = 'Female'

def grep_bids_dob(self, subject_info):
    """
    Looks for a date of birth in the participant's BIDS information and stores it in
    self.dob, formatted as YYYY-MM-DD.

    The keys 'date_of_birth', 'birth_date' and 'dob' are checked in that order; every key
    that is present is parsed, so when several are present the last one checked wins.
    self.dob is left untouched when none of the keys is present.

    :param subject_info: dictionary with all information present in the BIDS
                         participants.tsv file for a given candidate
     :type subject_info: dict
    """

    for column in ('date_of_birth', 'birth_date', 'dob'):
        if column not in subject_info:
            continue
        self.dob = parse(subject_info[column]).strftime('%Y-%m-%d')

@staticmethod
def generate_cand_id(db):
"""
Expand Down
Empty file added python/lib/config.py
Empty file.
Loading
Loading