Skip to content

Commit

Permalink
Merge pull request #326 from bids-standard/consistent-entities
Browse files Browse the repository at this point in the history
Ensure consistent entities at all levels
  • Loading branch information
effigies authored Feb 1, 2019
2 parents 99178b7 + cf95dd7 commit f2a333c
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 34 deletions.
19 changes: 12 additions & 7 deletions bids/analysis/tests/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,25 +31,28 @@ def test_get_design_matrix_arguments(analysis):
kwargs = dict(run=1, subject='01', sparse=True)
result = analysis['run'].get_design_matrix(**kwargs)
result = result[0]
assert result.sparse.shape == (172, 7)
assert result.sparse.shape == (172, 9)
assert result.dense is None

kwargs = dict(run=1, subject='01', mode='dense', force=False)
result = analysis['run'].get_design_matrix(**kwargs)[0]
assert result.sparse is None
assert result.dense is None

kwargs = dict(run=1, subject='01', mode='dense', force=True, sampling_rate='highest')
kwargs = dict(run=1, subject='01', mode='dense', force=True,
sampling_rate='highest')
result = analysis['run'].get_design_matrix(**kwargs)[0]
assert result.sparse is None
assert result.dense.shape == (4800, 6)

kwargs = dict(run=1, subject='01', mode='dense', force=True, sampling_rate='TR')
kwargs = dict(run=1, subject='01', mode='dense', force=True,
sampling_rate='TR')
result = analysis['run'].get_design_matrix(**kwargs)[0]
assert result.sparse is None
assert result.dense.shape == (240, 6)

kwargs = dict(run=1, subject='01', mode='dense', force=True, sampling_rate=0.5)
kwargs = dict(run=1, subject='01', mode='dense', force=True,
sampling_rate=0.5)
result = analysis['run'].get_design_matrix(**kwargs)[0]
assert result.sparse is None
assert result.dense.shape == (240, 6)
Expand All @@ -72,11 +75,11 @@ def test_first_level_sparse_design_matrix(analysis):
result = analysis['run'].get_design_matrix(subject=['01'])
assert len(result) == 3
df = result[0].sparse
assert df.shape == (172, 7)
assert df.shape == (172, 9)
assert df['condition'].nunique() == 2
assert set(result[0][0].columns) == {'amplitude', 'onset', 'duration',
'condition', 'subject', 'run',
'task'}
'task', 'datatype', 'suffix'}


def test_post_first_level_sparse_design_matrix(analysis):
Expand All @@ -87,7 +90,9 @@ def test_post_first_level_sparse_design_matrix(analysis):
assert result[0].sparse.shape == (9, 2)
assert result[0].entities == {
'subject': '01',
'task': 'mixedgamblestask'}
'task': 'mixedgamblestask',
'datatype': 'func',
'suffix': 'bold'}

# Participant level and also check integer-based indexing
result = analysis['participant'].get_design_matrix()
Expand Down
21 changes: 13 additions & 8 deletions bids/variables/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
# Add in all of the run's entities as new columns for
# index
for entity, value in entities.items():
if entity in BASE_ENTITIES:
if entity in ALL_ENTITIES:
df[entity] = value

if drop_na:
Expand Down Expand Up @@ -327,14 +327,20 @@ def _load_tsv_variables(layout, suffix, dataset=None, columns=None,
# file (for entities that vary by row), or from the full file path
# (for entities constant over all rows in the file). We extract both
# and store them in the main DataFrame alongside other variables (as
# they'll be extracted when the Column is initialized anyway).
# they'll be extracted when the BIDSVariable is initialized anyway).
for ent_name, ent_val in f.entities.items():
if ent_name in BASE_ENTITIES:
if ent_name in ALL_ENTITIES:
_data[ent_name] = ent_val

# Handling is a bit more convoluted for scans.tsv, because the first
# column contains the run filename, which we also need to parse.
if suffix == 'scans':

# Suffix is guaranteed to be present in each filename, so drop the
# constant column with value 'scans' to make way for it and prevent
# two 'suffix' columns.
_data.drop(columns='suffix', inplace=True)

image = _data['filename']
_data = _data.drop('filename', axis=1)
dn = f.dirname
Expand Down Expand Up @@ -369,12 +375,11 @@ def make_patt(x, regex_search=False):
# Filter rows on all selectors
comm_cols = list(set(_data.columns) & set(selectors.keys()))
for col in comm_cols:
for val in listify(selectors.get(col)):
ent_patts = [make_patt(x, regex_search=layout.regex_search)
for x in listify(selectors.get(col))]
patt = '|'.join(ent_patts)
ent_patts = [make_patt(x, regex_search=layout.regex_search)
for x in listify(selectors.get(col))]
patt = '|'.join(ent_patts)

_data = _data[_data[col].str.contains(patt)]
_data = _data[_data[col].str.contains(patt)]

level = {'scans': 'session', 'sessions': 'subject',
'participants': 'dataset'}[suffix]
Expand Down
22 changes: 7 additions & 15 deletions bids/variables/tests/test_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,35 +57,27 @@ def test_run_variable_collection_to_df(run_coll):

# All variables sparse, wide format
df = run_coll.to_df()
assert df.shape == (4096, 13)
assert df.shape == (4096, 15)
wide_cols = {'onset', 'duration', 'subject', 'run', 'task',
'PTval', 'RT', 'gain', 'loss', 'parametric gain', 'respcat',
'respnum', 'trial_type'}
'respnum', 'trial_type', 'suffix', 'datatype'}
assert set(df.columns) == wide_cols

# All variables sparse, long format
df = run_coll.to_df(format='long')
assert df.shape == (32768, 7)
assert df.shape == (32768, 9)
long_cols = {'amplitude', 'duration', 'onset', 'condition', 'run',
'task', 'subject'}
'task', 'subject', 'suffix', 'datatype'}
assert set(df.columns) == long_cols

# All variables dense, wide format
df = run_coll.to_df(sparse=False)
assert df.shape == (230400, 14)
# The inclusion of 'modality' and 'type' here is a minor bug that should
# be fixed at some point. There is no reason why to_df() should return
# more columns for a DenseRunVariable than a SparseRunVariable, but this
# is happening because these columns are not included in the original
# SparseRunVariable data, and are being rebuilt from the entity list in
# the DenseRunVariable init.
wide_cols |= {'datatype', 'suffix'}
assert set(df.columns) == wide_cols - {'trial_type'}

# All variables dense, long format
df = run_coll.to_df(sparse=False, format='long')
assert df.shape == (1612800, 9)
long_cols |= {'datatype', 'suffix'}
assert set(df.columns) == long_cols


Expand All @@ -100,14 +92,14 @@ def test_merge_collections(run_coll, run_coll_list):
def test_get_collection_entities(run_coll_list):
coll = run_coll_list[0]
ents = coll.entities
assert {'run', 'task', 'subject'} == set(ents.keys())
assert {'run', 'task', 'subject', 'suffix', 'datatype'} == set(ents.keys())

merged = merge_collections(run_coll_list[:3])
ents = merged.entities
assert {'task', 'subject'} == set(ents.keys())
assert {'task', 'subject', 'suffix', 'datatype'} == set(ents.keys())
assert ents['subject'] == '01'

merged = merge_collections(run_coll_list[3:6])
ents = merged.entities
assert {'task', 'subject'} == set(ents.keys())
assert {'task', 'subject', 'suffix', 'datatype'} == set(ents.keys())
assert ents['subject'] == '02'
2 changes: 1 addition & 1 deletion bids/variables/tests/test_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def test_get_collections_merged(layout1):
vals = collection.variables['RT'].values
ents = collection.variables['RT'].index
assert len(ents) == len(vals) == 4096
assert set(ents.columns) == {'task', 'run', 'subject'}
assert set(ents.columns) == {'task', 'run', 'subject', 'suffix', 'datatype'}


def test_get_collections_unmerged(layout2):
Expand Down
6 changes: 3 additions & 3 deletions bids/variables/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def test_load_events(layout1):
targ_cols = {'parametric gain', 'PTval', 'trial_type', 'respnum'}
assert not (targ_cols - set(variables.keys()))
assert isinstance(variables['parametric gain'], SparseRunVariable)
assert variables['parametric gain'].index.shape == (86, 3)
assert variables['parametric gain'].index.shape == (86, 5)
assert variables['parametric gain'].source == 'events'


Expand All @@ -51,12 +51,12 @@ def test_load_participants(layout1):
assert {'age', 'sex'} == set(dataset.variables.keys())
age = dataset.variables['age']
assert isinstance(age, SimpleVariable)
assert age.index.shape == (16, 1)
assert age.index.shape == (16, 2)
assert age.values.shape == (16,)

index = load_variables(layout1, types='participants', subject=['^1.*'])
age = index.get_nodes(level='dataset')[0].variables['age']
assert age.index.shape == (7, 1)
assert age.index.shape == (7, 2)
assert age.values.shape == (7,)


Expand Down

0 comments on commit f2a333c

Please sign in to comment.