ENH: Enable models for sparsely sampled fMRI series #414
```diff
@@ -85,6 +85,27 @@ def load_variables(layout, types=None, levels=None, skip_empty=True,
     return dataset
 
 
+def _get_timing_info(img_md):
+    if 'RepetitionTime' in img_md:
+        tr = img_md['RepetitionTime']
+        if 'DelayTime' in img_md:
+            ta = tr - img_md['DelayTime']
+        elif 'SliceTiming' in img_md:
+            slicetimes = sorted(img_md['SliceTiming'])
+            # a, b ... z
+            # z = final slice onset, b - a = slice duration
+            ta = np.round(slicetimes[-1] + slicetimes[1] - slicetimes[0], 3)
+            # If the "silence" is <1ms, consider acquisition continuous
+            if np.abs(tr - ta) < 1e-3:
+                ta = tr
+        else:
+            ta = tr
+    elif 'VolumeTiming' in img_md:
+        return NotImplemented
```

Review comment (on `return NotImplemented`): Should this be `raise NotImplementedError`?
```diff
+
+    return tr, ta
+
+
 def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
                          drop_na=True, events=True, physio=True, stim=True,
                          regressors=True, skip_empty=True, scope='all',
```
```diff
@@ -146,8 +167,9 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
         if 'run' in entities:
             entities['run'] = int(entities['run'])
 
-        tr = layout.get_metadata(img_f, suffix='bold', scope=scope,
-                                 full_search=True)['RepetitionTime']
+        img_md = layout.get_metadata(img_f, suffix='bold', scope=scope,
+                                     full_search=True)
+        tr, ta = _get_timing_info(img_md)
 
         # Get duration of run: first try to get it directly from the image
         # header; if that fails, try to get NumberOfVolumes from the
@@ -167,7 +189,8 @@ def _load_time_variables(layout, dataset=None, columns=None, scan_length=None,
             raise ValueError(msg)
 
         run = dataset.get_or_create_node('run', entities, image_file=img_f,
-                                         duration=duration, repetition_time=tr)
+                                         duration=duration, repetition_time=tr,
+                                         acquisition_time=ta)
         run_info = run.get_info()
 
         # Process event files
```
```diff
@@ -425,7 +425,7 @@ def _build_entity_index(self, run_info, sampling_rate):
         self.timestamps = pd.concat(_timestamps, axis=0, sort=True)
         return pd.concat(index, axis=0, sort=True).reset_index(drop=True)
 
-    def resample(self, sampling_rate, inplace=False, kind='linear'):
+    def resample(self, sampling_rate, integration_window=None, inplace=False, kind='linear'):
         '''Resample the Variable to the specified sampling rate.
 
         Parameters
@@ -442,7 +442,7 @@ def resample(self, sampling_rate, inplace=False, kind='linear'):
         '''
         if not inplace:
             var = self.clone()
-            var.resample(sampling_rate, True, kind)
+            var.resample(sampling_rate, integration_window, True, kind)
             return var
 
         if sampling_rate == self.sampling_rate:
```
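In use, the new keyword selects mean-pooling over a window rather than interpolation. A hypothetical call, assuming `var` is a `DenseRunVariable` (setup not shown):

```python
# Plain linear interpolation down to 1 Hz, as before:
downsampled = var.resample(1.0, inplace=False)

# Average the samples falling in a 2 s window after each new sample time:
integrated = var.resample(1.0, integration_window=2.0, inplace=False)
```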
```diff
@@ -453,18 +453,36 @@
 
         self.index = self._build_entity_index(self.run_info, sampling_rate)
 
-        x = np.arange(n)
         num = len(self.index)
```

Review comment: This is on me I think, but we could probably use less confusing names than `n` and `num`.

Reply: Yeah, I can try to rewrite; I was mostly aiming for a minimal diff.
```diff
 
-        from scipy.interpolate import interp1d
-        f = interp1d(x, self.values.values.ravel(), kind=kind)
-        x_new = np.linspace(0, n - 1, num=num)
-        self.values = pd.DataFrame(f(x_new))
+        if integration_window is not None:
```

Review comment: Rather than determine which approach to use based on whether `integration_window` is `None`… In general, I wonder if it makes sense to implement the resampling operations ourselves, or if we should just do it via pandas (or something like traces, which wraps pandas). I would expect that resampling in pandas is probably going to be more efficient than rolling our own solution, while relieving us of the burden of extensive testing.
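As a rough sketch of the pandas-based alternative this comment suggests (not part of the PR; the series and rates are invented), downsampling by averaging could look like:

```python
import numpy as np
import pandas as pd

old_sr = 10.0  # invented original sampling rate, in Hz
values = pd.Series(
    np.random.randn(600),
    index=pd.to_timedelta(np.arange(600) / old_sr, unit='s'),
)

# One value per 2 s bin, averaging all old samples in the bin. This matches
# the integrator below when the integration window equals the new sampling
# period; a shorter window would need a rolling mean evaluated at the new
# sample times instead.
resampled = values.resample('2s').mean()
```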
```diff
+            from scipy.sparse import lil_matrix
+            old_times = np.arange(n) / old_sr
+            new_times = np.arange(num) / sampling_rate
+            integrator = lil_matrix((num, n), dtype=np.uint8)
+            count = None
+            for i, new_time in enumerate(new_times):
+                cols = (old_times >= new_time) & (old_times < new_time + integration_window)
```

Review comment: Should the integration window be centered on `new_time`?
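For comparison, a centered window would change the mask along these lines (illustrative only, not in the PR):

```python
# Center the integration window on new_time instead of starting there:
half = integration_window / 2
cols = (old_times >= new_time - half) & (old_times < new_time + half)
```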
```diff
+                # This could be determined analytically, but dodging off-by-one errors
+                if count is None:
+                    count = np.sum(cols)
+                integrator[i, cols] = 1
+
+            old_vals = self.values.values
+            self.values = pd.DataFrame(integrator.tocsr().dot(old_vals) / count)
+        else:
+            from scipy.interpolate import interp1d
+            x = np.arange(n)
+            f = interp1d(x, self.values.values.ravel(), kind=kind)
+            x_new = np.linspace(0, n - 1, num=num)
+            self.values = pd.DataFrame(f(x_new))
 
         assert len(self.values) == len(self.index)
 
         self.sampling_rate = sampling_rate
 
-    def to_df(self, condition=True, entities=True, timing=True, sampling_rate=None):
+    def to_df(self, condition=True, entities=True, timing=True, sampling_rate=None,
+              integration_window=None):
         '''Convert to a DataFrame, with columns for name and entities.
 
         Parameters
```
```diff
@@ -480,7 +498,8 @@ def to_df(self, condition=True, entities=True, timing=True, sampling_rate=None):
             sampled uniformly). If False, omits them.
         '''
         if sampling_rate not in (None, self.sampling_rate):
-            return self.resample(sampling_rate).to_df(condition, entities)
+            return self.resample(sampling_rate,
+                                 integration_window=integration_window).to_df(condition, entities)
 
         df = super(DenseRunVariable, self).to_df(condition, entities)
```
Review comment: Is it worth testing for non-uniform slice onsets (and raising a not-supported error)? I don't know if that ever happens in practice, but if it does, we should probably fail...

Reply: That feels like a validation problem. While we can put those checks in ad hoc, I think it would make sense to either fail on load for validation problems or be able to insert a little boilerplate check like `validate(slicetimes, 'SliceTiming')`.
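A minimal sketch of such a check (hypothetical helper, not part of the PR or the pybids API):

```python
import numpy as np

def validate_slice_timing(slicetimes, tol=1e-3):
    """Raise if slice onsets are not uniformly spaced (hypothetical check)."""
    gaps = np.diff(np.sort(np.asarray(slicetimes, dtype=float)))
    if gaps.size and np.any(np.abs(gaps - gaps[0]) > tol):
        raise ValueError("Non-uniform SliceTiming is not supported")
```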