RF: PEP8
mbrockman1 committed Jun 1, 2017
1 parent 75c5133 commit 6e64232
Showing 8 changed files with 307 additions and 167 deletions.
51 changes: 34 additions & 17 deletions cili/cleanup.py
@@ -4,14 +4,15 @@
#-------------------------------------------------------------
# Masking


def find_nested_events(samples, outer, inner):
""" Returns indices of events in outer that contain events in inner
This is helpful for dealing with EyeLink blink events. Each is embedded
within a saccade event, and the EyeLink documentation states that data
within saccades that contain blinks is unreliable. So we use this method
to find those saccade events.
Parameters
----------
samples (cili Samples)
@@ -30,18 +31,21 @@ def find_nested_events(samples, outer, inner):
post_onsets = onsets + inner.duration
# convert to list of positional indices
max_onset = samples.index[-1]
- last_idxs = post_onsets.apply(lambda x: max(0, samples.index.searchsorted(x, side="right")-1))
+ last_idxs = post_onsets.apply(lambda x: max(
+     0, samples.index.searchsorted(x, side="right") - 1))
# step back by one positional index to get pos. index of last samples of our events.
# stupid fix - don't nudge the index back for events whose duration went beyond the samples
end_safe_evs = post_onsets <= max_onset
last_idxs[end_safe_evs] = last_idxs[end_safe_evs] - 1
# get the time indices of the last samples of our events
last_onsets = last_idxs.apply(lambda x: samples.index[x])
- idxs = outer.apply(has_overlapping_events, axis=1, args=[onsets, last_onsets])
+ idxs = outer.apply(has_overlapping_events, axis=1,
+     args=[onsets, last_onsets])
if len(idxs) == 0:
return pd.DataFrame()
return outer[idxs]


def has_overlapping_events(event, onsets, last_onsets):
""" Searches for onset/last_onset pairs overlapping with the event in 'event.'
@@ -57,9 +61,11 @@ def has_overlapping_events(event, onsets, last_onsets):
last_onsets (numpy array like)
Last indices of the potentially intersecting events.
"""
- matches = last_onsets[(onsets <= event.name+event.duration) & (last_onsets >= event.name)]
+ matches = last_onsets[(onsets <= event.name +
+     event.duration) & (last_onsets >= event.name)]
return len(matches) > 0


def get_eyelink_mask_events(samples, events, find_recovery=True):
""" Finds events from EyeLink data that contain untrustworthy data.
@@ -79,11 +85,13 @@ def get_eyelink_mask_events(samples, events, find_recovery=True):
the proper ends for blink events.
"""
be = events.EBLINK.duration.to_frame()
- be = pd.concat([be, find_nested_events(samples, events.ESACC.duration.to_frame(), be)])
+ be = pd.concat([be, find_nested_events(
+     samples, events.ESACC.duration.to_frame(), be)])
if find_recovery:
adjust_eyelink_recov_idxs(samples, be)
return be


def get_eyelink_mask_idxs(samples, events, find_recovery=True):
""" Calls get_eyelink_mask_events, finds indices from 'samples' within the returned events.
@@ -93,6 +101,7 @@ def get_eyelink_mask_idxs(samples, events, find_recovery=True):
bi = ev_row_idxs(samples, be)
return bi


def mask_eyelink_blinks(samples, events, mask_fields=["pup_l"], find_recovery=True):
""" Sets the value of all untrustworthy data points to NaN.
@@ -118,6 +127,7 @@ def mask_eyelink_blinks(samples, events, mask_fields=["pup_l"], find_recovery=True):
samps.loc[indices, mask_fields] = float('nan')
return samps


def mask_zeros(samples, mask_fields=["pup_l"]):
""" Sets any 0 values in columns in mask_fields to NaN
@@ -133,6 +143,7 @@ def mask_zeros(samples, mask_fields=["pup_l"]):
samps[samps[f] == 0] = float("nan")
return samps


def interp_zeros(samples, interp_fields=["pup_l"]):
""" Replace 0s in 'samples' with linearly interpolated data.
@@ -151,6 +162,7 @@ def interp_zeros(samples, interp_fields=["pup_l"]):
samps.fillna(method="ffill", inplace=True)
return samps


def interp_eyelink_blinks(samples, events, find_recovery=True, interp_fields=["pup_l"]):
""" Replaces the value of all untrustworthy data points linearly interpolated data.
@@ -171,12 +183,14 @@ def interp_eyelink_blinks(samples, events, find_recovery=True, interp_fields=["pup_l"]):
interp_fields (list of strings)
The columns in which we should interpolate data.
"""
- samps = mask_eyelink_blinks(samples, events, mask_fields=interp_fields, find_recovery=find_recovery)
+ samps = mask_eyelink_blinks(
+     samples, events, mask_fields=interp_fields, find_recovery=find_recovery)
# inplace=True causes a crash, so for now...
# fixed by #6284 ; will be in 0.14 release of pandas
samps = samps.interpolate(method="linear", axis=0, inplace=False)
return samps


def ev_row_idxs(samples, events):
""" Returns the indices in 'samples' contained in events from 'events.'
@@ -190,11 +204,12 @@ def ev_row_idxs(samples, events):
import numpy as np
idxs = []
for idx, dur in events.duration.items():
- idxs.extend(list(range(idx, int(idx+dur))))
+ idxs.extend(list(range(idx, int(idx + dur))))
idxs = np.unique(idxs)
idxs = np.intersect1d(idxs, samples.index.tolist())
return idxs


def adjust_eyelink_recov_idxs(samples, events, z_thresh=.1, window=1000, kernel_size=100):
""" Extends event endpoint until the z-scored derivative of 'field's timecourse drops below thresh
@@ -225,35 +240,37 @@ def adjust_eyelink_recov_idxs(samples, events, z_thresh=.1, window=1000, kernel_size=100):
# find a pupil size field to use
p_fields = [f for f in samples.columns if f in PUP_FIELDS]
if len(p_fields) == 0:
- return # if we can't find a pupil field, we won't make any adjustments
+ return  # if we can't find a pupil field, we won't make any adjustments
field = p_fields[0]
# use pandas to take rolling mean. pandas' kernel looks backwards, so we need to pull a reverse...
dfs = np.gradient(samples[field].values)
reversed_dfs = dfs[::-1]
- reversed_dfs_ravg = np.array(pd.rolling_mean(pd.Series(reversed_dfs),window=kernel_size, min_periods=1))
+ reversed_dfs_ravg = np.array(pd.rolling_mean(
+     pd.Series(reversed_dfs), window=kernel_size, min_periods=1))
dfs_ravg = reversed_dfs_ravg[::-1]
- dfs_ravg = np.abs((dfs_ravg-np.mean(dfs_ravg))/np.std(dfs_ravg))
+ dfs_ravg = np.abs((dfs_ravg - np.mean(dfs_ravg)) / np.std(dfs_ravg))
samp_count = len(samples)
# search for drop beneath z_thresh after end index
new_durs = []
for idx, dur in events.duration.items():
try:
- s_pos = samples.index.get_loc(idx + dur) - 1
- e_pos = samples.index[min(s_pos+window, samp_count-1)]
+ s_pos = samples.index.get_loc(idx + dur) - 1
+ e_pos = samples.index[min(s_pos + window, samp_count - 1)]
except Exception as e:
# can't do much about that
s_pos = e_pos = 0
if s_pos == e_pos:
new_durs.append(dur)
continue
- e_dpos = np.argmax(dfs_ravg[s_pos:e_pos] < z_thresh) # 0 if not found
- new_end = samples.index[min(s_pos + e_dpos, samp_count-1)]
+ e_dpos = np.argmax(dfs_ravg[s_pos:e_pos] < z_thresh)  # 0 if not found
+ new_end = samples.index[min(s_pos + e_dpos, samp_count - 1)]
new_durs.append(new_end - idx)
events.duration = new_durs

#-------------------------------------------------------------
# Filters


def butterworth_series(samples, fields=["pup_l"], filt_order=5, cutoff_freq=.01, inplace=False):
""" Applies a butterworth filter to the given fields
@@ -267,6 +284,6 @@ def butterworth_series(samples, fields=["pup_l"], filt_order=5, cutoff_freq=.01, inplace=False):
from numpy import array
samps = samples if inplace else samples.copy(deep=True)
B, A = signal.butter(filt_order, cutoff_freq, output="BA")
- samps[fields] = samps[fields].apply(lambda x: signal.filtfilt(B,A,x), axis=0)
+ samps[fields] = samps[fields].apply(
+     lambda x: signal.filtfilt(B, A, x), axis=0)
return samps
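
A minimal usage sketch of the cleanup functions above, chained end to end. This is not part of the commit: load_eyelink_dataset and the file name are assumptions about cili's loader, shown only to give the functions context.

# usage sketch (not part of this commit): blink masking -> interpolation -> smoothing
from cili.util import load_eyelink_dataset  # assumed loader, returns (Samples, Events)
from cili.cleanup import interp_eyelink_blinks, interp_zeros, butterworth_series

samps, events = load_eyelink_dataset('recording.asc')  # hypothetical file
# mask EyeLink blink/saccade artifacts (with recovery adjustment) and interpolate across them
samps = interp_eyelink_blinks(samps, events, find_recovery=True, interp_fields=['pup_l'])
# clear any remaining dropout zeros, then low-pass the pupil trace
samps = interp_zeros(samps, interp_fields=['pup_l'])
samps = butterworth_series(samps, fields=['pup_l'], filt_order=5, cutoff_freq=.01)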

35 changes: 22 additions & 13 deletions cili/extract.py
@@ -6,6 +6,7 @@
TIME_UNITS = 'time'
SAMP_UNITS = 'samples'


def extract_event_ranges(samples, events_dataframe, start_offset=0,
end_offset=0, round_indices=True, borrow_attributes=[]):
""" Extracts ranges from samples based on event timing.
@@ -52,7 +53,8 @@ def extract_event_ranges(samples, events_dataframe, start_offset=0,
r_times.columns = ['last_onset']
# sanity check - make sure no events start before the data, or end afterwards
if any(r_times.index < samples.index[0]):
raise ValueError("at least one event range starts before the first sample")
raise ValueError(
"at least one event range starts before the first sample")
if any(r_times.index > samples.index[-1]):
raise ValueError("at least one event range ends after the last sample")

@@ -65,14 +67,14 @@ def extract_event_ranges(samples, events_dataframe, start_offset=0,
# we're going to make a df with a hierarchical index.
samples['orig_idx'] = samples.index
midx = pd.MultiIndex.from_product([list(range(len(e_starts))), list(range(r_len))],
- names=['event', 'onset'])
+ names=['event', 'onset'])
# get all of the samples!
# idxs = []
df = pd.DataFrame()
idx = 0
for stime, etime in r_times.itertuples():
# get the start time... add the number of indices that you want...
- s_idx = np.where(samples.index > stime)[0][0]-1
+ s_idx = np.where(samples.index > stime)[0][0] - 1
e_idx = s_idx + r_len - 1
stime = samples.index[s_idx]
etime = samples.index[e_idx]
@@ -85,6 +87,7 @@ def extract_event_ranges(samples, events_dataframe, start_offset=0,
df.index = midx
return df


def extract_events(samples, events, offset=0, duration=0,
units='samples', borrow_attributes=[]):
""" Extracts ranges from samples based on event timing and sample count.
@@ -136,39 +139,45 @@ def extract_events(samples, events, offset=0, duration=0,
if units == TIME_UNITS:
# get the indices for the first event (minus the first index), then use
# the length of the first event as a template for all events
- r_times = e_starts+offset
+ r_times = e_starts + offset
ev_idxs = np.logical_and(samples.index <= r_times.iloc[0] + duration,
samples.index > r_times.iloc[0])
r_dur = len(np.where(ev_idxs)[0]) + 1
- r_idxs = [np.where(samples.index > rt)[0][0]-1 for rt in r_times]
+ r_idxs = [np.where(samples.index > rt)[0][0] - 1 for rt in r_times]
# sanity check - make sure no events start before the data, or end afterwards
if any(r_times < samples.index[0]):
raise ValueError("at least one event range starts before the first sample")
raise ValueError(
"at least one event range starts before the first sample")
if any(r_times > samples.index[-1]):
raise ValueError("at least one event range ends after the last sample")
raise ValueError(
"at least one event range ends after the last sample")
elif units == SAMP_UNITS:
# just find the indexes of the event starts, and offset by sample count
- r_idxs = np.array([np.where(samples.index > et)[0][0]-1+offset for et in e_starts])
+ r_idxs = np.array([np.where(samples.index > et)[0]
+     [0] - 1 + offset for et in e_starts])
r_dur = duration
if any(r_idxs < 0):
raise ValueError("at least one event range starts before the first sample")
raise ValueError(
"at least one event range starts before the first sample")
if any(r_idxs >= len(samples)):
raise ValueError("at least one event range ends after the last sample")
raise ValueError(
"at least one event range ends after the last sample")
else:
raise ValueError("Not a valid unit!")

# make a hierarchical index
samples['orig_idx'] = samples.index
midx = pd.MultiIndex.from_product([list(range(len(e_starts))), list(range(r_dur))],
- names=['event', 'onset'])
+ names=['event', 'onset'])
# get the samples
df = pd.DataFrame()
idx = 0
for s_idx in r_idxs:
# get the start time... add the number of indices that you want...
- e_idx = s_idx + r_dur-1 # pandas.loc indexing is inclusive
+ e_idx = s_idx + r_dur - 1  # pandas.loc indexing is inclusive
# this deepcopy is heavy handed... but gets around some early pandas bugs
- new_df = deepcopy(samples.loc[samples.index[s_idx] : samples.index[e_idx]])
+ new_df = deepcopy(
+     samples.loc[samples.index[s_idx]: samples.index[e_idx]])
for ba in borrow_attributes:
new_df[ba] = events.iloc[idx].get(ba, float('nan'))
df = pd.concat([df, new_df])
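For orientation, a sketch of extract_events in sample units. Not part of this commit: the EFIX event type and the borrowed 'condition' column are illustrative assumptions about the loaded events, and samps/events are the objects from the cleanup sketch above.

# usage sketch (not part of this commit): take 1000 samples from each fixation onset
from cili.extract import extract_events

fix_ranges = extract_events(samps, events.EFIX, offset=0, duration=1000,
                            units='samples', borrow_attributes=['condition'])
# the result carries the hierarchical (event, onset) index built above
print(fix_ranges.head())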
21 changes: 14 additions & 7 deletions cili/models.py
@@ -2,12 +2,14 @@
import pandas.io.pytables as pt
from pandas.compat import u_safe as u, string_types, isidentifier


class SaveMixin(object):
""" Bakes in some save settings for NDFrame subclasses
You can still use the pandas methods, but for quick saving and loading
this mixin provides some settings you might want to reuse.
"""

def __init__(self, *args, **kwargs):
super(SaveMixin, self).__init__(*args, **kwargs)

@@ -28,14 +30,17 @@ def load_saved(cls, save_path):
def from_pd_obj(cls, pd_obj):
return cls(pd_obj._data.copy()).__finalize__(pd_obj)


class Samples(SaveMixin, pd.DataFrame):
"""Pandas DataFrame subclas for representing eye tracking timeseries data.
Indexes may be hierarchical.
"""

def __init__(self, *args, **kwargs):
super(Samples, self).__init__(*args, **kwargs)



class Events(object):
"""Pandas Panel-like object that gives you access to DataFrames via standard accessors.
@@ -48,6 +53,7 @@ class Events(object):
Right now, the best way to make one of these is to use Events.from_dict().
"""

def __init__(self, *args, **kwargs):
super(Events, self).__init__(*args, **kwargs)
self.dframes = {}
@@ -62,14 +68,14 @@ def save(self, save_path):
def load_saved(cls, save_path):
obj = cls()
s = pt.HDFStore(save_path)
- obj.dframes = dict([(k[1:],s[k]) for k in list(s.keys())])
+ obj.dframes = dict([(k[1:], s[k]) for k in list(s.keys())])
s.close()
return obj

@classmethod
def from_dict(cls, the_d):
""" Returns an Events instance containing the given DataFrames
Parameters
----------
the_d (dict)
@@ -115,7 +121,7 @@ def from_list_of_dicts(cls, events_list):
def _local_dir(self):
""" add the string-like attributes from the info_axis """
return [c for c in list(self.dframes.keys())
- if isinstance(c, string_types) and isidentifier(c)]
+ if isinstance(c, string_types) and isidentifier(c)]

def __dir__(self):
"""
Expand All @@ -137,6 +143,7 @@ def __getattr__(self, name):
raise AttributeError("'%s' object has no attribute '%s'" %
(type(self).__name__, name))


def initialize_hdf5():
- pt._TYPE_MAP.update({Events:u('wide'), Samples:u('frame'),})
- pt._AXES_MAP.update({Events:[1, 2], Samples:[0],})
+ pt._TYPE_MAP.update({Events: u('wide'), Samples: u('frame'), })
+ pt._AXES_MAP.update({Events: [1, 2], Samples: [0], })
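
A small sketch of the Events container in use, since the diff above only shows its internals. Not part of this commit: the event type, onsets, and durations are made up for illustration.

# usage sketch (not part of this commit): build an Events container by hand
import pandas as pd
from cili.models import Events

# onsets as the index, durations as a column, mirroring EyeLink-style events
fix = pd.DataFrame({'duration': [180, 220]}, index=[1000, 5000])
evs = Events.from_dict({'EFIX': fix})
print(evs.EFIX.duration.mean())  # attribute access resolves via __getattr__ above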
