RF: PEP8
mbrockman1 committed Jun 1, 2017
1 parent 75c5133 commit 6e64232
Showing 8 changed files with 307 additions and 167 deletions.
51 changes: 34 additions & 17 deletions cili/cleanup.py
@@ -4,14 +4,15 @@
#-------------------------------------------------------------
# Masking


def find_nested_events(samples, outer, inner):
""" Returns indices of events in outer that contain events in inner
This is helpful for dealing with EyeLink blink events. Each is embedded
within a saccade event, and the EyeLink documentation states that data
within saccades that contain blinks is unreliable. So we use this method
to find those saccade events.
Parameters
----------
samples (cili Samples)
@@ -30,18 +31,21 @@ def find_nested_events(samples, outer, inner):
post_onsets = onsets + inner.duration
# convert to list of positional indices
max_onset = samples.index[-1]
- last_idxs = post_onsets.apply(lambda x: max(0, samples.index.searchsorted(x, side="right")-1))
+ last_idxs = post_onsets.apply(lambda x: max(
+     0, samples.index.searchsorted(x, side="right") - 1))
# step back by one positional index to get pos. index of last samples of our events.
# stupid fix - don't nudge the index back for events whose duration went beyond the samples
end_safe_evs = post_onsets <= max_onset
last_idxs[end_safe_evs] = last_idxs[end_safe_evs] - 1
# get the time indices of the last samples of our events
last_onsets = last_idxs.apply(lambda x: samples.index[x])
- idxs = outer.apply(has_overlapping_events, axis=1, args=[onsets, last_onsets])
+ idxs = outer.apply(has_overlapping_events, axis=1,
+     args=[onsets, last_onsets])
if len(idxs) == 0:
return pd.DataFrame()
return outer[idxs]


def has_overlapping_events(event, onsets, last_onsets):
""" Searches for onset/last_onset pairs overlapping with the event in 'event.'
@@ -57,9 +61,11 @@ def has_overlapping_events(event, onsets, last_onsets):
last_onsets (numpy array like)
Last indices of the potentially intersecting events.
"""
- matches = last_onsets[(onsets <= event.name+event.duration) & (last_onsets >= event.name)]
+ matches = last_onsets[(onsets <= event.name +
+     event.duration) & (last_onsets >= event.name)]
return len(matches) > 0


def get_eyelink_mask_events(samples, events, find_recovery=True):
""" Finds events from EyeLink data that contain untrustworthy data.
@@ -79,11 +85,13 @@ def get_eyelink_mask_events(samples, events, find_recovery=True):
the proper ends for blink events.
"""
be = events.EBLINK.duration.to_frame()
- be = pd.concat([be, find_nested_events(samples, events.ESACC.duration.to_frame(), be)])
+ be = pd.concat([be, find_nested_events(
+     samples, events.ESACC.duration.to_frame(), be)])
if find_recovery:
adjust_eyelink_recov_idxs(samples, be)
return be


def get_eyelink_mask_idxs(samples, events, find_recovery=True):
""" Calls get_eyelink_mask_events, finds indices from 'samples' within the returned events.
@@ -93,6 +101,7 @@ def get_eyelink_mask_idxs(samples, events, find_recovery=True):
bi = ev_row_idxs(samples, be)
return bi


def mask_eyelink_blinks(samples, events, mask_fields=["pup_l"], find_recovery=True):
""" Sets the value of all untrustworthy data points to NaN.
@@ -118,6 +127,7 @@ def mask_eyelink_blinks(samples, events, mask_fields=["pup_l"], find_recovery=True):
samps.loc[indices, mask_fields] = float('nan')
return samps


def mask_zeros(samples, mask_fields=["pup_l"]):
""" Sets any 0 values in columns in mask_fields to NaN
@@ -133,6 +143,7 @@ def mask_zeros(samples, mask_fields=["pup_l"]):
samps[samps[f] == 0] = float("nan")
return samps


def interp_zeros(samples, interp_fields=["pup_l"]):
""" Replace 0s in 'samples' with linearly interpolated data.
@@ -151,6 +162,7 @@ def interp_zeros(samples, interp_fields=["pup_l"]):
samps.fillna(method="ffill", inplace=True)
return samps


def interp_eyelink_blinks(samples, events, find_recovery=True, interp_fields=["pup_l"]):
""" Replaces the value of all untrustworthy data points linearly interpolated data.
@@ -171,12 +183,14 @@ def interp_eyelink_blinks(samples, events, find_recovery=True, interp_fields=["pup_l"]):
interp_fields (list of strings)
The columns in which we should interpolate data.
"""
- samps = mask_eyelink_blinks(samples, events, mask_fields=interp_fields, find_recovery=find_recovery)
+ samps = mask_eyelink_blinks(
+     samples, events, mask_fields=interp_fields, find_recovery=find_recovery)
# inplace=True causes a crash, so for now...
# fixed by #6284 ; will be in 0.14 release of pandas
samps = samps.interpolate(method="linear", axis=0, inplace=False)
return samps


def ev_row_idxs(samples, events):
""" Returns the indices in 'samples' contained in events from 'events.'
@@ -190,11 +204,12 @@ def ev_row_idxs(samples, events):
import numpy as np
idxs = []
for idx, dur in events.duration.items():
- idxs.extend(list(range(idx, int(idx+dur))))
+ idxs.extend(list(range(idx, int(idx + dur))))
idxs = np.unique(idxs)
idxs = np.intersect1d(idxs, samples.index.tolist())
return idxs


def adjust_eyelink_recov_idxs(samples, events, z_thresh=.1, window=1000, kernel_size=100):
""" Extends event endpoint until the z-scored derivative of 'field's timecourse drops below thresh
@@ -225,35 +240,37 @@ def adjust_eyelink_recov_idxs(samples, events, z_thresh=.1, window=1000, kernel_size=100):
# find a pupil size field to use
p_fields = [f for f in samples.columns if f in PUP_FIELDS]
if len(p_fields) == 0:
- return # if we can't find a pupil field, we won't make any adjustments
+ return  # if we can't find a pupil field, we won't make any adjustments
field = p_fields[0]
# use pandas to take rolling mean. pandas' kernel looks backwards, so we need to pull a reverse...
dfs = np.gradient(samples[field].values)
reversed_dfs = dfs[::-1]
- reversed_dfs_ravg = np.array(pd.rolling_mean(pd.Series(reversed_dfs),window=kernel_size, min_periods=1))
+ reversed_dfs_ravg = np.array(pd.rolling_mean(
+     pd.Series(reversed_dfs), window=kernel_size, min_periods=1))
dfs_ravg = reversed_dfs_ravg[::-1]
- dfs_ravg = np.abs((dfs_ravg-np.mean(dfs_ravg))/np.std(dfs_ravg))
+ dfs_ravg = np.abs((dfs_ravg - np.mean(dfs_ravg)) / np.std(dfs_ravg))
samp_count = len(samples)
# search for drop beneath z_thresh after end index
new_durs = []
for idx, dur in events.duration.items():
try:
- s_pos = samples.index.get_loc(idx + dur) - 1
- e_pos = samples.index[min(s_pos+window, samp_count-1)]
+ s_pos = samples.index.get_loc(idx + dur) - 1
+ e_pos = samples.index[min(s_pos + window, samp_count - 1)]
except Exception as e:
# can't do much about that
s_pos = e_pos = 0
if s_pos == e_pos:
new_durs.append(dur)
continue
- e_dpos = np.argmax(dfs_ravg[s_pos:e_pos] < z_thresh) # 0 if not found
- new_end = samples.index[min(s_pos + e_dpos, samp_count-1)]
+ e_dpos = np.argmax(dfs_ravg[s_pos:e_pos] < z_thresh)  # 0 if not found
+ new_end = samples.index[min(s_pos + e_dpos, samp_count - 1)]
new_durs.append(new_end - idx)
events.duration = new_durs

#-------------------------------------------------------------
# Filters


def butterworth_series(samples, fields=["pup_l"], filt_order=5, cutoff_freq=.01, inplace=False):
""" Applies a butterworth filter to the given fields
@@ -267,6 +284,6 @@ def butterworth_series(samples, fields=["pup_l"], filt_order=5, cutoff_freq=.01, inplace=False):
from numpy import array
samps = samples if inplace else samples.copy(deep=True)
B, A = signal.butter(filt_order, cutoff_freq, output="BA")
- samps[fields] = samps[fields].apply(lambda x: signal.filtfilt(B,A,x), axis=0)
+ samps[fields] = samps[fields].apply(
+     lambda x: signal.filtfilt(B, A, x), axis=0)
return samps
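
A minimal usage sketch of the cleanup functions above, chained end to end. This is not part of the commit: load_eyelink_dataset and the file name are assumptions about cili's loader, shown only to give the functions context.

# usage sketch (not part of this commit): blink masking -> interpolation -> smoothing
from cili.util import load_eyelink_dataset  # assumed loader, returns (Samples, Events)
from cili.cleanup import interp_eyelink_blinks, interp_zeros, butterworth_series

samps, events = load_eyelink_dataset('recording.asc')  # hypothetical file
# mask EyeLink blink/saccade artifacts (with recovery adjustment) and interpolate across them
samps = interp_eyelink_blinks(samps, events, find_recovery=True, interp_fields=['pup_l'])
# clear any remaining dropout zeros, then low-pass the pupil trace
samps = interp_zeros(samps, interp_fields=['pup_l'])
samps = butterworth_series(samps, fields=['pup_l'], filt_order=5, cutoff_freq=.01)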

35 changes: 22 additions & 13 deletions cili/extract.py
@@ -6,6 +6,7 @@
TIME_UNITS = 'time'
SAMP_UNITS = 'samples'


def extract_event_ranges(samples, events_dataframe, start_offset=0,
end_offset=0, round_indices=True, borrow_attributes=[]):
""" Extracts ranges from samples based on event timing.
@@ -52,7 +53,8 @@ def extract_event_ranges(samples, events_dataframe, start_offset=0,
r_times.columns = ['last_onset']
# sanity check - make sure no events start before the data, or end afterwards
if any(r_times.index < samples.index[0]):
raise ValueError("at least one event range starts before the first sample")
raise ValueError(
"at least one event range starts before the first sample")
if any(r_times.index > samples.index[-1]):
raise ValueError("at least one event range ends after the last sample")

@@ -65,14 +67,14 @@ def extract_event_ranges(samples, events_dataframe, start_offset=0,
# we're going to make a df with a hierarchical index.
samples['orig_idx'] = samples.index
midx = pd.MultiIndex.from_product([list(range(len(e_starts))), list(range(r_len))],
- names=['event', 'onset'])
+ names=['event', 'onset'])
# get all of the samples!
# idxs = []
df = pd.DataFrame()
idx = 0
for stime, etime in r_times.itertuples():
# get the start time... add the number of indices that you want...
- s_idx = np.where(samples.index > stime)[0][0]-1
+ s_idx = np.where(samples.index > stime)[0][0] - 1
e_idx = s_idx + r_len - 1
stime = samples.index[s_idx]
etime = samples.index[e_idx]
@@ -85,6 +87,7 @@ def extract_event_ranges(samples, events_dataframe, start_offset=0,
df.index = midx
return df


def extract_events(samples, events, offset=0, duration=0,
units='samples', borrow_attributes=[]):
""" Extracts ranges from samples based on event timing and sample count.
@@ -136,39 +139,45 @@ def extract_events(samples, events, offset=0, duration=0,
if units == TIME_UNITS:
# get the indices for the first event (minus the first index), then use
# the length of the first event as a template for all events
- r_times = e_starts+offset
+ r_times = e_starts + offset
ev_idxs = np.logical_and(samples.index <= r_times.iloc[0] + duration,
samples.index > r_times.iloc[0])
r_dur = len(np.where(ev_idxs)[0]) + 1
- r_idxs = [np.where(samples.index > rt)[0][0]-1 for rt in r_times]
+ r_idxs = [np.where(samples.index > rt)[0][0] - 1 for rt in r_times]
# sanity check - make sure no events start before the data, or end afterwards
if any(r_times < samples.index[0]):
raise ValueError("at least one event range starts before the first sample")
raise ValueError(
"at least one event range starts before the first sample")
if any(r_times > samples.index[-1]):
raise ValueError("at least one event range ends after the last sample")
raise ValueError(
"at least one event range ends after the last sample")
elif units == SAMP_UNITS:
# just find the indexes of the event starts, and offset by sample count
- r_idxs = np.array([np.where(samples.index > et)[0][0]-1+offset for et in e_starts])
+ r_idxs = np.array([np.where(samples.index > et)[0]
+     [0] - 1 + offset for et in e_starts])
r_dur = duration
if any(r_idxs < 0):
raise ValueError("at least one event range starts before the first sample")
raise ValueError(
"at least one event range starts before the first sample")
if any(r_idxs >= len(samples)):
raise ValueError("at least one event range ends after the last sample")
raise ValueError(
"at least one event range ends after the last sample")
else:
raise ValueError("Not a valid unit!")

# make a hierarchical index
samples['orig_idx'] = samples.index
midx = pd.MultiIndex.from_product([list(range(len(e_starts))), list(range(r_dur))],
- names=['event', 'onset'])
+ names=['event', 'onset'])
# get the samples
df = pd.DataFrame()
idx = 0
for s_idx in r_idxs:
# get the start time... add the number of indices that you want...
- e_idx = s_idx + r_dur-1 # pandas.loc indexing is inclusive
+ e_idx = s_idx + r_dur - 1  # pandas.loc indexing is inclusive
# this deepcopy is heavy handed... but gets around some early pandas bugs
- new_df = deepcopy(samples.loc[samples.index[s_idx] : samples.index[e_idx]])
+ new_df = deepcopy(
+     samples.loc[samples.index[s_idx]: samples.index[e_idx]])
for ba in borrow_attributes:
new_df[ba] = events.iloc[idx].get(ba, float('nan'))
df = pd.concat([df, new_df])
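For orientation, a sketch of extract_events in sample units. Not part of this commit: the EFIX event type and the borrowed 'condition' column are illustrative assumptions about the loaded events, and samps/events are the objects from the cleanup sketch above.

# usage sketch (not part of this commit): take 1000 samples from each fixation onset
from cili.extract import extract_events

fix_ranges = extract_events(samps, events.EFIX, offset=0, duration=1000,
                            units='samples', borrow_attributes=['condition'])
# the result carries the hierarchical (event, onset) index built above
print(fix_ranges.head())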
21 changes: 14 additions & 7 deletions cili/models.py
@@ -2,12 +2,14 @@
import pandas.io.pytables as pt
from pandas.compat import u_safe as u, string_types, isidentifier


class SaveMixin(object):
""" Bakes in some save settings for NDFrame subclasses
You can still use the pandas methods, but for quick saving and loading
this mixin provides some settings you might want to reuse.
"""

def __init__(self, *args, **kwargs):
super(SaveMixin, self).__init__(*args, **kwargs)

@@ -28,14 +30,17 @@ def load_saved(cls, save_path):
def from_pd_obj(cls, pd_obj):
return cls(pd_obj._data.copy()).__finalize__(pd_obj)


class Samples(SaveMixin, pd.DataFrame):
"""Pandas DataFrame subclas for representing eye tracking timeseries data.
Indexes may be hierarchical.
"""

def __init__(self, *args, **kwargs):
super(Samples, self).__init__(*args, **kwargs)



class Events(object):
"""Pandas Panel-like object that gives you access to DataFrames via standard accessors.
@@ -48,6 +53,7 @@ class Events(object):
Right now, the best way to make one of these is to use Events.from_dict().
"""

def __init__(self, *args, **kwargs):
super(Events, self).__init__(*args, **kwargs)
self.dframes = {}
@@ -62,14 +68,14 @@ def save(self, save_path):
def load_saved(cls, save_path):
obj = cls()
s = pt.HDFStore(save_path)
- obj.dframes = dict([(k[1:],s[k]) for k in list(s.keys())])
+ obj.dframes = dict([(k[1:], s[k]) for k in list(s.keys())])
s.close()
return obj

@classmethod
def from_dict(cls, the_d):
""" Returns an Events instance containing the given DataFrames
Parameters
----------
the_d (dict)
@@ -115,7 +121,7 @@ def from_list_of_dicts(cls, events_list):
def _local_dir(self):
""" add the string-like attributes from the info_axis """
return [c for c in list(self.dframes.keys())
- if isinstance(c, string_types) and isidentifier(c)]
+ if isinstance(c, string_types) and isidentifier(c)]

def __dir__(self):
"""
Expand All @@ -137,6 +143,7 @@ def __getattr__(self, name):
raise AttributeError("'%s' object has no attribute '%s'" %
(type(self).__name__, name))


def initialize_hdf5():
- pt._TYPE_MAP.update({Events:u('wide'), Samples:u('frame'),})
- pt._AXES_MAP.update({Events:[1, 2], Samples:[0],})
+ pt._TYPE_MAP.update({Events: u('wide'), Samples: u('frame'), })
+ pt._AXES_MAP.update({Events: [1, 2], Samples: [0], })
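
A small sketch of the Events container in use, since the diff above only shows its internals. Not part of this commit: the event type, onsets, and durations are made up for illustration.

# usage sketch (not part of this commit): build an Events container by hand
import pandas as pd
from cili.models import Events

# onsets as the index, durations as a column, mirroring EyeLink-style events
fix = pd.DataFrame({'duration': [180, 220]}, index=[1000, 5000])
evs = Events.from_dict({'EFIX': fix})
print(evs.EFIX.duration.mean())  # attribute access resolves via __getattr__ above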
