Commit 7aaea3f: cleaned

sronilsson committed Feb 13, 2024
1 parent 5fdfebe commit 7aaea3f
Showing 5 changed files with 145 additions and 38 deletions.
40 changes: 32 additions & 8 deletions simba/mixins/circular_statistics.py
@@ -607,7 +607,9 @@ def sliding_angular_diff(

     @staticmethod
     @njit("(float32[:], float64[:], int64)")
-    def agg_angular_diff_timebins(data: np.ndarray, time_windows: np.ndarray, fps: int) -> np.ndarray:
+    def agg_angular_diff_timebins(
+        data: np.ndarray, time_windows: np.ndarray, fps: int
+    ) -> np.ndarray:
         """
         Compute the difference between the median angle in the current time-window versus the previous time window.
         For example, computes the difference between the mean angle in the first 1s of the video versus
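For readers skimming this hunk: the method bins the angle stream into time windows and compares each bin's central direction with the previous bin's. Below is a minimal plain-NumPy sketch of that idea, not SimBA's jitted implementation; the function name, the fps-sized binning, and the use of a plain (non-circular) median are assumptions for illustration.

```python
import numpy as np

def angular_diff_timebins(data, fps):
    # Median direction per fps-sized bin, then the circular difference
    # (pi - |pi - |a - b||, the same identity used in the hunks below)
    # between consecutive bins. A true circular median would need
    # wraparound handling; a plain median is used here for brevity.
    medians = [np.median(data[i : i + fps]) for i in range(0, len(data) - fps + 1, fps)]
    medians = np.deg2rad(np.asarray(medians))
    d = np.pi - np.abs(np.pi - np.abs(np.diff(medians)))
    return np.rad2deg(d)

angles = np.array([10.0, 20.0, 30.0, 350.0, 355.0, 345.0], dtype=np.float32)
print(angular_diff_timebins(angles, fps=3))  # [30.] : bins at ~20 and ~350 deg
```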
@@ -678,9 +680,14 @@ def rao_spacing(data: np.array):
         data = np.sort(data)
         Ti, TiL = np.full((data.shape[0]), np.nan), np.full((data.shape[0]), np.nan)
         l = np.int8(360 / len(data))
-        Ti[-1] = np.rad2deg(np.pi - np.abs(np.pi - np.abs(np.deg2rad(data[0]) - np.deg2rad(data[-1]))))
+        Ti[-1] = np.rad2deg(
+            np.pi - np.abs(np.pi - np.abs(np.deg2rad(data[0]) - np.deg2rad(data[-1])))
+        )
         for j in prange(data.shape[0] - 1, -1, -1):
-            Ti[j] = np.rad2deg(np.pi - np.abs(np.pi - np.abs(np.deg2rad(data[j]) - np.deg2rad(data[j - 1]))))
+            Ti[j] = np.rad2deg(
+                np.pi
+                - np.abs(np.pi - np.abs(np.deg2rad(data[j]) - np.deg2rad(data[j - 1])))
+            )
         for k in prange(Ti.shape[0]):
             TiL[int(k)] = max((l, Ti[k])) - min((l, Ti[k]))
         S = np.sum(TiL)
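For context on the hunk above: Rao's spacing statistic measures departure from circular uniformity as U = 0.5 * sum(|T_i - lambda|), where the T_i are the arc gaps between successive sorted angles and lambda = 360/n is the gap expected under uniformity. A textbook NumPy sketch for comparison (not the project's jitted code; note that the SimBA version above also truncates lambda to an integer via np.int8):

```python
import numpy as np

def rao_spacing_u(angles_deg):
    # Sort the angles, measure the gaps between neighbours (including the
    # gap that wraps past 360), then sum the deviations from uniform spacing.
    a = np.sort(np.asarray(angles_deg, dtype=np.float64) % 360.0)
    gaps = np.append(np.diff(a), 360.0 - a[-1] + a[0])
    lam = 360.0 / a.shape[0]
    return 0.5 * np.sum(np.abs(gaps - lam))

print(rao_spacing_u(np.array([0, 90, 180, 270])))  # 0.0   (perfectly uniform)
print(rao_spacing_u(np.array([1, 2, 3, 4])))       # 267.0 (tightly clustered)
```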
@@ -689,7 +696,9 @@

     @staticmethod
     @njit("(float32[:], float64[:], int64)")
-    def sliding_rao_spacing(data: np.ndarray, time_windows: np.ndarray, fps: int) -> np.ndarray:
+    def sliding_rao_spacing(
+        data: np.ndarray, time_windows: np.ndarray, fps: int
+    ) -> np.ndarray:
         """
         Jitted compute of the uniformity of a circular dataset in sliding windows.
@@ -726,11 +735,24 @@ def sliding_rao_spacing(data: np.ndarray, time_windows: np.ndarray, fps: int) ->
             window_size = int(time_windows[win_cnt] * fps)
             for i in range(window_size, data.shape[0]):
                 w_data = np.sort(data[i - window_size : i])
-                Ti, TiL = np.full((w_data.shape[0]), np.nan), np.full((w_data.shape[0]), np.nan)
+                Ti, TiL = np.full((w_data.shape[0]), np.nan), np.full(
+                    (w_data.shape[0]), np.nan
+                )
                 l = np.int8(360 / len(w_data))
-                Ti[-1] = np.rad2deg(np.pi - np.abs(np.pi - np.abs(np.deg2rad(w_data[0]) - np.deg2rad(w_data[-1]))))
+                Ti[-1] = np.rad2deg(
+                    np.pi
+                    - np.abs(
+                        np.pi - np.abs(np.deg2rad(w_data[0]) - np.deg2rad(w_data[-1]))
+                    )
+                )
                 for j in prange(w_data.shape[0] - 1, -1, -1):
-                    Ti[j] = np.rad2deg(np.pi - np.abs(np.pi - np.abs(np.deg2rad(w_data[j]) - np.deg2rad(w_data[j - 1]))))
+                    Ti[j] = np.rad2deg(
+                        np.pi
+                        - np.abs(
+                            np.pi
+                            - np.abs(np.deg2rad(w_data[j]) - np.deg2rad(w_data[j - 1]))
+                        )
+                    )
                 for k in prange(Ti.shape[0]):
                     TiL[int(k)] = max((l, Ti[k])) - min((l, Ti[k]))
                 S = np.sum(TiL)
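A hedged usage sketch for the sliding variant. The class name and import path are inferred from the module path, and the output layout is inferred from the signature; neither is confirmed by the truncated docstring in this view.

```python
import numpy as np

# Assumed entry point, based on the file simba/mixins/circular_statistics.py:
# from simba.mixins.circular_statistics import CircularStatisticsMixin

angles = np.random.uniform(0, 360, size=300).astype(np.float32)  # 10 s at 30 fps
windows = np.array([0.5, 1.0], dtype=np.float64)                 # window sizes (s)

# results = CircularStatisticsMixin.sliding_rao_spacing(angles, windows, 30)
# Presumably one Rao-spacing value per frame and per window size, with the
# first window_size frames of each column left at a fill value.
```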
@@ -965,7 +987,9 @@ def circular_hotspots(data: np.ndarray, bins: np.ndarray) -> np.ndarray:

     @staticmethod
     @njit("(float32[:], int64[:, :], float64, float64)")
-    def sliding_circular_hotspots(data: np.ndarray, bins: np.ndarray, time_window: float, fps: float) -> np.ndarray:
+    def sliding_circular_hotspots(
+        data: np.ndarray, bins: np.ndarray, time_window: float, fps: float
+    ) -> np.ndarray:
         """
         Jitted compute of sliding circular hotspots in a dataset. Calculates circular hotspots in a time-series dataset by sliding a time window
         across the data and computing hotspot statistics for specified circular bins.
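The docstring above describes per-bin occupancy on the circle. A minimal sketch of the static (non-sliding) idea, assuming bins are [start, end) degree ranges that may wrap past 360 and that the statistic is the fraction of samples per bin; SimBA's exact conventions are not visible in this hunk.

```python
import numpy as np

def circular_hotspots(data_deg, bins):
    # bins: (n, 2) array of [start, end) edges in degrees; a bin such as
    # [270, 90] wraps through 0/360. Returns the sample fraction per bin.
    a = np.asarray(data_deg) % 360.0
    out = np.zeros(bins.shape[0])
    for i, (lo, hi) in enumerate(bins):
        mask = (a >= lo) & (a < hi) if lo <= hi else (a >= lo) | (a < hi)
        out[i] = mask.mean()
    return out

bins = np.array([[0, 90], [90, 180], [180, 270], [270, 360]])
print(circular_hotspots(np.array([10, 20, 100, 350]), bins))  # [0.5 0.25 0. 0.25]
```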
34 changes: 25 additions & 9 deletions simba/mixins/timeseries_features_mixin.py
@@ -76,7 +76,8 @@ def hjort_parameters(data: np.ndarray) -> (float, float, float):
         dx = np.diff(np.ascontiguousarray(data))
         ddx = np.diff(np.ascontiguousarray(dx))
         x_var, dx_var = np.var(data), np.var(dx)
-        if (x_var <= 0) or (dx_var <= 0): return 0, 0, 0
+        if (x_var <= 0) or (dx_var <= 0):
+            return 0, 0, 0

         ddx_var = np.var(ddx)
         mobility = np.sqrt(dx_var / x_var)
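The hunk above touches hjort_parameters (sic), which returns the classic Hjorth (1970) descriptors of a signal. Only the guard clause and the mobility line are visible in the diff; the sketch below reconstructs the standard definitions around them, and the (activity, mobility, complexity) return order is an assumption based on the (float, float, float) annotation.

```python
import numpy as np

def hjorth_parameters(x):
    # activity   = var(x)
    # mobility   = sqrt(var(x') / var(x))
    # complexity = mobility(x') / mobility(x)
    dx = np.diff(x)
    ddx = np.diff(dx)
    x_var, dx_var = np.var(x), np.var(dx)
    if (x_var <= 0) or (dx_var <= 0):  # degenerate/constant signal, as in the diff
        return 0.0, 0.0, 0.0
    ddx_var = np.var(ddx)
    mobility = np.sqrt(dx_var / x_var)
    complexity = np.sqrt(ddx_var / dx_var) / mobility
    return x_var, mobility, complexity

t = np.linspace(0, 1, 1000)
print(hjorth_parameters(np.sin(2 * np.pi * 5 * t)))  # pure sine: complexity ~ 1
```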
@@ -209,7 +210,9 @@ def crossings(data: np.ndarray, val: float) -> int:

     @staticmethod
     @njit("(float32[:], float64, float64[:], int64,)")
-    def sliding_crossings(data: np.ndarray, val: float, time_windows: np.ndarray, fps: int) -> np.ndarray:
+    def sliding_crossings(
+        data: np.ndarray, val: float, time_windows: np.ndarray, fps: int
+    ) -> np.ndarray:
         """
         Compute the number of crossings over sliding windows in a data array.
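For orientation, a crossing here is a sign change of (data - val). A tiny NumPy sketch of the non-sliding count; how samples exactly equal to val are treated is a guess, and the jitted version may differ.

```python
import numpy as np

def crossings(data, val):
    s = np.sign(data - val)              # -1 below, +1 above the reference
    return int(np.sum(s[:-1] != s[1:]))  # count sign flips between neighbours

print(crossings(np.array([0.0, 2.0, -1.0, 3.0]), 1.0))  # 3
```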
@@ -258,7 +261,9 @@ def sliding_crossings(data: np.ndarray, val: float, time_windows: np.ndarray, fp

     @staticmethod
     @njit("(float32[:], int64, int64, )", cache=True, fastmath=True)
-    def percentile_difference(data: np.ndarray, upper_pct: int, lower_pct: int) -> float:
+    def percentile_difference(
+        data: np.ndarray, upper_pct: int, lower_pct: int
+    ) -> float:
         """
         Jitted compute of the difference between the ``upper`` and ``lower`` percentiles of the data as
         a percentage of the median value. Helps understanding the spread or variability of the data within specified percentiles.
@@ -281,13 +286,20 @@ def percentile_difference(data: np.ndarray, upper_pct: int, lower_pct: int) -> f
         >>> 0.7401574764125177
         """
-        upper_val, lower_val = np.percentile(data, upper_pct), np.percentile(data, lower_pct)
+        upper_val, lower_val = np.percentile(data, upper_pct), np.percentile(
+            data, lower_pct
+        )
         return np.abs(upper_val - lower_val) / np.median(data)

     @staticmethod
     @njit("(float32[:], int64, int64, float64[:], int64, )", cache=True, fastmath=True)
-    def sliding_percentile_difference(data: np.ndarray,upper_pct: int,lower_pct: int,window_sizes: np.ndarray, fps: int) -> np.ndarray:
-
+    def sliding_percentile_difference(
+        data: np.ndarray,
+        upper_pct: int,
+        lower_pct: int,
+        window_sizes: np.ndarray,
+        fps: int,
+    ) -> np.ndarray:
         """
         Jitted computes the difference between the upper and lower percentiles within a sliding window for each position
         in the time series using various window sizes. It returns a 2D array where each row corresponds to a position in the time series,
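Both functions in this hunk reduce to the same quantity, |P_upper - P_lower| / median, computed either over the whole series or over trailing windows. A plain-Python sketch under assumed conventions (rows = frames, columns = window sizes, unfilled leading positions set to -1; none of this is confirmed by the truncated docstring):

```python
import numpy as np

def percentile_difference(data, upper_pct, lower_pct):
    hi, lo = np.percentile(data, upper_pct), np.percentile(data, lower_pct)
    return np.abs(hi - lo) / np.median(data)

def sliding_percentile_difference(data, upper_pct, lower_pct, window_sizes, fps):
    out = np.full((data.shape[0], window_sizes.shape[0]), -1.0)
    for j, w in enumerate(window_sizes):
        win = int(w * fps)
        for i in range(win, data.shape[0]):
            out[i, j] = percentile_difference(data[i - win : i], upper_pct, lower_pct)
    return out

rng = np.random.default_rng(0)
data = rng.normal(10.0, 2.0, 1000).astype(np.float32)
print(percentile_difference(data, 95, 5))                                      # ~0.66
print(sliding_percentile_difference(data, 95, 5, np.array([1.0]), 30).shape)   # (1000, 1)
```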
@@ -352,7 +364,9 @@ def percent_beyond_n_std(data: np.ndarray, n: float) -> float:

     @staticmethod
     @njit("(float64[:], float64, float64[:], int64,)", cache=True, fastmath=True)
-    def sliding_percent_beyond_n_std(data: np.ndarray, n: float, window_sizes: np.ndarray, sample_rate: int) -> np.ndarray:
+    def sliding_percent_beyond_n_std(
+        data: np.ndarray, n: float, window_sizes: np.ndarray, sample_rate: int
+    ) -> np.ndarray:
         """
         Computed the percentage of data points that exceed 'n' standard deviations from the mean for each position in
         the time series using various window sizes. It returns a 2D array where each row corresponds to a position in the time series,
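The underlying statistic is simple. A sketch of the non-sliding version; whether the sliding variant re-estimates the mean and standard deviation inside each window is not visible here.

```python
import numpy as np

def percent_beyond_n_std(data, n):
    # Fraction of samples farther than n standard deviations from the mean.
    return float(np.mean(np.abs(data - np.mean(data)) > n * np.std(data)))

x = np.random.default_rng(1).normal(size=10_000)
print(percent_beyond_n_std(x, 2.0))  # ~0.045, since P(|z| > 2) ~ 4.6% for Gaussians
```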
@@ -1015,7 +1029,9 @@ def sliding_longest_strike(
             (float32[:], float64, int64, types.misc.Omitted(True)),
         ]
     )
-    def time_since_previous_threshold(data: np.ndarray, threshold: float, fps: int, above: bool) -> np.ndarray:
+    def time_since_previous_threshold(
+        data: np.ndarray, threshold: float, fps: int, above: bool
+    ) -> np.ndarray:
         """
         Jitted compute of the time (in seconds) that has elapsed since the last occurrence of a value above (or below)
         a specified threshold in a time series. The time series is assumed to have a constant sample rate.
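A plain-Python sketch of the behaviour the docstring describes. The -1 fill for frames before the first qualifying sample is an assumed convention, as is strict inequality at the threshold.

```python
import numpy as np

def time_since_previous_threshold(data, threshold, fps, above=True):
    hit = data > threshold if above else data < threshold
    out = np.full(data.shape[0], -1.0)
    last = -1
    for i in range(data.shape[0]):
        if hit[i]:
            last = i                   # most recent qualifying frame
        if last >= 0:
            out[i] = (i - last) / fps  # elapsed frames converted to seconds
    return out

print(time_since_previous_threshold(np.array([0.0, 5.0, 1.0, 1.0]), 2.0, fps=2))
# [-1.  0.  0.5 1.]
```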
@@ -1548,4 +1564,4 @@ def acceleration(
             else:
                 results[wS:wE] = v - pv
             pv = v
-        return results
\ No newline at end of file
+        return results
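Only the tail of acceleration survives the truncation. Reading the visible lines (results[wS:wE] = v - pv, then pv = v), one plausible reconstruction is windowed differencing: each window stores the change in its mean value relative to the previous window. The sketch below is that guess, not the committed code; the pix_per_mm scaling, the fps-sized windows, and the zero fill are all assumptions.

```python
import numpy as np

def acceleration(data, pix_per_mm, fps):
    # Assumed semantics: chunk the stream into fps-sized windows and write
    # (current window mean - previous window mean) into each window's slots.
    results = np.full(data.shape[0], 0.0)
    pv = None
    for wS in range(0, data.shape[0] - fps + 1, fps):
        wE = wS + fps
        v = np.mean(data[wS:wE]) / pix_per_mm
        if pv is not None:
            results[wS:wE] = v - pv
        pv = v
    return results

print(acceleration(np.arange(12, dtype=np.float64), pix_per_mm=1.0, fps=4))
# constant slope: each window after the first differs from its predecessor by 4
```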
12 changes: 8 additions & 4 deletions simba/unsupervised/grid_search_visualizers.py
@@ -10,9 +10,9 @@

 from simba.mixins.unsupervised_mixin import UnsupervisedMixin
 from simba.unsupervised.enums import Clustering, Unsupervised
-from simba.utils.checks import (check_if_dir_exists, check_if_filepath_list_is_empty)
-from simba.utils.printing import stdout_success
+from simba.utils.checks import (check_if_dir_exists,
+                                check_if_filepath_list_is_empty)
 from simba.utils.enums import Formats
+from simba.utils.printing import stdout_success


 class GridSearchVisualizer(UnsupervisedMixin):
@@ -36,7 +37,10 @@ def __init__(self, model_dir: str, save_dir: str, settings: dict):
         check_if_dir_exists(in_dir=model_dir)
         self.save_dir, self.settings, self.model_dir = save_dir, settings, model_dir
         self.data_path = glob.glob(model_dir + f"/*.{Formats.PICKLE.value}")
-        check_if_filepath_list_is_empty(filepaths=self.data_path, error_msg=f"SIMBA ERROR: No pickle files found in {model_dir}")
+        check_if_filepath_list_is_empty(
+            filepaths=self.data_path,
+            error_msg=f"SIMBA ERROR: No pickle files found in {model_dir}",
+        )

     def __join_data(self, data: object):
         embedding_data = pd.DataFrame(
@@ -127,7 +131,7 @@ def continuous_visualizer(self, continuous_vars: list):
         )


-settings = {'PALETTE': 'Pastel1'}
+settings = {"PALETTE": "Pastel1"}
 # test = GridSearchVisualizer(model_dir='/Users/simon/Desktop/envs/troubleshooting/unsupervised/cluster_models',
 #                             save_dir='/Users/simon/Desktop/envs/troubleshooting/unsupervised/images',
 #                             settings=settings)
5 changes: 3 additions & 2 deletions simba/unsupervised/tsne.py
@@ -14,8 +14,9 @@
 from sklearn.manifold import TSNE

 import simba
-#from simba.misc_tools import SimbaTimer, check_file_exist_and_readable
-#from simba.utils.enums import Paths
+
+# from simba.misc_tools import SimbaTimer, check_file_exist_and_readable
+# from simba.utils.enums import Paths


 class TSNEGridSearch(object):
92 changes: 77 additions & 15 deletions simba/unsupervised/ui.py
@@ -25,7 +25,12 @@ class UnsupervisedGUI(ConfigReader, PopUpMixin):

     def __init__(self, config_path: str):
         ConfigReader.__init__(self, config_path=config_path)
-        PopUpMixin.__init__( self, title="UNSUPERVISED ANALYSIS", config_path=config_path, size=(1000, 800),)
+        PopUpMixin.__init__(
+            self,
+            title="UNSUPERVISED ANALYSIS",
+            config_path=config_path,
+            size=(1000, 800),
+        )
         self.main_frm = Toplevel()
         self.main_frm.minsize(1000, 800)
         self.main_frm.wm_title("UNSUPERVISED ANALYSIS")
@@ -38,27 +43,84 @@ def __init__(self, config_path: str):
         self.visualization_tab = ttk.Frame(self.main_frm)
         self.metrics_tab = ttk.Frame(self.main_frm)

-        self.main_frm.add(self.create_dataset_tab,text=f'{"[CREATE DATASET]": ^20s}',compound="left",image=self.menu_icons["features"]["img"])
-        self.main_frm.add(self.dimensionality_reduction_tab,text=f'{"[DIMENSIONALITY REDUCTION]": ^20s}',compound="left",image=self.menu_icons["dimensionality_reduction"]["img"])
-        self.main_frm.add(self.clustering_tab,text=f'{"[CLUSTERING]": ^20s}',compound="left",image=self.menu_icons["cluster"]["img"])
-        self.main_frm.add(self.visualization_tab,text=f'{"[VISUALIZATION]": ^20s}',compound="left",image=self.menu_icons["visualize"]["img"])
-        self.main_frm.add(self.metrics_tab, text=f'{"[METRICS]": ^20s}', compound="left", image=self.menu_icons["metrics"]["img"])
+        self.main_frm.add(
+            self.create_dataset_tab,
+            text=f'{"[CREATE DATASET]": ^20s}',
+            compound="left",
+            image=self.menu_icons["features"]["img"],
+        )
+        self.main_frm.add(
+            self.dimensionality_reduction_tab,
+            text=f'{"[DIMENSIONALITY REDUCTION]": ^20s}',
+            compound="left",
+            image=self.menu_icons["dimensionality_reduction"]["img"],
+        )
+        self.main_frm.add(
+            self.clustering_tab,
+            text=f'{"[CLUSTERING]": ^20s}',
+            compound="left",
+            image=self.menu_icons["cluster"]["img"],
+        )
+        self.main_frm.add(
+            self.visualization_tab,
+            text=f'{"[VISUALIZATION]": ^20s}',
+            compound="left",
+            image=self.menu_icons["visualize"]["img"],
+        )
+        self.main_frm.add(
+            self.metrics_tab,
+            text=f'{"[METRICS]": ^20s}',
+            compound="left",
+            image=self.menu_icons["metrics"]["img"],
+        )
         self.main_frm.grid(row=0)

         self.clf_slice_options = [f"ALL CLASSIFIERS ({len(self.clf_names)})"]
-        for clf_name in self.clf_names: self.clf_slice_options.append(f"{clf_name}")
-        create_dataset_frm = LabelFrame(self.create_dataset_tab,text="CREATE DATASET",pady=5,padx=5,font=Formats.LABELFRAME_HEADER_FORMAT.value,fg="black")
-        self.feature_file_selected = FileSelect(create_dataset_frm, "FEATURE FILE (CSV)", lblwidth=25)
-        self.data_slice_dropdown = DropDownMenu(create_dataset_frm,"FEATURE SLICE:",UMLOptions.FEATURE_SLICE_OPTIONS.value,"25",com=lambda x: self.change_status_of_file_select())
+        for clf_name in self.clf_names:
+            self.clf_slice_options.append(f"{clf_name}")
+        create_dataset_frm = LabelFrame(
+            self.create_dataset_tab,
+            text="CREATE DATASET",
+            pady=5,
+            padx=5,
+            font=Formats.LABELFRAME_HEADER_FORMAT.value,
+            fg="black",
+        )
+        self.feature_file_selected = FileSelect(
+            create_dataset_frm, "FEATURE FILE (CSV)", lblwidth=25
+        )
+        self.data_slice_dropdown = DropDownMenu(
+            create_dataset_frm,
+            "FEATURE SLICE:",
+            UMLOptions.FEATURE_SLICE_OPTIONS.value,
+            "25",
+            com=lambda x: self.change_status_of_file_select(),
+        )
         self.data_slice_dropdown.setChoices(UMLOptions.FEATURE_SLICE_OPTIONS.value[0])
-        self.clf_slice_dropdown = DropDownMenu(create_dataset_frm, "CLASSIFIER SLICE:", self.clf_slice_options, "25")
+        self.clf_slice_dropdown = DropDownMenu(
+            create_dataset_frm, "CLASSIFIER SLICE:", self.clf_slice_options, "25"
+        )
         self.clf_slice_dropdown.setChoices(self.clf_slice_options[0])
         self.change_status_of_file_select()
-        self.bout_dropdown = DropDownMenu(create_dataset_frm,"BOUT AGGREGATION METHOD:",UMLOptions.BOUT_AGGREGATION_METHODS.value,"25")
-        self.bout_dropdown.setChoices(choice=UMLOptions.BOUT_AGGREGATION_METHODS.value[0])
-        self.min_bout_length = Entry_Box(create_dataset_frm, "MINIMUM BOUT LENGTH (MS): ", "25", validation="numeric")
+        self.bout_dropdown = DropDownMenu(
+            create_dataset_frm,
+            "BOUT AGGREGATION METHOD:",
+            UMLOptions.BOUT_AGGREGATION_METHODS.value,
+            "25",
+        )
+        self.bout_dropdown.setChoices(
+            choice=UMLOptions.BOUT_AGGREGATION_METHODS.value[0]
+        )
+        self.min_bout_length = Entry_Box(
+            create_dataset_frm, "MINIMUM BOUT LENGTH (MS): ", "25", validation="numeric"
+        )
         self.min_bout_length.entry_set(val=0)
-        self.create_btn = Button(create_dataset_frm,text="CREATE DATASET",fg="blue",command=lambda: self.create_dataset())
+        self.create_btn = Button(
+            create_dataset_frm,
+            text="CREATE DATASET",
+            fg="blue",
+            command=lambda: self.create_dataset(),
+        )

         create_dataset_frm.grid(row=0, column=0, sticky=NW)
         self.data_slice_dropdown.grid(row=0, column=0, sticky=NW)
