SpikeInterface
diff --git a/‎.github/scripts/test_kilosort4_ci.py
Lines changed: 6 additions & 0 deletions b/‎.github/scripts/test_kilosort4_ci.py
Lines changed: 6 additions & 0 deletions
diff --git a/‎doc/how_to/combine_recordings.rst
Lines changed: 1 addition & 1 deletion b/‎doc/how_to/combine_recordings.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/how_to/handle_drift.rst
Lines changed: 26 additions & 32 deletions b/‎doc/how_to/handle_drift.rst
Lines changed: 26 additions & 32 deletions
diff --git a/‎doc/how_to/index.rst
Lines changed: 1 addition & 0 deletions b/‎doc/how_to/index.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎doc/how_to/physical_units.rst
Lines changed: 107 additions & 0 deletions b/‎doc/how_to/physical_units.rst
Lines changed: 107 additions & 0 deletions
diff --git a/‎examples/how_to/handle_drift.py
Lines changed: 22 additions & 20 deletions b/‎examples/how_to/handle_drift.py
Lines changed: 22 additions & 20 deletions
diff --git a/‎examples/tutorials/core/plot_4_sorting_analyzer.py
Lines changed: 1 addition & 2 deletions b/‎examples/tutorials/core/plot_4_sorting_analyzer.py
Lines changed: 1 addition & 2 deletions
diff --git a/‎examples/tutorials/extractors/plot_1_read_various_formats.py
Lines changed: 1 addition & 6 deletions b/‎examples/tutorials/extractors/plot_1_read_various_formats.py
Lines changed: 1 addition & 6 deletions
diff --git a/‎pyproject.toml
Lines changed: 2 additions & 2 deletions b/‎pyproject.toml
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/spikeinterface/benchmark/benchmark_base.py
Lines changed: 15 additions & 6 deletions b/‎src/spikeinterface/benchmark/benchmark_base.py
Lines changed: 15 additions & 6 deletions
@@ -112,6 +112,10 @@
     PARAMS_TO_TEST_DICT.update({"cluster_neighbors": 11})
     PARAMETERS_NOT_AFFECTING_RESULTS.append("cluster_neighbors")
 
+if parse(kilosort.__version__) >= parse("4.0.37"):
+    PARAMS_TO_TEST_DICT.update({"max_cluster_subset": 20})
+    PARAMETERS_NOT_AFFECTING_RESULTS.append("max_cluster_subset")
+
 
 PARAMS_TO_TEST = list(PARAMS_TO_TEST_DICT.keys())
 
@@ -254,6 +258,8 @@ def test_initialize_ops_arguments(self):
             "device",
             "save_preprocessed_copy",
         ]
+        if parse(kilosort.__version__) >= parse("4.0.37"):
+            expected_arguments += ["gui_mode"]
 
         self._check_arguments(
             initialize_ops,
 
@@ -4,7 +4,7 @@ Combine recordings in SpikeInterface
 In this tutorial we will walk through combining multiple recording objects. Sometimes this occurs due to hardware
 settings (e.g. Intan software has a default setting of new files every 1 minute) or the experimenter decides to
 split their recording into multiple files for different experimental conditions. If the probe has not been moved,
-however, then during sorting it would likely make sense to combine these individual reocrding objects into one
+however, then during sorting it would likely make sense to combine these individual recording objects into one
 recording object.
 
 **Why Combine?**
 
@@ -1455,38 +1455,32 @@ Plot the results
 We load back the results and use the widgets module to explore the
 estimated drift motion.
 
-For all methods we have 4 plots: \* top left: time vs estimated peak
-depth \* top right: time vs peak depth after motion correction \* bottom
-left: the average motion vector across depths and all motion across
-spatial depths (for non-rigid estimation) \* bottom right: if motion
-correction is non rigid, the motion vector across depths is plotted as a
-map, with the color code representing the motion in micrometers.
-
-A few comments on the figures: \* the preset **‘rigid_fast’** has only
-one motion vector for the entire probe because it is a “rigid” case. The
-motion amplitude is globally underestimated because it averages across
-depths. However, the corrected peaks are flatter than the non-corrected
-ones, so the job is partially done. The big jump at=600s when the probe
-start moving is recovered quite well. \* The preset **kilosort_like**
-gives better results because it is a non-rigid case. The motion vector
-is computed for different depths. The corrected peak locations are
-flatter than the rigid case. The motion vector map is still be a bit
-noisy at some depths (e.g around 1000um). \* The preset **dredge** is
-offcial DREDge re-implementation in spikeinterface. It give the best
-result : very fast and smooth motion estimation. Very few noise. This
-method also capture very well the non rigid motion gradient along the
-probe. The best method on the market at the moement. An enormous thanks
-to the dream team : Charlie Windolf, Julien Boussard, Erdem Varol, Liam
-Paninski. Note that in the first part of the recording before the
-imposed motion (0-600s) we clearly have a non-rigid motion: the upper
-part of the probe (2000-3000um) experience some drifts, but the lower
-part (0-1000um) is relatively stable. The method defined by this preset
-is able to capture this. \* The preset **nonrigid_accurate** this is the
-ancestor of “dredge” before it was published. It seems to give the good
-results on this recording but with bit more noise. \* The preset
-**dredge_fast** similar than dredge but faster (using grid_convolution).
-\* The preset **nonrigid_fast_and_accurate** a variant of
-nonrigid_accurate but faster (using grid_convolution).
+For all methods we have 4 plots:
+    * top left: time vs estimated peak depth
+    * top right: time vs peak depth after motion correction
+    * bottom left: the average motion vector across depths and all motion across spatial depths (for non-rigid estimation)
+    * bottom right: if motion correction is non rigid, the motion vector across depths is plotted as a map,
+      with the color code representing the motion in micrometers.
+
+A few comments on the figures:
+    * the preset **‘rigid_fast’** has only one motion vector for the entire probe because it is a “rigid” case. The
+      motion amplitude is globally underestimated because it averages across depths. However, the corrected peaks
+      are flatter than the non-corrected ones, so the job is partially done. The big jump at t=600s, when the probe starts
+      moving, is recovered quite well.
+    * The preset **kilosort_like** gives better results because it is a non-rigid case. The motion vector is computed
+      for different depths. The corrected peak locations are flatter than the rigid case. The motion vector map is still
+      a bit noisy at some depths (e.g. around 1000um).
+    * The preset **dredge** is the official DREDge re-implementation in spikeinterface. It gives the best result: very fast
+      and smooth motion estimation, with very little noise. This method also captures very well the non-rigid motion gradient along
+      the probe. It is the best method on the market at the moment. An enormous thanks to the dream team: Charlie Windolf,
+      Julien Boussard, Erdem Varol, Liam Paninski. Note that in the first part of the recording before the imposed motion
+      (0-600s) we clearly have a non-rigid motion: the upper part of the probe (2000-3000um) experiences some drift, but the
+      lower part (0-1000um) is relatively stable. The method defined by this preset is able to capture this.
+    * The preset **nonrigid_accurate** is the ancestor of “dredge” before it was published. It seems to give good results
+      on this recording but with a bit more noise.
+    * The preset **dredge_fast** is similar to dredge but faster (using grid_convolution).
+    * The preset **nonrigid_fast_and_accurate** is a variant of
+      nonrigid_accurate but faster (using grid_convolution).
 
 .. code:: ipython3
 
 
@@ -17,4 +17,5 @@ Guides on how to solve specific, short problems in SpikeInterface. Learn how to.
     drift_with_lfp
     auto_curation_training
     auto_curation_prediction
+    physical_units
     customize_a_plot
@@ -0,0 +1,107 @@
+Working with physical units in SpikeInterface recordings
+========================================================
+
+In neurophysiology recordings, data is often stored in raw ADC (Analog-to-Digital Converter) integer values but needs to be analyzed in physical units.
+For extracellular recordings, this is typically microvolts (µV), but some recording devices may use different physical units.
+SpikeInterface provides tools to handle both situations.
+
+It's important to note that **most spike sorters work fine on raw digital (ADC) units** and scaling is not needed. Going a step further, some sorters, such as Kilosort 3, require their input to be in raw ADC units.
+The specific behavior, however, depends on the spike sorter, so it's important to understand the specific input requirements on a case-by-case basis.
+
+Many preprocessing tools are also linear transformations, and if the ADC is implemented as a linear transformation which is fairly common, then the overall effect can be preserved.
+That is, **preprocessing steps can often be applied either before or after unit conversion without affecting the outcome.** That being said, there are rough edges to this approach.
+Preprocessing algorithms like filtering, whitening, centering, interpolation and common reference require casting to float within the pipeline. We advise users to experiment
+with different approaches to find the best one for their specific use case.
+
+
+Therefore, **it is usually safe to work in raw ADC integer values unless a specific tool or analysis requires physical units**.
+If you are interested in visualizations, comparability across devices, or outputs with interpretable physical scales (e.g., microvolts), converting to physical units is recommended.
+Otherwise, remaining in raw units can simplify processing and preserve performance.
+
+Understanding Physical Units
+----------------------------
+
+Most recording devices store data in ADC units (integers) to save space and preserve the raw data.
+To convert these values to physical units, two parameters are needed:
+
+* **gain**: A multiplicative factor to scale the raw values
+* **offset**: An additive factor to shift the values
+
+The conversion formula is:
+
+.. code-block:: text
+
+    physical_value = raw_value * gain + offset
+
+
+Converting to Physical Units
+----------------------------
+
+SpikeInterface provides two preprocessing classes for converting recordings to physical units. Both wrap the
+``RecordingExtractor`` class and ensure that the data is returned in physical units when calling `get_traces <https://spikeinterface.readthedocs.io/en/stable/api.html#spikeinterface.core.BaseRecording.get_traces>`_.
+
+1. ``scale_to_uV``: The primary function for extracellular recordings. SpikeInterface is centered around
+   extracellular recordings, and this function is designed to convert the data to microvolts (µV).
+2. ``scale_to_physical_units``: A general function for any physical unit conversion. This will allow you to extract the data in any
+   physical unit, not just microvolts. This is useful for other types of recordings, such as force measurements in Newtons, but should be
+   handled with care.
+
+For most users working with extracellular recordings, ``scale_to_uV`` is the recommended choice if they want to work in physical units:
+
+.. code-block:: python
+
+    from spikeinterface.extractors import read_intan
+    from spikeinterface.preprocessing import scale_to_uV
+
+    # Load recording (data is in ADC units)
+    recording = read_intan("path/to/file.rhs")
+
+    # Convert to microvolts
+    recording_uv = scale_to_uV(recording)
+
+For recordings with non-standard units (e.g., force measurements in Newtons), use ``scale_to_physical_units``:
+
+.. code-block:: python
+
+    from spikeinterface.preprocessing import scale_to_physical_units
+
+    # Convert to physical units (whatever they may be)
+    recording_physical = scale_to_physical_units(recording)
+
+Both preprocessors automatically:
+
+1. Detect the appropriate gain and offset from the recording properties
+2. Apply the conversion to all channels
+3. Update the recording properties to reflect that data is now in physical units
+
+Setting Custom Physical Units
+-----------------------------
+
+While most extractors automatically set the appropriate ``gain_to_uV`` and ``offset_to_uV`` values,
+there might be cases where you want to set custom physical units. In these cases, you can set
+the following properties:
+
+* ``physical_unit``: The target physical unit (e.g., 'uV', 'mV', 'N')
+* ``gain_to_unit``: The gain to convert from raw values to the target unit
+* ``offset_to_unit``: The offset to convert from raw values to the target unit
+
+You need to set these properties for every channel, which allows for the case when there are different gains and offsets on different channels. Here's an example:
+
+.. code-block:: python
+
+    # Set custom physical units
+    num_channels = recording.get_num_channels()
+    values = ["volts"] * num_channels
+    recording.set_property(key='physical_unit', values=values)
+
+    gain_values = [0.001] * num_channels  # Convert from ADC to volts
+    recording.set_property(key='gain_to_unit', values=gain_values)  # Convert to volts
+
+    offset_values = [0] * num_channels  # No offset
+    recording.set_property(key='offset_to_unit', values=offset_values)  # No offset
+
+    # Apply the conversion using scale_to_physical_units
+    recording_physical = scale_to_physical_units(recording)
+
+This approach gives you full control over the unit conversion process while maintaining
+compatibility with SpikeInterface's preprocessing pipeline.
@@ -133,32 +133,33 @@ def preprocess_chain(rec):
 # We load back the results and use the widgets module to explore the estimated drift motion.
 #
 # For all methods we have 4 plots:
+#
 #   * top left: time vs estimated peak depth
 #   * top right: time vs peak depth after motion correction
 #   * bottom left: the average motion vector across depths and all motion across spatial depths (for non-rigid estimation)
 #   * bottom right: if motion correction is non rigid, the motion vector across depths is plotted as a map, with the color code representing the motion in micrometers.
 #
 # A few comments on the figures:
-# * the preset **'rigid_fast'** has only one motion vector for the entire probe because it is a "rigid" case.
-#   The motion amplitude is globally underestimated because it averages across depths.
-#   However, the corrected peaks are flatter than the non-corrected ones, so the job is partially done.
-#   The big jump at=600s when the probe start moving is recovered quite well.
-# * The preset **kilosort_like** gives better results because it is a non-rigid case.
-#   The motion vector is computed for different depths.
-#   The corrected peak locations are flatter than the rigid case.
-#   The motion vector map is still be a bit noisy at some depths (e.g around 1000um).
-# * The preset **dredge** is offcial DREDge re-implementation in spikeinterface.
-#   It give the best result : very fast and smooth motion estimation. Very few noise.
-#   This method also capture very well the non rigid motion gradient along the probe.
-#   The best method on the market at the moement.
-#   An enormous thanks to the dream team :  Charlie Windolf, Julien Boussard, Erdem Varol, Liam Paninski.
-#   Note that in the first part of the recording before the imposed motion (0-600s) we clearly have a non-rigid motion:
-#   the upper part of the probe (2000-3000um) experience some drifts, but the lower part (0-1000um) is relatively stable.
-#   The method defined by this preset is able to capture this.
-# * The preset **nonrigid_accurate** this is the ancestor of "dredge" before it was published.
-#   It seems to give the good results on this recording but with bit more noise.
-# * The preset **dredge_fast** similar than dredge but faster (using grid_convolution).
-# * The preset **nonrigid_fast_and_accurate** a variant of nonrigid_accurate but faster (using grid_convolution).
+#   * the preset **'rigid_fast'** has only one motion vector for the entire probe because it is a "rigid" case.
+#     The motion amplitude is globally underestimated because it averages across depths.
+#     However, the corrected peaks are flatter than the non-corrected ones, so the job is partially done.
+#     The big jump at t=600s, when the probe starts moving, is recovered quite well.
+#   * The preset **kilosort_like** gives better results because it is a non-rigid case.
+#     The motion vector is computed for different depths.
+#     The corrected peak locations are flatter than the rigid case.
+#     The motion vector map is still a bit noisy at some depths (e.g. around 1000um).
+#   * The preset **dredge** is the official DREDge re-implementation in spikeinterface.
+#     It gives the best result: very fast and smooth motion estimation, with very little noise.
+#     This method also captures very well the non-rigid motion gradient along the probe.
+#     It is the best method on the market at the moment.
+#     An enormous thanks to the dream team :  Charlie Windolf, Julien Boussard, Erdem Varol, Liam Paninski.
+#     Note that in the first part of the recording before the imposed motion (0-600s) we clearly have a non-rigid motion:
+#     the upper part of the probe (2000-3000um) experience some drifts, but the lower part (0-1000um) is relatively stable.
+#     The method defined by this preset is able to capture this.
+#   * The preset **nonrigid_accurate** is the ancestor of "dredge" before it was published.
+#     It seems to give good results on this recording but with a bit more noise.
+#   * The preset **dredge_fast** is similar to dredge but faster (using grid_convolution).
+#   * The preset **nonrigid_fast_and_accurate** is a variant of nonrigid_accurate but faster (using grid_convolution).
 #
 #
 
@@ -205,6 +206,7 @@ def preprocess_chain(rec):
 # We can see here that some clusters seem to be more compact on the 'y' axis, especially for the preset "nonrigid_accurate".
 #
 # Be aware that there are two ways to correct for the motion:
+#
 #   1. Interpolate traces and detect/localize peaks again  (`interpolate_recording()`)
 #   2. Compensate for drifts directly on peak locations (`correct_motion_on_peaks()`)
 #
 
@@ -43,9 +43,8 @@
 ##############################################################################
 # Let's now instantiate the recording and sorting objects:
 
-recording = se.MEArecRecordingExtractor(local_path)
+recording, sorting = se.read_mearec(local_path)
 print(recording)
-sorting = se.MEArecSortingExtractor(local_path)
 print(sorting)
 
 ###############################################################################
 
@@ -61,7 +61,7 @@
 # :py:class:`~spikeinterface.extractors.Spike2RecordingExtractor` object:
 #
 
-recording = se.Spike2RecordingExtractor(spike2_file_path, stream_id="0")
+recording = se.read_spike2(spike2_file_path, stream_id="0")
 print(recording)
 
 ##############################################################################
@@ -75,11 +75,6 @@
 print(sorting)
 print(type(sorting))
 
-##############################################################################
-#  The :py:func:`~spikeinterface.extractors.read_mearec` function is equivalent to:
-
-recording = se.MEArecRecordingExtractor(mearec_folder_path)
-sorting = se.MEArecSortingExtractor(mearec_folder_path)
 
 ##############################################################################
 # SI objects (:py:class:`~spikeinterface.core.BaseRecording` and :py:class:`~spikeinterface.core.BaseSorting`)
 
@@ -120,7 +120,7 @@ qualitymetrics = [
 ]
 
 test_core = [
-    "pytest",
+    "pytest<8.4.0",
     "pytest-dependency",
     "psutil",
 
@@ -146,7 +146,7 @@ test_preprocessing = [
 
 
 test = [
-    "pytest",
+    "pytest<8.4.0",
     "pytest-dependency",
     "pytest-cov",
     "psutil",
 
@@ -134,8 +134,6 @@ def create(cls, study_folder, datasets={}, cases={}, levels=None):
                 else:
                     analyzer = data
 
-                rec, gt_sorting = analyzer.recording, analyzer.sorting
-
             analyzers_path[key] = str(analyzer.folder.resolve())
 
             # recordings are pickled
@@ -180,7 +178,11 @@ def scan_folder(self):
             self.analyzers[key] = analyzer
             # the sorting is in memory here we take the saved one because comparisons need to pickle it later
             sorting = load(analyzer.folder / "sorting")
-            self.datasets[key] = analyzer.recording, sorting
+            if analyzer.has_recording():
+                recording = analyzer.recording
+            else:
+                recording = None
+            self.datasets[key] = recording, sorting
 
         with open(self.folder / "cases.pickle", "rb") as f:
             self.cases = pickle.load(f)
@@ -594,15 +596,22 @@ def load_folder(cls, folder):
             elif format == "sorting":
                 from spikeinterface.core import load_extractor
 
-                result[k] = load(folder / k)
+                sorting_folder = folder / k
+                if sorting_folder.exists():
+                    result[k] = load(sorting_folder)
             elif format == "Motion":
                 from spikeinterface.core.motion import Motion
 
-                result[k] = Motion.load(folder / k)
+                motion_folder = folder / k
+                if motion_folder.exists():
+                    result[k] = Motion.load(motion_folder)
             elif format == "zarr_templates":
                 from spikeinterface.core.template import Templates
 
-                result[k] = Templates.from_zarr(folder / k)
+                zarr_folder = folder / k
+                if zarr_folder.exists():
+
+                    result[k] = Templates.from_zarr(zarr_folder)
 
         return result