-
Notifications
You must be signed in to change notification settings - Fork 210
Handle automatic chunks duration for SC2 #3721
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 15 commits
1ebb6db
0ce092c
cf31e7f
b299dd3
7255530
b6dc572
26fc226
3819f8c
42d1c2c
d156f60
29eb160
ed319bf
8c1ca39
0a2bd23
b913be2
5d88e72
f746d41
d6b3bdb
a891d13
f2a3ac4
b72ee86
151fefb
0445d4e
6409824
b9b3457
5d4e516
03ccc7e
fcea1b6
3812ece
8c400f4
d6917c9
12b5edb
297d9b9
9bb2f33
c86a587
0e9468c
b7de6a1
8282461
ce56760
7cc16ce
c4faee6
d25105f
08213c4
eae719c
ba96c50
68c03f3
0138b17
bd07dc3
b68e2fb
b569023
08f579e
c26b092
16e9a51
7559588
51f753a
b7fb2bf
5b47453
456e257
71c9eec
c97dff3
a2add36
fa95a33
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,7 @@ | |
from spikeinterface.core.sparsity import ChannelSparsity | ||
from spikeinterface.core.template import Templates | ||
from spikeinterface.core.waveform_tools import extract_waveforms_to_single_buffer | ||
from spikeinterface.core.job_tools import split_job_kwargs | ||
from spikeinterface.core.job_tools import split_job_kwargs, fix_job_kwargs | ||
from spikeinterface.core.sortinganalyzer import create_sorting_analyzer | ||
from spikeinterface.core.sparsity import ChannelSparsity | ||
from spikeinterface.core.analyzer_extension_core import ComputeTemplates | ||
|
@@ -249,19 +249,132 @@ def check_probe_for_drift_correction(recording, dist_x_max=60): | |
return True | ||
|
||
|
||
def cache_preprocessing(recording, mode="memory", memory_limit=0.5, delete_cache=True, **extra_kwargs): | ||
save_kwargs, job_kwargs = split_job_kwargs(extra_kwargs) | ||
def set_optimal_chunk_size(recording, job_kwargs, memory_limit=0.5, total_memory=None):
    """
    Set the optimal chunk duration for a job given a memory budget and the number of jobs.

    The chunk size (in samples) is chosen so that `n_jobs` concurrent chunks of
    `num_channels * dtype.itemsize` bytes per sample fit within the budget.

    Parameters
    ----------
    recording : Recording
        The recording object
    job_kwargs : dict
        The job kwargs
    memory_limit : float, default: 0.5
        The memory budget as a fraction of currently available memory
        (only used when total_memory is None; requires psutil)
    total_memory : str | int | None, default: None
        The total memory to use for the job, as bytes or a string like "1G";
        overrides memory_limit when given

    Returns
    -------
    job_kwargs : dict
        The updated job kwargs with a chunk_duration matching the memory budget
    """
    import warnings

    job_kwargs = fix_job_kwargs(job_kwargs)
    n_jobs = job_kwargs["n_jobs"]
    if total_memory is None:
        if HAVE_PSUTIL:
            assert 0 < memory_limit < 1, "memory_limit should be in ]0, 1["
            memory_usage = memory_limit * psutil.virtual_memory().available
            if recording.get_total_memory_size() < memory_usage:
                num_channels = recording.get_num_channels()
                dtype_size_bytes = recording.get_dtype().itemsize
                # samples per chunk such that n_jobs concurrent chunks fit in the budget
                chunk_size = memory_usage / ((num_channels * dtype_size_bytes) * n_jobs)
                chunk_duration = chunk_size / recording.get_sampling_frequency()
                # keep the caller's n_jobs instead of resetting job_kwargs from scratch
                job_kwargs = fix_job_kwargs(dict(n_jobs=n_jobs, chunk_duration=f"{chunk_duration}s"))
        else:
            # reviewer-requested: warn rather than print
            warnings.warn("psutil is required to use only a fraction of available memory")
    else:
        from spikeinterface.core.job_tools import convert_string_to_bytes

        total_memory = convert_string_to_bytes(total_memory)
        num_channels = recording.get_num_channels()
        dtype_size_bytes = recording.get_dtype().itemsize
        # BUGFIX: the ratio was inverted ((num_channels * dtype_size_bytes) * n_jobs / total_memory),
        # which yields a tiny fraction instead of a sample count. Mirror the
        # HAVE_PSUTIL branch: samples per chunk that fit in the memory budget.
        chunk_size = total_memory / ((num_channels * dtype_size_bytes) * n_jobs)
        chunk_duration = chunk_size / recording.get_sampling_frequency()
        job_kwargs = fix_job_kwargs(dict(n_jobs=n_jobs, chunk_duration=f"{chunk_duration}s"))
    return job_kwargs
|
||
|
||
def get_optimal_n_jobs(job_kwargs, ram_requested, memory_limit=0.25):
    """
    Cap the number of jobs so that n_jobs * ram_requested stays within the memory budget.

    Parameters
    ----------
    job_kwargs : dict
        The job kwargs
    ram_requested : int
        The amount of RAM (in bytes) requested per job
    memory_limit : float, default: 0.25
        The memory budget as a fraction of currently available memory (requires psutil)

    Returns
    -------
    job_kwargs : dict
        The updated job kwargs with the (possibly reduced) n_jobs
    """
    import warnings

    job_kwargs = fix_job_kwargs(job_kwargs)
    n_jobs = job_kwargs["n_jobs"]
    if HAVE_PSUTIL:
        assert 0 < memory_limit < 1, "memory_limit should be in ]0, 1["
        memory_usage = memory_limit * psutil.virtual_memory().available
        # keep at least one job even when a single job exceeds the budget,
        # otherwise n_jobs could collapse to 0 and break downstream processing
        n_jobs = max(1, int(min(n_jobs, memory_usage // ram_requested)))
        job_kwargs.update(dict(n_jobs=n_jobs))
    else:
        # reviewer-requested: warn rather than print
        warnings.warn("psutil is required to use only a fraction of available memory")
    return job_kwargs
|
||
|
||
def cache_preprocessing( | ||
recording, mode="memory", memory_limit=0.5, total_memory=None, delete_cache=True, **extra_kwargs | ||
): | ||
""" | ||
Cache the preprocessing of a recording object | ||
|
||
Parameters | ||
---------- | ||
|
||
recording: Recording | ||
The recording object | ||
mode: str | ||
The mode to cache the preprocessing, can be 'memory', 'folder', 'zarr' or 'no-cache' | ||
memory_limit: float | ||
The memory limit in fraction of available memory | ||
total_memory: str, Default None | ||
The total memory to use for the job in bytes | ||
delete_cache: bool | ||
If True, delete the cache after the job | ||
**extra_kwargs: dict | ||
The extra kwargs for the job | ||
|
||
Returns | ||
------- | ||
|
||
recording: Recording | ||
The cached recording object | ||
""" | ||
|
||
save_kwargs, job_kwargs = split_job_kwargs(extra_kwargs) | ||
|
||
if mode == "memory": | ||
if total_memory is None: | ||
if HAVE_PSUTIL: | ||
assert 0 < memory_limit < 1, "memory_limit should be in ]0, 1[" | ||
memory_usage = memory_limit * psutil.virtual_memory().available | ||
if recording.get_total_memory_size() < memory_usage: | ||
recording = recording.save_to_memory(format="memory", shared=True, **job_kwargs) | ||
else: | ||
print("Recording too large to be preloaded in RAM...") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same for these prints. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same here |
||
else: | ||
print("psutil is required to preload in memory given only a fraction of available memory") | ||
else: | ||
if recording.get_total_memory_size() < total_memory: | ||
recording = recording.save_to_memory(format="memory", shared=True, **job_kwargs) | ||
else: | ||
print("Recording too large to be preloaded in RAM...") | ||
else: | ||
print("psutil is required to preload in memory") | ||
elif mode == "folder": | ||
recording = recording.save_to_folder(**extra_kwargs) | ||
elif mode == "zarr": | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you need to say what units of RAM this will work with. I'm confused by this with the way it is currently written. I think users will need a bit more info to really understand how to use this.