Merge pull request #1338 from SachsLab/cboulay/1332_blackrock_too_many_files

apdavison · web-flow · commit 0071e4ef355d · 2024-05-07T10:10:43.000+02:00
Add support for Blackrock NSx files with per-sample PTP nanosecond timestamps
diff --git a/neo/rawio/blackrockrawio.py b/neo/rawio/blackrockrawio.py
@@ -7,6 +7,7 @@
   * Samuel Garcia - third version
   * Lyuba Zehl, Michael Denker - fourth version
   * Samuel Garcia, Julia Srenger - fifth version
+  * Chadwick Boulay - FileSpec 3.0 and 3.0-PTP
 
 This IO supports reading only.
 This IO is able to read:
@@ -17,6 +18,8 @@
   * 2.1
   * 2.2
   * 2.3
+  * 3.0
+  * 3.0 with PTP timestamps (Gemini systems)
 
 The neural data channels are 1 - 128.
 The analog inputs are 129 - 144. (129 - 137 AC coupled, 138 - 144 DC coupled)
@@ -191,12 +194,14 @@ def __init__(
             "2.2": self.__read_nsx_dataheader_variant_b,
             "2.3": self.__read_nsx_dataheader_variant_b,
             "3.0": self.__read_nsx_dataheader_variant_b,
+            "3.0-ptp": self.__read_nsx_dataheader_variant_c,
         }
         self.__nsx_data_reader = {
             "2.1": self.__read_nsx_data_variant_a,
             "2.2": self.__read_nsx_data_variant_b,
             "2.3": self.__read_nsx_data_variant_b,
             "3.0": self.__read_nsx_data_variant_b,
+            "3.0-ptp": self.__read_nsx_data_variant_c,
         }
         self.__nsx_params = {
             "2.1": self.__get_nsx_param_variant_a,
@@ -312,11 +317,17 @@ def _parse_header(self):
             # read nsx headers
             self.__nsx_basic_header[nsx_nb], self.__nsx_ext_header[nsx_nb] = self.__nsx_header_reader[spec](nsx_nb)
 
-            # Read nsx data header(s)
+            # The only way to know if it is the PTP-variant of file spec 3.0
+            # is to check for nanosecond timestamp resolution.
+            if "timestamp_resolution" in self.__nsx_basic_header[nsx_nb].dtype.names \
+                and self.__nsx_basic_header[nsx_nb]["timestamp_resolution"] == 1_000_000_000:
+                nsx_dataheader_reader = self.__nsx_dataheader_reader["3.0-ptp"]
+            else:
+                nsx_dataheader_reader = self.__nsx_dataheader_reader[spec]
             # for nsxdef get_analogsignal_shape(self, block_index, seg_index):
-            self.__nsx_data_header[nsx_nb] = self.__nsx_dataheader_reader[spec](nsx_nb)
+            self.__nsx_data_header[nsx_nb] = nsx_dataheader_reader(nsx_nb)
 
-        # nsx_to_load can be either int, list, 'max', all' (aka None)
+        # nsx_to_load can be either int, list, 'max', 'all' (aka None)
         # here make a list only
         if self.nsx_to_load is None or self.nsx_to_load == "all":
             self.nsx_to_load = list(self._avail_nsx)
@@ -359,7 +370,14 @@ def _parse_header(self):
         if len(self.nsx_to_load) > 0:
             for nsx_nb in self.nsx_to_load:
                 spec = self.__nsx_spec[nsx_nb]
-                self.nsx_datas[nsx_nb] = self.__nsx_data_reader[spec](nsx_nb)
+                # The only way to know if it is the PTP-variant of file spec 3.0
+                # is to check for nanosecond timestamp resolution.
+                if "timestamp_resolution" in self.__nsx_basic_header[nsx_nb].dtype.names \
+                    and self.__nsx_basic_header[nsx_nb]["timestamp_resolution"] == 1_000_000_000:
+                    _data_reader_fun = self.__nsx_data_reader["3.0-ptp"]
+                else:
+                    _data_reader_fun = self.__nsx_data_reader[spec]
+                self.nsx_datas[nsx_nb] = _data_reader_fun(nsx_nb)
 
                 sr = float(main_sampling_rate / self.__nsx_basic_header[nsx_nb]["period"])
                 self.sig_sampling_rates[nsx_nb] = sr
@@ -415,15 +433,28 @@ def _parse_header(self):
             for data_bl in range(self._nb_segment):
                 t_stop = 0.0
                 for nsx_nb in self.nsx_to_load:
+                    spec = self.__nsx_spec[nsx_nb]
+                    if "timestamp_resolution" in self.__nsx_basic_header[nsx_nb].dtype.names:
+                        ts_res = self.__nsx_basic_header[nsx_nb]["timestamp_resolution"]
+                    elif spec == "2.1":
+                        ts_res = self.__nsx_params[spec](nsx_nb)['timestamp_resolution']
+                    else:
+                        ts_res = 30_000
+                    period = self.__nsx_basic_header[nsx_nb]["period"]
+                    sec_per_samp = period / 30_000  # Maybe 30_000 should be ['sample_resolution']
                     length = self.nsx_datas[nsx_nb][data_bl].shape[0]
                     if self.__nsx_data_header[nsx_nb] is None:
                         t_start = 0.0
+                        t_stop = max(t_stop, length / self.sig_sampling_rates[nsx_nb])
                     else:
-                        t_start = (
-                            self.__nsx_data_header[nsx_nb][data_bl]["timestamp"]
-                            / self.__nsx_basic_header[nsx_nb]["timestamp_resolution"]
-                        )
-                    t_stop = max(t_stop, t_start + length / self.sig_sampling_rates[nsx_nb])
+                        timestamps = self.__nsx_data_header[nsx_nb][data_bl]["timestamp"]
+                        if hasattr(timestamps, "size") and timestamps.size == length:
+                            # FileSpec 3.0 with PTP -- use the per-sample timestamps
+                            t_start = timestamps[0] / ts_res
+                            t_stop = max(t_stop, timestamps[-1] / ts_res + sec_per_samp)
+                        else:
+                            t_start = timestamps / ts_res
+                            t_stop = max(t_stop, t_start + length / self.sig_sampling_rates[nsx_nb])
                     self._sigs_t_starts[nsx_nb].append(t_start)
 
                 if self._avail_files["nev"]:
@@ -794,6 +825,8 @@ def __read_nsx_header_variant_a(self, nsx_nb):
         ]
 
         nsx_basic_header = np.fromfile(filename, count=1, dtype=dt0)[0]
+        # Note: it is not possible to use recfunctions to append_fields of 'timestamp_resolution',
+        #  because the size of this object is used as the header size in later read operations.
 
         # "extended" header (last field of file_id: NEURALCD)
         # (to facilitate compatibility with higher file specs)
@@ -901,7 +934,7 @@ def __read_nsx_dataheader_variant_b(
     ):
         """
         Reads the nsx data header for each data block following the offset of
-        file spec 2.2 and 2.3.
+        file spec 2.2, 2.3, and 3.0.
         """
         filename = ".".join([self._filenames["nsx"], f"ns{nsx_nb}"])
 
@@ -932,6 +965,67 @@ def __read_nsx_dataheader_variant_b(
 
         return data_header
 
+    def __read_nsx_dataheader_variant_c(
+            self, nsx_nb, filesize=None, offset=None, ):
+        """
+        Reads the nsx data header for each data block for file spec 3.0 with PTP timestamps
+        """
+        filename = ".".join([self._filenames["nsx"], f"ns{nsx_nb}"])
+
+        filesize = self.__get_file_size(filename)
+
+        data_header = {}
+        index = 0
+
+        if offset is None:
+            offset = self.__nsx_basic_header[nsx_nb]["bytes_in_headers"]
+
+        ptp_dt = [
+            ("reserved", "uint8"),
+            ("timestamps", "uint64"),
+            ("num_data_points", "uint32"),
+            ("samples", "int16", self.__nsx_basic_header[nsx_nb]["channel_count"])
+        ]
+        npackets = int((filesize - offset) / np.dtype(ptp_dt).itemsize)
+        struct_arr = np.memmap(filename, dtype=ptp_dt, shape=npackets, offset=offset, mode="r")
+
+        if not np.all(struct_arr["num_data_points"] == 1):
+            # some packets have more than 1 sample. Not actually ptp. Revert to non-ptp variant.
+            return self.__read_nsx_dataheader_variant_b(nsx_nb, filesize=filesize, offset=offset)
+
+        # It is still possible there was a data break and the file has multiple segments.
+        # We can no longer rely on the presence of a header indicating a new segment,
+        # so we look for timestamp differences greater than double the expected interval.
+        _period = self.__nsx_basic_header[nsx_nb]["period"]  # 30_000 ^-1 s per sample
+        _nominal_rate = 30_000 / _period  # samples per sec;  maybe 30_000 should be ["sample_resolution"]
+        _clock_rate = self.__nsx_basic_header[nsx_nb]["timestamp_resolution"]  # clocks per sec
+        clk_per_samp = _clock_rate / _nominal_rate  # clk/sec / smp/sec = clk/smp
+        seg_thresh_clk = int(2 * clk_per_samp)
+        seg_starts = np.hstack(
+            (0, 1 + np.argwhere(np.diff(struct_arr["timestamps"]) > seg_thresh_clk).flatten())
+        )
+        for seg_ix, seg_start_idx in enumerate(seg_starts):
+            if seg_ix < (len(seg_starts) - 1):
+                seg_stop_idx = seg_starts[seg_ix + 1]
+            else:
+                seg_stop_idx = (len(struct_arr) - 1)
+            seg_offset = offset + seg_start_idx * struct_arr.dtype.itemsize
+            num_data_pts = seg_stop_idx - seg_start_idx
+            seg_struct_arr = np.memmap(
+                filename,
+                dtype=ptp_dt,
+                shape=num_data_pts,
+                offset=seg_offset,
+                mode="r"
+            )
+            data_header[seg_ix] = {
+                "header": None,
+                "timestamp": seg_struct_arr["timestamps"],  # Note, this is an array, not a scalar
+                "nb_data_points": num_data_pts,
+                "offset_to_data_block": seg_offset
+            }
+        return data_header
+
     def __read_nsx_data_variant_a(self, nsx_nb):
         """
         Extract nsx data from a 2.1 .nsx file
@@ -950,8 +1044,8 @@ def __read_nsx_data_variant_a(self, nsx_nb):
 
     def __read_nsx_data_variant_b(self, nsx_nb):
         """
-        Extract nsx data (blocks) from a 2.2 or 2.3 .nsx file. Blocks can arise
-        if the recording was paused by the user.
+        Extract nsx data (blocks) from a 2.2, 2.3, or 3.0 .nsx file.
+        Blocks can arise if the recording was paused by the user.
         """
         filename = ".".join([self._filenames["nsx"], f"ns{nsx_nb}"])
 
@@ -969,6 +1063,36 @@ def __read_nsx_data_variant_b(self, nsx_nb):
 
         return data
 
+    def __read_nsx_data_variant_c(self, nsx_nb):
+        """
+        Extract nsx data (blocks) from a 3.0 .nsx file with PTP timestamps
+        yielding a timestamp per sample. Blocks can arise
+        if the recording was paused by the user.
+        """
+        filename = ".".join([self._filenames["nsx"], f"ns{nsx_nb}"])
+
+        ptp_dt = [
+            ("reserved", "uint8"),
+            ("timestamps", "uint64"),
+            ("num_data_points", "uint32"),
+            ("samples", "int16", self.__nsx_basic_header[nsx_nb]["channel_count"])
+        ]
+
+        data = {}
+        for bl_id, bl_header in self.__nsx_data_header[nsx_nb].items():
+            struct_arr = np.memmap(
+                filename,
+                dtype=ptp_dt,
+                shape=bl_header["nb_data_points"],
+                offset=bl_header["offset_to_data_block"], mode="r"
+            )
+            # Does this concretize the data?
+            # If yes then investigate np.ndarray with buffer=file,
+            # offset=offset+13, and strides that skips 13-bytes per row.
+            data[bl_id] = struct_arr["samples"]
+
+        return data
+
     def __read_nev_header(self, ext_header_variants):
         """
         Extract nev header information from a of specific .nsx header variant
diff --git a/neo/test/rawiotest/test_blackrockrawio.py b/neo/test/rawiotest/test_blackrockrawio.py
@@ -28,6 +28,7 @@ class TestBlackrockRawIO(
         "blackrock/FileSpec2.3001",
         "blackrock/blackrock_2_1/l101210-001",
         "blackrock/blackrock_3_0/file_spec_3_0",
+        "blackrock/blackrock_3_0_ptp/20231027-125608-001",
     ]
 
     @unittest.skipUnless(HAVE_SCIPY, "requires scipy")
@@ -184,6 +185,32 @@ def test_compare_blackrockio_with_matlabloader_v21(self):
                         matlab_digievents = mts_ml[mid_ml == int(label)]
                         assert_equal(python_digievents, matlab_digievents)
 
+    def test_blackrockrawio_ptp_timestamps(self):
+        dirname = self.get_local_path("blackrock/blackrock_3_0_ptp/20231027-125608-001")
+        reader = BlackrockRawIO(filename=dirname)
+        reader.parse_header()
+
+        # 1 segment; no pauses or detectable packet drops. Was ~2.1 seconds long
+        self.assertEqual(1, reader.block_count())
+        self.assertEqual(1, reader.segment_count(0))
+        t_start = reader.segment_t_start(0, 0)
+        t_stop = reader.segment_t_stop(0, 0)
+        self.assertAlmostEqual(2.1, t_stop - t_start, places=1)
+
+        # 2 streams - ns2 and ns6; each with 65 channels
+        # 65 ns2 (1 kHz) channels, on the even channels -- every other from 2-130
+        # 65 ns6 (RAW; 30 kHz) channels, on the odd channels -- every other from 1-129
+        expected_rates = [1_000, 30_000]
+        n_streams = reader.signal_streams_count()
+        self.assertEqual(len(expected_rates), n_streams)
+        for strm_ix in range(n_streams):
+            reader.get_signal_sampling_rate(strm_ix)
+            self.assertEqual(65, reader.signal_channels_count(strm_ix))
+            self.assertAlmostEqual(expected_rates[strm_ix], reader.get_signal_sampling_rate(strm_ix), places=1)
+
+        # Spikes enabled on channels 1-129 but channel 129 had 0 events.
+        self.assertEqual(128, reader.spike_channels_count())
+
 
 if __name__ == "__main__":
     unittest.main()