Skip to content

Commit 0378030

Browse files
committed
Add the ability to pass signal (sig) and header (hea) streams to rdrecord, rdheader and rdann, for cases where the file is not read from disk.
1 parent 34b989e commit 0378030

File tree

3 files changed

+183
-142
lines changed

3 files changed

+183
-142
lines changed

wfdb/io/_signal.py

+152-121
Original file line numberDiff line numberDiff line change
@@ -1066,6 +1066,7 @@ def _rd_segment(
10661066
ignore_skew,
10671067
no_file=False,
10681068
sig_data=None,
1069+
sig_stream=None,
10691070
return_res=64,
10701071
):
10711072
"""
@@ -1211,6 +1212,7 @@ def _rd_segment(
12111212
sampto=sampto,
12121213
no_file=no_file,
12131214
sig_data=sig_data,
1215+
sig_stream=sig_stream,
12141216
)
12151217

12161218
# Copy over the wanted signals
@@ -1235,6 +1237,7 @@ def _rd_dat_signals(
12351237
sampto,
12361238
no_file=False,
12371239
sig_data=None,
1240+
sig_stream=None,
12381241
):
12391242
"""
12401243
Read all signals from a WFDB dat file.
@@ -1324,20 +1327,31 @@ def _rd_dat_signals(
13241327
if no_file:
13251328
data_to_read = sig_data
13261329
elif fmt in COMPRESSED_FMTS:
1327-
data_to_read = _rd_compressed_file(
1328-
file_name=file_name,
1329-
dir_name=dir_name,
1330-
pn_dir=pn_dir,
1331-
fmt=fmt,
1332-
sample_offset=byte_offset,
1333-
n_sig=n_sig,
1334-
samps_per_frame=samps_per_frame,
1335-
start_frame=sampfrom,
1336-
end_frame=sampto,
1337-
)
1330+
if sig_stream is not None:
1331+
data_to_read = _rd_compressed_stream(
1332+
fp=sig_stream,
1333+
fmt=fmt,
1334+
sample_offset=byte_offset,
1335+
n_sig=n_sig,
1336+
samps_per_frame=samps_per_frame,
1337+
start_frame=sampfrom,
1338+
end_frame=sampto,
1339+
)
1340+
else:
1341+
data_to_read = _rd_compressed_file(
1342+
file_name=file_name,
1343+
dir_name=dir_name,
1344+
pn_dir=pn_dir,
1345+
fmt=fmt,
1346+
sample_offset=byte_offset,
1347+
n_sig=n_sig,
1348+
samps_per_frame=samps_per_frame,
1349+
start_frame=sampfrom,
1350+
end_frame=sampto,
1351+
)
13381352
else:
13391353
data_to_read = _rd_dat_file(
1340-
file_name, dir_name, pn_dir, fmt, start_byte, n_read_samples
1354+
file_name, dir_name, pn_dir, fmt, start_byte, n_read_samples, sig_stream
13411355
)
13421356

13431357
if extra_flat_samples:
@@ -1577,7 +1591,7 @@ def _required_byte_num(mode, fmt, n_samp):
15771591
return int(n_bytes)
15781592

15791593

1580-
def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
1594+
def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp, sig_stream):
15811595
"""
15821596
Read data from a dat file, either local or remote, into a 1d numpy
15831597
array.
@@ -1635,8 +1649,14 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
16351649
element_count = n_samp
16361650
byte_count = n_samp * BYTES_PER_SAMPLE[fmt]
16371651

1652+
# Memory Stream
1653+
if sig_stream is not None:
1654+
sig_stream.seek(start_byte)
1655+
sig_data = np.frombuffer(
1656+
sig_stream.read(), dtype=np.dtype(DATA_LOAD_TYPES[fmt]), count=element_count
1657+
)
16381658
# Local dat file
1639-
if pn_dir is None:
1659+
elif pn_dir is None:
16401660
with open(os.path.join(dir_name, file_name), "rb") as fp:
16411661
fp.seek(start_byte)
16421662
sig_data = np.fromfile(
@@ -1651,7 +1671,6 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp):
16511671

16521672
return sig_data
16531673

1654-
16551674
def _blocks_to_samples(sig_data, n_samp, fmt):
16561675
"""
16571676
Convert uint8 blocks into signal samples for unaligned dat formats.
@@ -1770,6 +1789,123 @@ def _blocks_to_samples(sig_data, n_samp, fmt):
17701789
return sig
17711790

17721791

1792+
def _rd_compressed_stream(
1793+
fp,
1794+
fmt,
1795+
sample_offset,
1796+
n_sig,
1797+
samps_per_frame,
1798+
start_frame,
1799+
end_frame,
1800+
):
1801+
signature = fp.read(4)
1802+
if signature != b"fLaC":
1803+
raise ValueError(f"{fp.name} is not a FLAC file")
1804+
fp.seek(0)
1805+
1806+
with soundfile.SoundFile(fp) as sf:
1807+
# Determine the actual resolution of the FLAC stream and the
1808+
# data type we will use when reading it. Note that soundfile
1809+
# doesn't support int8.
1810+
if sf.subtype == "PCM_S8":
1811+
format_bits = 8
1812+
read_dtype = "int16"
1813+
elif sf.subtype == "PCM_16":
1814+
format_bits = 16
1815+
read_dtype = "int16"
1816+
elif sf.subtype == "PCM_24":
1817+
format_bits = 24
1818+
read_dtype = "int32"
1819+
else:
1820+
raise ValueError(f"unknown subtype in {fp.name} ({sf.subtype})")
1821+
1822+
max_bits = int(fmt) - 500
1823+
if format_bits > max_bits:
1824+
raise ValueError(
1825+
f"wrong resolution in {fp.name} "
1826+
f"({format_bits}, expected <= {max_bits})"
1827+
)
1828+
1829+
if sf.channels != n_sig:
1830+
raise ValueError(
1831+
f"wrong number of channels in {fp.name} "
1832+
f"({sf.channels}, expected {n_sig})"
1833+
)
1834+
1835+
# Read the samples.
1836+
start_samp = start_frame * samps_per_frame[0]
1837+
end_samp = end_frame * samps_per_frame[0]
1838+
sf.seek(start_samp + sample_offset)
1839+
1840+
# We could do this:
1841+
# sig_data = sf.read(end_samp - start_samp, dtype=read_dtype)
1842+
# However, sf.read fails for huge blocks (over 2**24 total
1843+
# samples) due to a bug in libsndfile:
1844+
# https://github.com/libsndfile/libsndfile/issues/431
1845+
# So read the data in chunks instead.
1846+
n_samp = end_samp - start_samp
1847+
sig_data = np.empty((n_samp, n_sig), dtype=read_dtype)
1848+
CHUNK_SIZE = 1024 * 1024
1849+
for chunk_start in range(0, n_samp, CHUNK_SIZE):
1850+
chunk_end = chunk_start + CHUNK_SIZE
1851+
chunk_data = sf.read(out=sig_data[chunk_start:chunk_end])
1852+
samples_read = chunk_data.shape[0]
1853+
if samples_read != CHUNK_SIZE:
1854+
sig_data = sig_data[: chunk_start + samples_read]
1855+
break
1856+
1857+
# If we read an 8-bit stream as int16 or a 24-bit stream as
1858+
# int32, soundfile shifts each sample left by 8 bits. We
1859+
# want to undo this shift (and, in the case of 8-bit data,
1860+
# convert to an int8 array.)
1861+
if format_bits == 8:
1862+
# np.right_shift(sig_data, 8, dtype='int8') doesn't work.
1863+
# This seems wrong, but the numpy documentation is unclear.
1864+
sig_data2 = np.empty(sig_data.shape, dtype="int8")
1865+
sig_data = np.right_shift(sig_data, 8, out=sig_data2)
1866+
elif format_bits == 24:
1867+
# Shift 32-bit array in-place.
1868+
np.right_shift(sig_data, 8, out=sig_data)
1869+
1870+
# Suppose we have 3 channels and 2 samples per frame. The array
1871+
# returned by sf.read looks like this:
1872+
#
1873+
# channel 0 channel 1 channel 2
1874+
# time 0 [0,0] [0,1] [0,2]
1875+
# time 1 [1,0] [1,1] [1,2]
1876+
# time 2 [2,0] [2,1] [2,2]
1877+
# time 3 [3,0] [3,1] [3,2]
1878+
#
1879+
# We reshape this first into the following:
1880+
#
1881+
# channel 0 channel 1 channel 2
1882+
# time 0 [0,0,0] [0,0,1] [0,0,2]
1883+
# time 1 [0,1,0] [0,1,1] [0,1,2]
1884+
# time 2 [1,0,0] [1,0,1] [1,0,2]
1885+
# time 3 [1,1,0] [1,1,1] [1,1,2]
1886+
#
1887+
# Then we transpose axes 1 and 2:
1888+
#
1889+
# channel 0 channel 1 channel 2
1890+
# time 0 [0,0,0] [0,1,0] [0,2,0]
1891+
# time 1 [0,0,1] [0,1,1] [0,2,1]
1892+
# time 2 [1,0,0] [1,1,0] [1,2,0]
1893+
# time 3 [1,0,1] [1,1,1] [1,2,1]
1894+
#
1895+
# Then when we reshape the array to 1D, the result is in dat file
1896+
# order:
1897+
#
1898+
# channel 0 channel 1 channel 2
1899+
# time 0 [0] [2] [4]
1900+
# time 1 [1] [3] [5]
1901+
# time 2 [6] [8] [10]
1902+
# time 3 [7] [9] [11]
1903+
1904+
sig_data = sig_data.reshape(-1, samps_per_frame[0], n_sig)
1905+
sig_data = sig_data.transpose(0, 2, 1)
1906+
return sig_data.reshape(-1)
1907+
1908+
17731909
def _rd_compressed_file(
17741910
file_name,
17751911
dir_name,
@@ -1834,112 +1970,7 @@ def _rd_compressed_file(
18341970
file_name = os.path.join(dir_name, file_name)
18351971

18361972
with _coreio._open_file(pn_dir, file_name, "rb") as fp:
1837-
signature = fp.read(4)
1838-
if signature != b"fLaC":
1839-
raise ValueError(f"{fp.name} is not a FLAC file")
1840-
fp.seek(0)
1841-
1842-
with soundfile.SoundFile(fp) as sf:
1843-
# Determine the actual resolution of the FLAC stream and the
1844-
# data type we will use when reading it. Note that soundfile
1845-
# doesn't support int8.
1846-
if sf.subtype == "PCM_S8":
1847-
format_bits = 8
1848-
read_dtype = "int16"
1849-
elif sf.subtype == "PCM_16":
1850-
format_bits = 16
1851-
read_dtype = "int16"
1852-
elif sf.subtype == "PCM_24":
1853-
format_bits = 24
1854-
read_dtype = "int32"
1855-
else:
1856-
raise ValueError(f"unknown subtype in {fp.name} ({sf.subtype})")
1857-
1858-
max_bits = int(fmt) - 500
1859-
if format_bits > max_bits:
1860-
raise ValueError(
1861-
f"wrong resolution in {fp.name} "
1862-
f"({format_bits}, expected <= {max_bits})"
1863-
)
1864-
1865-
if sf.channels != n_sig:
1866-
raise ValueError(
1867-
f"wrong number of channels in {fp.name} "
1868-
f"({sf.channels}, expected {n_sig})"
1869-
)
1870-
1871-
# Read the samples.
1872-
start_samp = start_frame * samps_per_frame[0]
1873-
end_samp = end_frame * samps_per_frame[0]
1874-
sf.seek(start_samp + sample_offset)
1875-
1876-
# We could do this:
1877-
# sig_data = sf.read(end_samp - start_samp, dtype=read_dtype)
1878-
# However, sf.read fails for huge blocks (over 2**24 total
1879-
# samples) due to a bug in libsndfile:
1880-
# https://github.com/libsndfile/libsndfile/issues/431
1881-
# So read the data in chunks instead.
1882-
n_samp = end_samp - start_samp
1883-
sig_data = np.empty((n_samp, n_sig), dtype=read_dtype)
1884-
CHUNK_SIZE = 1024 * 1024
1885-
for chunk_start in range(0, n_samp, CHUNK_SIZE):
1886-
chunk_end = chunk_start + CHUNK_SIZE
1887-
chunk_data = sf.read(out=sig_data[chunk_start:chunk_end])
1888-
samples_read = chunk_data.shape[0]
1889-
if samples_read != CHUNK_SIZE:
1890-
sig_data = sig_data[: chunk_start + samples_read]
1891-
break
1892-
1893-
# If we read an 8-bit stream as int16 or a 24-bit stream as
1894-
# int32, soundfile shifts each sample left by 8 bits. We
1895-
# want to undo this shift (and, in the case of 8-bit data,
1896-
# convert to an int8 array.)
1897-
if format_bits == 8:
1898-
# np.right_shift(sig_data, 8, dtype='int8') doesn't work.
1899-
# This seems wrong, but the numpy documentation is unclear.
1900-
sig_data2 = np.empty(sig_data.shape, dtype="int8")
1901-
sig_data = np.right_shift(sig_data, 8, out=sig_data2)
1902-
elif format_bits == 24:
1903-
# Shift 32-bit array in-place.
1904-
np.right_shift(sig_data, 8, out=sig_data)
1905-
1906-
# Suppose we have 3 channels and 2 samples per frame. The array
1907-
# returned by sf.read looks like this:
1908-
#
1909-
# channel 0 channel 1 channel 2
1910-
# time 0 [0,0] [0,1] [0,2]
1911-
# time 1 [1,0] [1,1] [1,2]
1912-
# time 2 [2,0] [2,1] [2,2]
1913-
# time 3 [3,0] [3,1] [3,2]
1914-
#
1915-
# We reshape this first into the following:
1916-
#
1917-
# channel 0 channel 1 channel 2
1918-
# time 0 [0,0,0] [0,0,1] [0,0,2]
1919-
# time 1 [0,1,0] [0,1,1] [0,1,2]
1920-
# time 2 [1,0,0] [1,0,1] [1,0,2]
1921-
# time 3 [1,1,0] [1,1,1] [1,1,2]
1922-
#
1923-
# Then we transpose axes 1 and 2:
1924-
#
1925-
# channel 0 channel 1 channel 2
1926-
# time 0 [0,0,0] [0,1,0] [0,2,0]
1927-
# time 1 [0,0,1] [0,1,1] [0,2,1]
1928-
# time 2 [1,0,0] [1,1,0] [1,2,0]
1929-
# time 3 [1,0,1] [1,1,1] [1,2,1]
1930-
#
1931-
# Then when we reshape the array to 1D, the result is in dat file
1932-
# order:
1933-
#
1934-
# channel 0 channel 1 channel 2
1935-
# time 0 [0] [2] [4]
1936-
# time 1 [1] [3] [5]
1937-
# time 2 [6] [8] [10]
1938-
# time 3 [7] [9] [11]
1939-
1940-
sig_data = sig_data.reshape(-1, samps_per_frame[0], n_sig)
1941-
sig_data = sig_data.transpose(0, 2, 1)
1942-
return sig_data.reshape(-1)
1973+
return _rd_compressed_stream(fp, fmt, sample_offset, n_sig, samps_per_frame, start_frame, end_frame)
19431974

19441975

19451976
def _skew_sig(

wfdb/io/annotation.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -1883,6 +1883,7 @@ def rdann(
18831883
pn_dir=None,
18841884
return_label_elements=["symbol"],
18851885
summarize_labels=False,
1886+
ann_stream=None,
18861887
):
18871888
"""
18881889
Read a WFDB annotation file record_name.extension and return an
@@ -1947,7 +1948,10 @@ def rdann(
19471948
)
19481949

19491950
# Read the file in byte pairs
1950-
filebytes = load_byte_pairs(record_name, extension, pn_dir)
1951+
if ann_stream is not None:
1952+
filebytes = np.frombuffer(ann_stream.read(), "<u1").reshape([-1, 2]).astype(np.int32)
1953+
else:
1954+
filebytes = load_byte_pairs(record_name, extension, pn_dir).astype(np.int32)
19511955

19521956
# Get WFDB annotation fields from the file bytes
19531957
(sample, label_store, subtype, chan, num, aux_note) = proc_ann_bytes(

0 commit comments

Comments
 (0)