Skip to content

Commit

Permalink
Fix reading truncated data when the next segment offset is set (#326)
Browse files Browse the repository at this point in the history
  • Loading branch information
adamreeve authored Mar 4, 2024
1 parent c00f42e commit 1a118e0
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 19 deletions.
50 changes: 33 additions & 17 deletions nptdms/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,11 @@ def __init__(self, tdms_file):
self._index_file_path = filepath
self._index_file = open(self._index_file_path, "rb")

if self._file is not None:
self._data_file_size = _get_file_size(self._file)
else:
self._data_file_size = None

def close(self):
if self._file is None and self._index_file is None:
# Already closed
Expand Down Expand Up @@ -314,21 +319,31 @@ def _read_lead_in(self, file, segment_position, is_index_file=False):
segment_incomplete = next_segment_offset == 0xFFFFFFFFFFFFFFFF
if segment_incomplete:
# Segment size is unknown. This can happen if LabVIEW crashes.
next_segment_pos = self._get_data_file_size()
next_segment_pos = self._data_file_size
else:
next_segment_pos = (
segment_position + next_segment_offset + lead_size)
if self._data_file_size is not None and next_segment_pos > self._data_file_size:
# The raw data offset is incorrect, and there is less data than expected in this segment
next_segment_pos = self._data_file_size
segment_incomplete = True

if segment_incomplete:
if next_segment_pos < data_position:
# Metadata wasn't completely written and don't have any data in this segment,
# don't try to read any metadata
log.warning("Last segment metadata is incomplete")
raise EOFError
# Try to read until the end of the file if we have complete metadata
log.warning(
"Last segment of file has unknown size, "
"will attempt to read to the end of the file")
else:
log.debug("Next segment offset = %d, raw data offset = %d, data size = %d b",
next_segment_offset, raw_data_offset, next_segment_offset - raw_data_offset)
next_segment_pos = (
segment_position + next_segment_offset + lead_size)
else:
# Try to read until the end of the file if we have complete metadata
log.warning(
"Last segment of file has less data than expected, "
"will attempt to read to the end of the file")

log.debug("Next segment offset = %d, raw data offset = %d, expected data size = %d b, actual data size = %d b",
next_segment_offset, raw_data_offset,
next_segment_offset - raw_data_offset,
next_segment_pos - data_position)

return segment_position, toc_mask, data_position, next_segment_pos, segment_incomplete

Expand All @@ -346,13 +361,6 @@ def _verify_segment_start(self, segment):
position) +
"Check that the tdms_index file matches the tdms data file.")

def _get_data_file_size(self):
current_pos = self._file.tell()
self._file.seek(0, os.SEEK_END)
end_pos = self._file.tell()
self._file.seek(current_pos, os.SEEK_SET)
return end_pos

def _update_object_metadata(self, segment):
""" Update object metadata using the metadata read from a single segment
"""
Expand Down Expand Up @@ -509,3 +517,11 @@ def _array_equal(a, b, chunk_size=100):
if not (a[offset:offset+chunk_size] == b[offset:offset+chunk_size]).all():
return False
return True


def _get_file_size(file):
current_pos = file.tell()
file.seek(0, os.SEEK_END)
end_pos = file.tell()
file.seek(current_pos, os.SEEK_SET)
return end_pos
44 changes: 44 additions & 0 deletions nptdms/test/test_tdms_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,6 +482,20 @@ def test_read_with_index_file(test_file, expected_data):
compare_arrays(channel_obj.data, expected_channel_data)


def test_read_index_file_only():
    """ Test that channel metadata can be read from a tdms_index file alone,
    and that attempting to read channel data raises a RuntimeError.
    """
    generated_file, expected = scenarios.single_segment_with_two_channels().values
    with generated_file.get_tempfile_with_index() as data_path:
        with TdmsFile.open(data_path + "_index") as index_file:
            for (group_name, channel_name), channel_data in expected.items():
                channel = index_file[group_name][channel_name]
                # Lengths come from metadata, so they are available without data
                assert len(channel) == len(channel_data)
                with pytest.raises(RuntimeError) as exc_info:
                    channel[:]
                assert "Data cannot be read from index file only" in str(exc_info.value)


@pytest.mark.skipif(sys.version_info < (3, 4), reason="pathlib only available in stdlib since 3.4")
def test_read_file_passed_as_pathlib_path():
""" Test reading a file when using a pathlib Path object
Expand Down Expand Up @@ -764,6 +778,36 @@ def test_incomplete_segment_with_string_data():
assert len(channel) == 0


def test_truncated_interleaved_data():
    """ Test reading a segment truncated within a row of interleaved data.

    The lead-in's next segment offset claims more raw data than the file
    actually contains, so reading should stop at the last complete row.
    """
    generated = GeneratedFile()
    toc = ("kTocMetaData", "kTocRawData", "kTocNewObjList", "kTocInterleavedData")
    metadata = segment_objects_metadata(
        channel_metadata("/'group'/'channel1'", 3, 4),
        channel_metadata("/'group'/'channel2'", 3, 4),
    )
    # 7 values present; 3 complete interleaved rows plus one dangling value
    raw_data = (
        "01 00 00 00" "02 00 00 00"
        "03 00 00 00" "04 00 00 00"
        "05 00 00 00" "06 00 00 00"
        "07 00 00 00"
    )
    # Claim a full 4 rows * 2 channels * 4 bytes despite the short data
    generated.add_segment(toc, metadata, raw_data, data_size_override=4 * 2 * 4)
    with generated.get_tempfile() as temp_file:
        with TdmsFile.open(temp_file.file) as tdms_file:
            group = tdms_file['group']
            for channel in (group['channel1'], group['channel2']):
                data = channel[:]
                assert channel[-1] == data[-1]
                assert len(channel) == 3
                assert len(data) == 3


def test_truncated_metadata_in_last_segment():
""" Test the scenario where writing the file was aborted with part of the metadata written
"""
Expand Down
7 changes: 5 additions & 2 deletions nptdms/test/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,9 @@ class GeneratedFile(object):
def __init__(self):
self._content = []

def add_segment(self, toc, metadata, data, incomplete=False, binary_data=False, version=4713):
def add_segment(
self, toc, metadata, data, incomplete=False, binary_data=False, version=4713,
data_size_override=None):
metadata_bytes = _hex_to_bytes(metadata)
data_bytes = data if binary_data else _hex_to_bytes(data)
if toc is not None:
Expand All @@ -246,7 +248,8 @@ def add_segment(self, toc, metadata, data, incomplete=False, binary_data=False,
raise ValueError("Unrecognised TOC value: %s" % toc_item)
lead_in += struct.pack('<i', toc_mask)
lead_in += struct.pack('<l', version)
next_segment_offset = len(metadata_bytes) + len(data_bytes)
data_len = data_size_override if data_size_override is not None else len(data_bytes)
next_segment_offset = len(metadata_bytes) + data_len
raw_data_offset = len(metadata_bytes)
if incomplete:
lead_in += _hex_to_bytes('FF' * 8)
Expand Down

0 comments on commit 1a118e0

Please sign in to comment.