Merge pull request #777 from OpenSimulationInterface/760-bug-in-osi-docu-on-tracefiles

pmai · web-flow · commit 77587cfd12c3 · 2024-02-26T12:01:38.000+01:00
Removal of the historical .txt trace file format and related scripts
diff --git a/doc/architecture/formatting_scripts.adoc b/doc/architecture/formatting_scripts.adoc
@@ -7,28 +7,6 @@ endif::[]
 The OSI repository contains Python scripts for converting trace files from one format to another.
 The formatting scripts are stored in `open-simulation-interface/format/`
 
-**txt2osi.py**
-
-`txt2osi.py` converts plain-text trace files to binary `.osi` trace files.
-This script takes the following parameters:
-
-`--data`, `-d`::
-String containing the path to the file with serialized data.
-
-`--type`, `-t`::
-Optional string describing the message type used to serialize data.
-`'SensorView'`, `'GroundTruth'`, or `'SensorData'` are permitted values.
-The default value is `'SensorView'`.
-
-`--output`, `-o`::
-Optional string containing the name of the output file.
-The default value is `'converted.osi'`.
-
-`--compress`, `-c`::
-Optional Boolean controlling whether to compress the output to an lzma file.
-`True`, or `False` are permitted values.
-The default value is `False`.
-
 **osi2read.py**
 
 `osi2read.py` converts trace files to human-readable `.txth` trace files.
diff --git a/doc/architecture/trace_file_formats.adoc b/doc/architecture/trace_file_formats.adoc
@@ -5,19 +5,20 @@ endif::[]
 [#top-osi_trace_file_formats]
 = OSI trace file formats
 
-There are multiple formats for storing multiple serialized OSI messages in one trace file.
+There are two formats for storing multiple serialized OSI messages in one trace file.
 
 *.osi::
 Binary trace file.
 Messages are separated by a length specification before each message.
 The length is represented by a four-byte, little-endian, unsigned integer.
 The length does not include the integer itself.
 
-*.txt::
-Plain-text trace file.
-Messages are separated by `$$__$$`.
-
 *.txth::
 Human-readable plain-text trace file.
 Messages are separated by newlines.
+
+NOTE: Previous releases of OSI also supported a so-called plain-text trace file format, with file extension `.txt`.
+This legacy format did not contain plain text, but rather binary protobuf messages separated by a special separator.
+Because a fixed separator sequence can also occur inside binary message data, the format was deprecated and fully replaced with the `.osi` binary file format.
+This release no longer contains any support for the legacy `.txt` file format.
 These files may be used for manual checks.
diff --git a/format/OSITrace.py b/format/OSITrace.py
@@ -13,8 +13,6 @@
 
 warnings.simplefilter("default")
 
-SEPARATOR = b"$$__$$"
-SEPARATOR_LENGTH = len(SEPARATOR)
 BUFFER_SIZE = 1000000
 
 
@@ -47,7 +45,7 @@ def __init__(self, path=None, type_name="SensorView"):
         self.retrieved_scenario_size = 0
         self._int_length = len(struct.pack("<L", 0))
 
-    def from_file(self, path, type_name="SensorView", max_index=-1, format_type=None):
+    def from_file(self, path, type_name="SensorView", max_index=-1):
         """Import a scenario from a file"""
 
         if path.lower().endswith((".lzma", ".xz")):
@@ -56,69 +54,7 @@ def from_file(self, path, type_name="SensorView", max_index=-1, format_type=None
             self.scenario_file = open(path, "rb")
 
         self.type_name = type_name
-        self.format_type = format_type
-
-        if self.format_type == "separated":
-            # warnings.warn("The separated trace files will be completely removed in the near future. Please convert them to *.osi files with the converter in the main OSI repository.", PendingDeprecationWarning)
-            self.timestep_count = self.retrieve_message_offsets(max_index)
-        else:
-            self.timestep_count = self.retrieve_message()
-
-    def retrieve_message_offsets(self, max_index):
-        """
-        Retrieve the offsets of all the messages of the scenario and store them
-        in the `message_offsets` attribute of the object
-
-        It returns the number of discovered timesteps
-        """
-        scenario_size = get_size_from_file_stream(self.scenario_file)
-
-        if max_index == -1:
-            max_index = float("inf")
-
-        buffer_deque = deque(maxlen=2)
-
-        self.message_offsets = [0]
-        eof = False
-
-        self.scenario_file.seek(0)
-
-        while not eof and len(self.message_offsets) <= max_index:
-            found = -1  # SEP offset in buffer
-            buffer_deque.clear()
-
-            while found == -1 and not eof:
-                new_read = self.scenario_file.read(BUFFER_SIZE)
-                buffer_deque.append(new_read)
-                buffer = b"".join(buffer_deque)
-                found = buffer.find(SEPARATOR)
-                eof = len(new_read) != BUFFER_SIZE
-
-            buffer_offset = self.scenario_file.tell() - len(buffer)
-            message_offset = found + buffer_offset + SEPARATOR_LENGTH
-            self.message_offsets.append(message_offset)
-
-            self.scenario_file.seek(message_offset)
-
-            while eof and found != -1:
-                buffer = buffer[found + SEPARATOR_LENGTH :]
-                found = buffer.find(SEPARATOR)
-
-                buffer_offset = scenario_size - len(buffer)
-
-                message_offset = found + buffer_offset + SEPARATOR_LENGTH
-
-                if message_offset >= scenario_size:
-                    break
-                self.message_offsets.append(message_offset)
-
-        if eof:
-            self.retrieved_scenario_size = scenario_size
-        else:
-            self.retrieved_scenario_size = self.message_offsets[-1]
-            self.message_offsets.pop()
-
-        return len(self.message_offsets)
+        self.timestep_count = self.retrieve_message()
 
     def retrieve_message(self):
         scenario_size = get_size_from_file_stream(self.scenario_file)
@@ -180,42 +116,21 @@ def get_messages_in_index_range(self, begin, end):
             for abs_message_offset in self.message_offsets[begin:end]
         ]
 
-        if self.format_type == "separated":
-            message_sequence_len = abs_last_offset - abs_first_offset - SEPARATOR_LENGTH
-            serialized_messages_extract = self.scenario_file.read(message_sequence_len)
-
-            for rel_index, rel_message_offset in enumerate(rel_message_offsets):
-                rel_begin = rel_message_offset
-                rel_end = (
-                    rel_message_offsets[rel_index + 1] - SEPARATOR_LENGTH
-                    if rel_index + 1 < len(rel_message_offsets)
-                    else message_sequence_len
-                )
-                message = MESSAGES_TYPE[self.type_name]()
-                serialized_message = serialized_messages_extract[rel_begin:rel_end]
-                message.ParseFromString(serialized_message)
-                yield message
-
-        elif self.format_type is None:
-            message_sequence_len = abs_last_offset - abs_first_offset
-            serialized_messages_extract = self.scenario_file.read(message_sequence_len)
-
-            for rel_index, rel_message_offset in enumerate(rel_message_offsets):
-                rel_begin = rel_message_offset + self._int_length
-                rel_end = (
-                    rel_message_offsets[rel_index + 1]
-                    if rel_index + 1 < len(rel_message_offsets)
-                    else message_sequence_len
-                )
-
-                message = MESSAGES_TYPE[self.type_name]()
-                serialized_message = serialized_messages_extract[rel_begin:rel_end]
-                message.ParseFromString(serialized_message)
-                yield message
+        message_sequence_len = abs_last_offset - abs_first_offset
+        serialized_messages_extract = self.scenario_file.read(message_sequence_len)
 
-        else:
-            self.scenario_file.close()
-            raise Exception(f"The defined format {self.format_type} does not exist.")
+        for rel_index, rel_message_offset in enumerate(rel_message_offsets):
+            rel_begin = rel_message_offset + self._int_length
+            rel_end = (
+                rel_message_offsets[rel_index + 1]
+                if rel_index + 1 < len(rel_message_offsets)
+                else message_sequence_len
+            )
+
+            message = MESSAGES_TYPE[self.type_name]()
+            serialized_message = serialized_messages_extract[rel_begin:rel_end]
+            message.ParseFromString(serialized_message)
+            yield message
 
     def make_readable(self, name, interval=None, index=None):
         self.scenario_file.seek(0)
diff --git a/format/osi2read.py b/format/osi2read.py
@@ -1,9 +1,8 @@
 """
-This program converts serialized txt/osi trace files into a human readable txth file. 
+This program converts serialized osi trace files into a human-readable txth file.
 
 Example usage:
     python3 osi2read.py -d trace.osi -o myreadableosifile
-    python3 osi2read.py -d trace.txt -f separated -o myreadableosifile
 """
 
 from OSITrace import OSITrace
@@ -19,7 +18,7 @@ def command_line_arguments():
     dir_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 
     parser = argparse.ArgumentParser(
-        description="Convert a serialized osi/txt trace file to a readable txth output.",
+        description="Convert a serialized osi trace file to a readable txth output.",
         prog="osi2read converter",
     )
     parser.add_argument(
@@ -42,15 +41,6 @@ def command_line_arguments():
         type=str,
         required=False,
     )
-    parser.add_argument(
-        "--format",
-        "-f",
-        help="Set the format type of the trace.",
-        choices=["separated", None],
-        default=None,
-        type=str,
-        required=False,
-    )
 
     return parser.parse_args()
 
@@ -61,7 +51,7 @@ def main():
 
     # Initialize the OSI trace class
     trace = OSITrace()
-    trace.from_file(path=args.data, type_name=args.type, format_type=args.format)
+    trace.from_file(path=args.data, type_name=args.type)
 
     args.output = args.output.split(".", 1)[0] + ".txth"
 
diff --git a/format/txt2osi.py b/format/txt2osi.py