Skip to content

Commit d69af93

Browse files
author
MarcoFalke
committed
Merge bitcoin/bitcoin#23737: test: make feature_init more robust
8904f17 test: enable txindex in feature_init (James O'Beirne) 93db6d8 test: feature_init: retain debug.log and improve detection (James O'Beirne) 24fcf6e test: feature_init: tweak all .ldb files of a certain type (James O'Beirne) a8ffbc0 test: add TestNode.wait_for_debug_log (James O'Beirne) a2fb62b test: introduce TestNode.debug_log_bytes (James O'Beirne) Pull request description: Hopefully fixes #23646. This makes a few changes to feature_init (along the lines of the discussion here: bitcoin/bitcoin#23289 (comment)). The changes are detailed in the specific commits, but a summary might be: - more robust log-line detection (at the expense of maybe being slower) - retain all debug.log content - perturb .ldb files in a more complete way Top commit has no ACKs. Tree-SHA512: 9c8900998fdb96d3cc92bd30c4eeda2a47e9787e6432546d458c17f24eb9e2b6b3eb573db225172888ca71d7a62260f497f83827597b982b9e35c5452cb6e514
2 parents 31f385c + 8904f17 commit d69af93

File tree

2 files changed

+67
-52
lines changed

2 files changed

+67
-52
lines changed

test/functional/feature_init.py

Lines changed: 25 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -44,14 +44,9 @@ def run_test(self):
4444
def sigterm_node():
4545
node.process.terminate()
4646
node.process.wait()
47-
node.debug_log_path.unlink()
48-
node.debug_log_path.touch()
4947

5048
def check_clean_start():
5149
"""Ensure that node restarts successfully after various interrupts."""
52-
# TODO: add -txindex=1 to fully test index initialization.
53-
# See https://github.com/bitcoin/bitcoin/pull/23289#discussion_r735159180 for
54-
# a discussion of the related bug.
5550
node.start()
5651
node.wait_for_rpc_connection()
5752
assert_equal(200, node.getblockcount())
@@ -71,56 +66,33 @@ def check_clean_start():
7166
'net thread start',
7267
'addcon thread start',
7368
'loadblk thread start',
74-
# TODO: reenable - see above TODO
75-
# 'txindex thread start',
76-
'msghand thread start'
69+
'txindex thread start',
70+
'msghand thread start',
71+
'net thread start',
72+
'addcon thread start',
7773
]
7874
if self.is_wallet_compiled():
7975
lines_to_terminate_after.append('Verifying wallet')
8076

8177
for terminate_line in lines_to_terminate_after:
8278
self.log.info(f"Starting node and will exit after line '{terminate_line}'")
83-
node.start(
84-
# TODO: add -txindex=1 to fully test index initialization.
85-
# extra_args=['-txindex=1'],
86-
)
87-
logfile = open(node.debug_log_path, 'r', encoding='utf8')
88-
89-
MAX_SECS_TO_WAIT = 30
90-
start = time.time()
91-
num_lines = 0
92-
93-
while True:
94-
line = logfile.readline()
95-
if line:
96-
num_lines += 1
97-
98-
if line and terminate_line.lower() in line.lower():
99-
self.log.debug(f"Terminating node after {num_lines} log lines seen")
100-
sigterm_node()
101-
break
102-
103-
if (time.time() - start) > MAX_SECS_TO_WAIT:
104-
raise AssertionError(
105-
f"missed line {terminate_line}; terminating now after {num_lines} lines")
79+
node.start(extra_args=['-txindex=1'])
10680

107-
if node.process.poll() is not None:
108-
raise AssertionError(f"node failed to start (line: '{terminate_line}')")
81+
num_total_logs = node.wait_for_debug_log([terminate_line], ignore_case=True)
82+
self.log.debug(f"Terminating node after {num_total_logs} log lines seen")
83+
sigterm_node()
10984

11085
check_clean_start()
111-
num_total_logs = len(node.debug_log_path.read_text().splitlines())
11286
self.stop_node(0)
11387

11488
self.log.info(
11589
f"Terminate at some random point in the init process (max logs: {num_total_logs})")
11690

11791
for _ in range(40):
118-
terminate_after = random.randint(1, num_total_logs)
119-
self.log.debug(f"Starting node and will exit after {terminate_after} lines")
120-
node.start(
121-
# TODO: add -txindex=1 to fully test index initialization.
122-
# extra_args=['-txindex=1'],
123-
)
92+
num_logs = len(Path(node.debug_log_path).read_text().splitlines())
93+
additional_lines = random.randint(1, num_total_logs)
94+
self.log.debug(f"Starting node and will exit after {additional_lines} lines")
95+
node.start(extra_args=['-txindex=1'])
12496
logfile = open(node.debug_log_path, 'r', encoding='utf8')
12597

12698
MAX_SECS_TO_WAIT = 10
@@ -132,7 +104,8 @@ def check_clean_start():
132104
if line:
133105
num_lines += 1
134106

135-
if num_lines >= terminate_after or (time.time() - start) > MAX_SECS_TO_WAIT:
107+
if num_lines >= (num_logs + additional_lines) or \
108+
(time.time() - start) > MAX_SECS_TO_WAIT:
136109
self.log.debug(f"Terminating node after {num_lines} log lines seen")
137110
sigterm_node()
138111
break
@@ -152,11 +125,12 @@ def check_clean_start():
152125
}
153126

154127
for file_patt, err_fragment in files_to_disturb.items():
155-
target_file = list(node.chain_path.glob(file_patt))[0]
128+
target_files = list(node.chain_path.glob(file_patt))
156129

157-
self.log.info(f"Tweaking file to ensure failure {target_file}")
158-
bak_path = str(target_file) + ".bak"
159-
target_file.rename(bak_path)
130+
for target_file in target_files:
131+
self.log.info(f"Tweaking file to ensure failure {target_file}")
132+
bak_path = str(target_file) + ".bak"
133+
target_file.rename(bak_path)
160134

161135
# TODO: at some point, we should test perturbing the files instead of removing
162136
# them, e.g.
@@ -170,14 +144,16 @@ def check_clean_start():
170144
# investigate doing this later.
171145

172146
node.assert_start_raises_init_error(
173-
# TODO: add -txindex=1 to fully test index initialization.
174-
# extra_args=['-txindex=1'],
147+
extra_args=['-txindex=1'],
175148
expected_msg=err_fragment,
176149
match=ErrorMatch.PARTIAL_REGEX,
177150
)
178151

179-
self.log.info(f"Restoring file from {bak_path} and restarting")
180-
Path(bak_path).rename(target_file)
152+
for target_file in target_files:
153+
bak_path = str(target_file) + ".bak"
154+
self.log.debug(f"Restoring file from {bak_path} and restarting")
155+
Path(bak_path).rename(target_file)
156+
181157
check_clean_start()
182158
self.stop_node(0)
183159

test/functional/test_framework/test_node.py

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -389,14 +389,17 @@ def chain_path(self) -> Path:
389389
def debug_log_path(self) -> Path:
390390
return self.chain_path / 'debug.log'
391391

392+
def debug_log_bytes(self) -> int:
393+
with open(self.debug_log_path, encoding='utf-8') as dl:
394+
dl.seek(0, 2)
395+
return dl.tell()
396+
392397
@contextlib.contextmanager
393398
def assert_debug_log(self, expected_msgs, unexpected_msgs=None, timeout=2):
394399
if unexpected_msgs is None:
395400
unexpected_msgs = []
396401
time_end = time.time() + timeout * self.timeout_factor
397-
with open(self.debug_log_path, encoding='utf-8') as dl:
398-
dl.seek(0, 2)
399-
prev_size = dl.tell()
402+
prev_size = self.debug_log_bytes()
400403

401404
yield
402405

@@ -419,6 +422,42 @@ def assert_debug_log(self, expected_msgs, unexpected_msgs=None, timeout=2):
419422
time.sleep(0.05)
420423
self._raise_assertion_error('Expected messages "{}" does not partially match log:\n\n{}\n\n'.format(str(expected_msgs), print_log))
421424

425+
def wait_for_debug_log(self, expected_msgs, timeout=10, ignore_case=False) -> int:
426+
"""
427+
Block until we see a particular debug log message fragment or until we exceed the timeout.
428+
Return:
429+
the number of log lines we encountered when matching
430+
"""
431+
time_end = time.time() + timeout * self.timeout_factor
432+
prev_size = self.debug_log_bytes()
433+
re_flags = re.MULTILINE | (re.IGNORECASE if ignore_case else 0)
434+
435+
while True:
436+
found = True
437+
with open(self.debug_log_path, encoding='utf-8') as dl:
438+
dl.seek(prev_size)
439+
log = dl.read()
440+
441+
for expected_msg in expected_msgs:
442+
if re.search(re.escape(expected_msg), log, flags=re_flags) is None:
443+
found = False
444+
445+
if found:
446+
num_logs = len(log.splitlines())
447+
return num_logs
448+
449+
if time.time() >= time_end:
450+
print_log = " - " + "\n - ".join(log.splitlines())
451+
break
452+
453+
# No sleep here because we want to detect the message fragment as fast as
454+
# possible.
455+
456+
self._raise_assertion_error(
457+
'Expected messages "{}" does not partially match log:\n\n{}\n\n'.format(
458+
str(expected_msgs), print_log))
459+
return -1 # useless return to satisfy linter
460+
422461
@contextlib.contextmanager
423462
def profile_with_perf(self, profile_name: str):
424463
"""

0 commit comments

Comments
 (0)