Commit aaf08a2

Fix CI testing numbering of cli.py (#60)
* Remove the SetSuccess stage (and the need for it)
  Signed-off-by: Jeremy Fowers <[email protected]>
* Add a comment about deepcopy
  Signed-off-by: Jeremy Fowers <[email protected]>
* Fix CI testing order
* Move large test to bottom

---------

Signed-off-by: Jeremy Fowers <[email protected]>
Co-authored-by: Jeremy Fowers <[email protected]>
1 parent 6214ab3 commit aaf08a2
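
Context for the renumbering (not part of the diff itself): unittest's default TestLoader runs test methods in name order, so the numeric prefixes in test/cli.py decide the order the CLI tests execute in CI, and moving the corpus-wide report test to the end of the run is done here by renaming it test_026_cli_report. A minimal, hypothetical sketch of that ordering behavior (the Demo class below is illustrative only and not from this repository):

import unittest


class Demo(unittest.TestCase):
    # Defined out of order on purpose: the loader sorts test methods by name,
    # so test_001_first still runs before test_002_second.
    def test_002_second(self):
        self.assertTrue(True)

    def test_001_first(self):
        self.assertTrue(True)


if __name__ == "__main__":
    # TestLoader.sortTestMethodsUsing defaults to a name comparison,
    # which is why renumbering methods changes execution order in CI.
    unittest.main(verbosity=2)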

File tree

1 file changed: +119 -119 lines changed


test/cli.py

+119 -119
@@ -261,104 +261,7 @@ def test_003_cli_build_dir(self):

         assert_success_of_builds(test_scripts, cache_dir)

-    def test_021_cli_report(self):
-        # NOTE: this is not a unit test, it relies on other command
-        # If this test is failing, make sure the following tests are passing:
-        # - test_cli_corpus
-
-        test_scripts = common.test_scripts_dot_py.keys()
-
-        # Build the test corpus so we have builds to report
-        testargs = [
-            "turnkey",
-            "benchmark",
-            bash(f"{corpus_dir}/*.py"),
-            "--cache-dir",
-            cache_dir,
-        ]
-        with patch.object(sys, "argv", flatten(testargs)):
-            turnkeycli()
-
-        testargs = [
-            "turnkey",
-            "cache",
-            "report",
-            "--cache-dir",
-            cache_dir,
-        ]
-        with patch.object(sys, "argv", testargs):
-            turnkeycli()
-
-        # Read generated CSV file
-        summary_csv_path = report.get_report_name()
-        with open(summary_csv_path, "r", encoding="utf8") as summary_csv:
-            summary = list(csv.DictReader(summary_csv))
-
-        # Check if csv file contains all expected rows and columns
-        expected_cols = [
-            "model_name",
-            "author",
-            "class",
-            "parameters",
-            "hash",
-            "runtime",
-            "device_type",
-            "device",
-            "mean_latency",
-            "throughput",
-            "all_build_stages",
-            "completed_build_stages",
-        ]
-        linear_summary = summary[1]
-        assert len(summary) == len(test_scripts)
-        for elem in expected_cols:
-            assert (
-                elem in linear_summary
-            ), f"Couldn't find expected key {elem} in results spreadsheet"
-
-        # Check whether all rows we expect to be populated are actually populated
-        assert (
-            linear_summary["model_name"] == "linear2"
-        ), f"Wrong model name found {linear_summary['model_name']}"
-        assert (
-            linear_summary["author"] == "turnkey"
-        ), f"Wrong author name found {linear_summary['author']}"
-        assert (
-            linear_summary["class"] == "TwoLayerModel"
-        ), f"Wrong class found {linear_summary['model_class']}"
-        assert (
-            linear_summary["hash"] == "80b93950"
-        ), f"Wrong hash found {linear_summary['hash']}"
-        assert (
-            linear_summary["runtime"] == "ort"
-        ), f"Wrong runtime found {linear_summary['runtime']}"
-        assert (
-            linear_summary["device_type"] == "x86"
-        ), f"Wrong device type found {linear_summary['device_type']}"
-        assert (
-            float(linear_summary["mean_latency"]) > 0
-        ), f"latency must be >0, got {linear_summary['x86_latency']}"
-        assert (
-            float(linear_summary["throughput"]) > 100
-        ), f"throughput must be >100, got {linear_summary['throughput']}"
-
-        # Make sure the report.get_dict() API works
-        result_dict = report.get_dict(
-            summary_csv_path, ["all_build_stages", "completed_build_stages"]
-        )
-        for result in result_dict.values():
-            # All of the models should have exported to ONNX, so the "onnx_exported" value
-            # should be True for all of them
-            assert "export_pytorch" in yaml.safe_load(result["all_build_stages"])
-            assert (
-                "export_pytorch"
-                in yaml.safe_load(result["completed_build_stages"]).keys()
-            )
-            assert (
-                yaml.safe_load(result["completed_build_stages"])["export_pytorch"] > 0
-            )
-
-    def test_005_cli_list(self):
+    def test_004_cli_list(self):
         # NOTE: this is not a unit test, it relies on other command
         # If this test is failing, make sure the following tests are passing:
         # - test_cli_corpus
@@ -391,7 +294,7 @@ def test_005_cli_list(self):
             script_name = common.strip_dot_py(test_script)
             assert script_name in f.getvalue(), f"{script_name} {f.getvalue()}"

-    def test_006_cli_delete(self):
+    def test_005_cli_delete(self):
         # NOTE: this is not a unit test, it relies on other command
         # If this test is failing, make sure the following tests are passing:
         # - test_cli_corpus
@@ -453,7 +356,7 @@ def test_006_cli_delete(self):
             script_name = common.strip_dot_py(test_script)
             assert script_name not in f.getvalue()

-    def test_007_cli_stats(self):
+    def test_006_cli_stats(self):
         # NOTE: this is not a unit test, it relies on other command
         # If this test is failing, make sure the following tests are passing:
         # - test_cli_corpus
@@ -531,7 +434,7 @@ def test_007_cli_stats(self):
         ]
         assert isinstance(stats_dict["task"], str), stats_dict["task"]

-    def test_008_cli_version(self):
+    def test_007_cli_version(self):
         # Get the version number
         with redirect_stdout(io.StringIO()) as f:
             testargs = [
@@ -544,7 +447,7 @@ def test_008_cli_version(self):
         # Make sure we get back a 3-digit number
         assert len(f.getvalue().split(".")) == 3

-    def test_009_cli_turnkey_args(self):
+    def test_008_cli_turnkey_args(self):
         # NOTE: this is not a unit test, it relies on other command
         # If this test is failing, make sure the following tests are passing:
         # - test_cli_single
@@ -570,7 +473,7 @@ def test_009_cli_turnkey_args(self):

     # TODO: Investigate why this test is failing only on Windows CI failing
     @unittest.skipIf(platform.system() == "Windows", "Windows CI only failure")
-    def test_011_cli_benchmark(self):
+    def test_009_cli_benchmark(self):
         # Test the first model in the corpus
         test_script = list(common.test_scripts_dot_py.keys())[0]

@@ -588,7 +491,7 @@ def test_011_cli_benchmark(self):

     # TODO: Investigate why this test is non-deterministically failing
     @unittest.skip("Flaky test")
-    def test_013_cli_labels(self):
+    def test_010_cli_labels(self):
         # Only build models labels with test_group::a
         testargs = [
             "turnkey",
@@ -638,7 +541,7 @@ def test_013_cli_labels(self):
         assert state_files == ["linear_d5b1df11_state", "linear2_80b93950_state"]

     @unittest.skip("Needs re-implementation")
-    def test_014_report_on_failed_build(self):
+    def test_011_report_on_failed_build(self):
         testargs = [
             "turnkey",
             bash(f"{corpus_dir}/linear.py"),
@@ -680,7 +583,7 @@ def test_014_report_on_failed_build(self):
         ), "Wrong number of parameters found in report"
         assert summary[0]["hash"] == "d5b1df11", "Wrong hash found in report"

-    def test_015_runtimes(self):
+    def test_012_runtimes(self):
         # Attempt to benchmark using an invalid runtime
         with self.assertRaises(exceptions.ArgError):
             testargs = [
@@ -729,7 +632,7 @@ def test_015_runtimes(self):

     # TODO: Investigate why this test is only failing on Windows CI
     @unittest.skipIf(platform.system() == "Windows", "Windows CI only failure")
-    def test_016_cli_onnx_opset(self):
+    def test_013_cli_onnx_opset(self):
         # Test the first model in the corpus
         test_script = list(common.test_scripts_dot_py.keys())[0]

@@ -752,7 +655,7 @@ def test_016_cli_onnx_opset(self):
             [test_script], cache_dir, None, check_perf=True, check_opset=user_opset
         )

-    def test_016_cli_iteration_count(self):
+    def test_014_cli_iteration_count(self):
         # Test the first model in the corpus
         test_script = list(common.test_scripts_dot_py.keys())[0]

@@ -777,7 +680,7 @@ def test_016_cli_iteration_count(self):
             check_iteration_count=test_iterations,
         )

-    def test_017_cli_process_isolation(self):
+    def test_015_cli_process_isolation(self):
         # Test the first model in the corpus
         test_script = list(common.test_scripts_dot_py.keys())[0]

@@ -799,7 +702,7 @@ def test_017_cli_process_isolation(self):
         "Skipping, as torch.compile is not supported on Windows"
         "Revisit when torch.compile for Windows is supported",
     )
-    def test_018_skip_compiled(self):
+    def test_016_skip_compiled(self):
         test_script = "compiled.py"
         testargs = [
             "turnkey",
@@ -817,14 +720,14 @@ def test_018_skip_compiled(self):
         # One of those is compiled and should be skipped.
         assert builds_found == 1

-    def test_019_invalid_file_type(self):
+    def test_017_invalid_file_type(self):
         # Ensure that we get an error when running turnkey with invalid input_files
         with self.assertRaises(exceptions.ArgError):
             testargs = ["turnkey", "gobbledegook"]
             with patch.object(sys, "argv", flatten(testargs)):
                 turnkeycli()

-    def test_020_cli_export_only(self):
+    def test_018_cli_export_only(self):
         # Test the first model in the corpus
         test_script = list(common.test_scripts_dot_py.keys())[0]

@@ -842,7 +745,7 @@ def test_020_cli_export_only(self):

         assert_success_of_builds([test_script], cache_dir, check_onnx_file_count=1)

-    def test_022_cli_onnx_model(self):
+    def test_019_cli_onnx_model(self):
         """
         Manually export an ONNX file, then feed it into the CLI
         """
@@ -871,7 +774,7 @@ def test_022_cli_onnx_model(self):

         assert_success_of_builds([build_name], cache_dir)

-    def test_023_cli_onnx_model_opset(self):
+    def test_020_cli_onnx_model_opset(self):
         """
         Manually export an ONNX file with a non-defualt opset, then feed it into the CLI
         """
@@ -904,7 +807,7 @@ def test_023_cli_onnx_model_opset(self):

         assert_success_of_builds([build_name], cache_dir)

-    def test_024_args_encode_decode(self):
+    def test_021_args_encode_decode(self):
         """
         Test the encoding and decoding of arguments that follow the
         ["arg1::[value1,value2]","arg2::value1","flag_arg"]' format
@@ -916,7 +819,7 @@ def test_024_args_encode_decode(self):
             reencoded_value == encoded_value
         ), f"input: {encoded_value}, decoded: {decoded_value}, reencoded_value: {reencoded_value}"

-    def test_025_benchmark_non_existent_file(self):
+    def test_022_benchmark_non_existent_file(self):
         # Ensure we get an error when benchmarking a non existent file
         with self.assertRaises(exceptions.ArgError):
             filename = "thou_shall_not_exist.py"
@@ -925,7 +828,7 @@ def test_025_benchmark_non_existent_file(self):
             with patch.object(sys, "argv", testargs):
                 turnkeycli()

-    def test_026_benchmark_non_existent_file_prefix(self):
+    def test_023_benchmark_non_existent_file_prefix(self):
         # Ensure we get an error when benchmarking a non existent file
         with self.assertRaises(exceptions.ArgError):
             file_prefix = "non_existent_prefix_*.py"
@@ -934,7 +837,7 @@ def test_026_benchmark_non_existent_file_prefix(self):
             with patch.object(sys, "argv", testargs):
                 turnkeycli()

-    def test_027_input_text_file(self):
+    def test_024_input_text_file(self):
         """
         Ensure that we can intake .txt files
         """
@@ -955,7 +858,7 @@ def test_027_input_text_file(self):
             builds_found == 3
         ), f"Expected 3 builds (1 for linear.py, 2 for linear2.py), but got {builds_found}."

-    def test_028_cli_timeout(self):
+    def test_025_cli_timeout(self):
         """
         Make sure that the --timeout option and its associated reporting features work.

@@ -1009,6 +912,103 @@ def test_028_cli_timeout(self):
             # the stats.yaml was created, which in turn means the CSV is empty
             pass

+    def test_026_cli_report(self):
+        # NOTE: this is not a unit test, it relies on other command
+        # If this test is failing, make sure the following tests are passing:
+        # - test_cli_corpus
+
+        test_scripts = common.test_scripts_dot_py.keys()
+
+        # Build the test corpus so we have builds to report
+        testargs = [
+            "turnkey",
+            "benchmark",
+            bash(f"{corpus_dir}/*.py"),
+            "--cache-dir",
+            cache_dir,
+        ]
+        with patch.object(sys, "argv", flatten(testargs)):
+            turnkeycli()
+
+        testargs = [
+            "turnkey",
+            "cache",
+            "report",
+            "--cache-dir",
+            cache_dir,
+        ]
+        with patch.object(sys, "argv", testargs):
+            turnkeycli()
+
+        # Read generated CSV file
+        summary_csv_path = report.get_report_name()
+        with open(summary_csv_path, "r", encoding="utf8") as summary_csv:
+            summary = list(csv.DictReader(summary_csv))
+
+        # Check if csv file contains all expected rows and columns
+        expected_cols = [
+            "model_name",
+            "author",
+            "class",
+            "parameters",
+            "hash",
+            "runtime",
+            "device_type",
+            "device",
+            "mean_latency",
+            "throughput",
+            "all_build_stages",
+            "completed_build_stages",
+        ]
+        linear_summary = summary[1]
+        assert len(summary) == len(test_scripts)
+        for elem in expected_cols:
+            assert (
+                elem in linear_summary
+            ), f"Couldn't find expected key {elem} in results spreadsheet"
+
+        # Check whether all rows we expect to be populated are actually populated
+        assert (
+            linear_summary["model_name"] == "linear2"
+        ), f"Wrong model name found {linear_summary['model_name']}"
+        assert (
+            linear_summary["author"] == "turnkey"
+        ), f"Wrong author name found {linear_summary['author']}"
+        assert (
+            linear_summary["class"] == "TwoLayerModel"
+        ), f"Wrong class found {linear_summary['model_class']}"
+        assert (
+            linear_summary["hash"] == "80b93950"
+        ), f"Wrong hash found {linear_summary['hash']}"
+        assert (
+            linear_summary["runtime"] == "ort"
+        ), f"Wrong runtime found {linear_summary['runtime']}"
+        assert (
+            linear_summary["device_type"] == "x86"
+        ), f"Wrong device type found {linear_summary['device_type']}"
+        assert (
+            float(linear_summary["mean_latency"]) > 0
+        ), f"latency must be >0, got {linear_summary['x86_latency']}"
+        assert (
+            float(linear_summary["throughput"]) > 100
+        ), f"throughput must be >100, got {linear_summary['throughput']}"
+
+        # Make sure the report.get_dict() API works
+        result_dict = report.get_dict(
+            summary_csv_path, ["all_build_stages", "completed_build_stages"]
+        )
+        for result in result_dict.values():
+            # All of the models should have exported to ONNX, so the "onnx_exported" value
+            # should be True for all of them
+            assert "export_pytorch" in yaml.safe_load(result["all_build_stages"])
+            assert (
+                "export_pytorch"
+                in yaml.safe_load(result["completed_build_stages"]).keys()
+            )
+            assert (
+                yaml.safe_load(result["completed_build_stages"])["export_pytorch"] > 0
+            )
+

 if __name__ == "__main__":
     unittest.main()
