From ae775d5aeee1deb82ab9bde17bf87ed671b05da6 Mon Sep 17 00:00:00 2001 From: Samhita Alla Date: Wed, 6 Dec 2023 23:32:08 +0530 Subject: [PATCH] standard tests list generator (#1264) * standard tests list generator Signed-off-by: Samhita Alla * bash Signed-off-by: Samhita Alla * nit Signed-off-by: Samhita Alla * nit Signed-off-by: Samhita Alla * add torch Signed-off-by: Samhita Alla * add torch Signed-off-by: Samhita Alla * validate tests Signed-off-by: Samhita Alla * update validation logic Signed-off-by: Samhita Alla * lint Signed-off-by: Samhita Alla * debug Signed-off-by: Samhita Alla * debug Signed-off-by: Samhita Alla * debug Signed-off-by: Samhita Alla * debug Signed-off-by: Samhita Alla * debug Signed-off-by: Samhita Alla * debug Signed-off-by: Samhita Alla * modify regex Signed-off-by: Samhita Alla * debug Signed-off-by: Samhita Alla * remove re Signed-off-by: Samhita Alla * remove test.py Signed-off-by: Samhita Alla * debug Signed-off-by: Samhita Alla * debug Signed-off-by: Samhita Alla * debug Signed-off-by: Samhita Alla * remove test.py Signed-off-by: Samhita Alla * debug Signed-off-by: Samhita Alla * change regex Signed-off-by: Samhita Alla * debug Signed-off-by: Samhita Alla * add env Signed-off-by: Samhita Alla * do not truncate output Signed-off-by: Samhita Alla * search Signed-off-by: Samhita Alla * clean string Signed-off-by: Samhita Alla * cleanup Signed-off-by: Samhita Alla * incorporate suggestions by Eduardo Signed-off-by: Samhita Alla * remove comment Signed-off-by: Samhita Alla --------- Signed-off-by: Samhita Alla --- .github/workflows/checks.yml | 34 ++- .../advanced_composition/merge_sort.py | 2 + .../advanced_composition/subworkflows.py | 2 + flyte_tests.txt | 17 ++ flyte_tests_manifest.json | 199 +++++++++++------- flyte_tests_validate.py | 58 +++++ 6 files changed, 222 insertions(+), 90 deletions(-) create mode 100644 flyte_tests.txt create mode 100644 flyte_tests_validate.py diff --git a/.github/workflows/checks.yml 
b/.github/workflows/checks.yml index fd715dd3c..dd9502ade 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -122,10 +122,17 @@ jobs: name: Publish artifacts to github release runs-on: ubuntu-latest needs: [prerelease] + strategy: + matrix: + python-version: ["3.11"] steps: - uses: actions/checkout@v2 with: fetch-depth: "0" + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} - name: setup download artifact dir run: | mkdir download-artifact @@ -140,6 +147,7 @@ jobs: for i in */; do tar -czvf "../release-snacks/${i%/}.tar.gz" "$i" & done; wait cd .. && sudo rm -rf download-artifact/ cp flyte_tests_manifest.json release-snacks/flyte_tests_manifest.json + cp flyte_tests.txt release-snacks/flyte_tests.txt - name: Release test manifest uses: goreleaser/goreleaser-action@v2 with: @@ -203,35 +211,25 @@ jobs: - name: Install Python dependencies run: | python -m pip install --upgrade pip - pip install flytekit flytekitplugins-deck-standard + pip install flytekit flytekitplugins-deck-standard torch pip freeze - name: Checkout flytesnacks uses: actions/checkout@v3 with: repository: flyteorg/flytesnacks path: flytesnacks + - name: Verify existence of the tests + run: | + python flyte_tests_validate.py - name: Register specific tests run: | - for f in \ - basics/basics/hello_world.py \ - basics/basics/workflow.py \ - basics/basics/named_outputs.py \ - advanced_composition/advanced_composition/chain_entities.py \ - advanced_composition/advanced_composition/dynamics.py \ - advanced_composition/advanced_composition/map_task.py \ - advanced_composition/advanced_composition/subworkflows.py \ - data_types_and_io/data_types_and_io/dataclass.py \ - data_types_and_io/data_types_and_io/structured_dataset.py ; + while read -r line; do - # TODO: unpin version of flytekit once the FlyteFile bug is fixed pyflyte --config ./boilerplate/flyte/end2end/functional-test-config.yaml 
\ register \ --project flytesnacks \ --domain development \ - --image cr.flyte.org/flyteorg/flytekit:py3.11-1.10.0 \ + --image cr.flyte.org/flyteorg/flytekit:py3.11-latest \ --version ${{ env.FLYTESNACKS_VERSION }} \ - flytesnacks/examples/$f; - done - - name: End2End - run: | - make end2end_execute + flytesnacks/$line; + done < flyte_tests.txt diff --git a/examples/advanced_composition/advanced_composition/merge_sort.py b/examples/advanced_composition/advanced_composition/merge_sort.py index 342ff3306..55d28e194 100644 --- a/examples/advanced_composition/advanced_composition/merge_sort.py +++ b/examples/advanced_composition/advanced_composition/merge_sort.py @@ -29,6 +29,7 @@ # %% [markdown] # A simple split function that divides a list into two halves. + # %% @task def split(numbers: typing.List[int]) -> Tuple[typing.List[int], typing.List[int], int, int]: @@ -65,6 +66,7 @@ def merge(sorted_list1: typing.List[int], sorted_list2: typing.List[int]) -> typ # Generally speaking, the algorithm will recurse through the list, splitting it in half until it reaches a size that we # know is efficient enough to run locally. At which point it'll just use the python-builtin sorted function. + # %% [markdown] # This runs the sorting completely locally. It's faster and more efficient to do so if the entire list fits in memory. # %% diff --git a/examples/advanced_composition/advanced_composition/subworkflows.py b/examples/advanced_composition/advanced_composition/subworkflows.py index 07f4efa7e..be4eff792 100644 --- a/examples/advanced_composition/advanced_composition/subworkflows.py +++ b/examples/advanced_composition/advanced_composition/subworkflows.py @@ -87,6 +87,7 @@ def parent_wf(a: int) -> Tuple[int, str, str]: if __name__ == "__main__": print(f"Running parent_wf(a=3) {parent_wf(a=3)}") + # %% [markdown] # Interestingly, we can nest a workflow that has a subworkflow within a workflow. 
# Workflows can be simply composed from other workflows, even if they are standalone entities. Each of the @@ -164,6 +165,7 @@ def ext_workflow(my_input: str) -> Dict: "parent_workflow_execution", ) + # %% [markdown] # Define another task that returns the repeated keys (in our case, words) from a dictionary. # %% diff --git a/flyte_tests.txt b/flyte_tests.txt new file mode 100644 index 000000000..5de785ff6 --- /dev/null +++ b/flyte_tests.txt @@ -0,0 +1,17 @@ +examples/advanced_composition/advanced_composition/chain_entities.py +examples/advanced_composition/advanced_composition/conditions.py +examples/advanced_composition/advanced_composition/decorating_tasks.py +examples/advanced_composition/advanced_composition/decorating_workflows.py +examples/advanced_composition/advanced_composition/dynamics.py +examples/advanced_composition/advanced_composition/map_task.py +examples/advanced_composition/advanced_composition/waiting_for_external_inputs.py +examples/basics/basics/documenting_workflows.py +examples/basics/basics/hello_world.py +examples/basics/basics/named_outputs.py +examples/basics/basics/shell_task.py +examples/basics/basics/workflow.py +examples/data_types_and_io/data_types_and_io/dataclass.py +examples/data_types_and_io/data_types_and_io/enum_type.py +examples/data_types_and_io/data_types_and_io/file.py +examples/data_types_and_io/data_types_and_io/folder.py +examples/data_types_and_io/data_types_and_io/structured_dataset.py diff --git a/flyte_tests_manifest.json b/flyte_tests_manifest.json index 91339d19e..8e5590c37 100644 --- a/flyte_tests_manifest.json +++ b/flyte_tests_manifest.json @@ -1,154 +1,209 @@ -[{ +[ + { "name": "core", "priority": "P0", "path": "core", + "examples": [ + ["advanced_composition.chain_entities.chain_workflows_wf", {}], + ["advanced_composition.conditions.consume_outputs", { "my_input": 10.0 }], + ["advanced_composition.decorating_tasks.wf", { "x": 10 }], + ["advanced_composition.decorating_workflows.wf", { "x": 19.8 }], + 
["advanced_composition.dynamics.wf", { "s1": "Pear", "s2": "Earth" }], + [ + "advanced_composition.map_task.my_map_workflow", + { "a": [1, 2, 3, 4, 5] } + ], + [ + "advanced_composition.waiting_for_external_inputs.sleep_wf", + { "num": 5 } + ], + ["basics.documenting_workflows.sphinx_docstring_wf", {}], + ["basics.hello_world.hello_world_wf", {}], + ["basics.named_outputs.simple_wf_with_named_outputs", {}], + ["basics.shell_task.shell_task_wf", {}], + ["basics.workflow.simple_wf", { "x": [1, 2, 3], "y": [1, 2, 3] }], + ["data_types_and_io.dataclass.dataclass_wf", { "x": 10, "y": 20 }], + ["data_types_and_io.enum_type.coffee_maker", { "coffee": "latte" }], + [ + "data_types_and_io.file.normalize_csv_file", + { + "csv_url": "https://people.sc.fsu.edu/~jburkardt/data/csv/biostats.csv", + "column_names": [ + "Name", + "Sex", + "Age", + "Heights (in)", + "Weight (lbs)" + ], + "columns_to_normalize": ["Age"] + } + ], + [ + "data_types_and_io.folder.download_and_normalize_csv_files", + { + "csv_urls": [ + "https://people.sc.fsu.edu/~jburkardt/data/csv/biostats.csv", + "https://people.sc.fsu.edu/~jburkardt/data/csv/faithful.csv" + ], + "columns_metadata": [ + ["Name", "Sex", "Age", "Heights (in)", "Weight (lbs)"], + ["Index", "Eruption length (mins)", "Eruption wait (mins)"] + ], + "columns_to_normalize_metadata": [["Age"], ["Eruption length (mins)"]] + } + ], + ["data_types_and_io.structured_dataset.simple_sd_wf", { "a": 42 }] + ], "exitCondition": { "exit_success": true, "exit_message": "" } - },{ + }, + { "name": "integrations-hive", "priority": "P1", "path": "examples/hive_plugin", "exitCondition": { "exit_success": true, - "exit_message": "" + "exit_message": "" } - },{ + }, + { "name": "integrations-k8s-spark", "priority": "P1", "path": "examples/k8s_spark_plugin", + "examples": [ + [ + "k8s_spark_plugin.pyspark_pi.my_spark", + { "triggered_date": "2023-11-21T18:58:01" } + ] + ], "exitCondition": { "exit_success": true, - "exit_message": "" + "exit_message": "" } - 
},{ + }, + { "name": "integrations-kfpytorch", "priority": "P1", "path": "examples/kfpytorch_plugin", + "examples": [["kfpytorch_plugin.pytorch_mnist.pytorch_training_wf", {}]], "exitCondition": { "exit_success": true, - "exit_message": "" + "exit_message": "" } - },{ + }, + { "name": "integrations-kftensorflow", "priority": "P1", "path": "examples/kftensorflow_plugin", + "examples": [ + ["kftensorflow_plugin.tf_mnist.mnist_tensorflow_workflow", {}] + ], "exitCondition": { "exit_success": true, - "exit_message": "" - } - },{ - "name": "integrations-pod", - "priority": "P1", - "path": "examples/k8s_pod_plugin", - "exitCondition": { - "exit_success": true, - "exit_message": "" + "exit_message": "" } - },{ + }, + { "name": "integrations-pandera", "priority": "P1", "path": "examples/pandera_plugin", + "examples": [["pandera_plugin.basic_schema_example.process_data", {}]], "exitCondition": { "exit_success": true, - "exit_message": "" - } - },{ - "name": "integrations-whylogs", - "priority": "P1", - "path": "examples/whylogs_plugin", - "exitCondition": { - "exit_success": true, - "exit_message": "" + "exit_message": "" } }, { "name": "integrations-modin", "priority": "P1", "path": "examples/modin_plugin", + "examples": [["modin_plugin.knn_classifier.pipeline", {}]], "exitCondition": { "exit_success": true, - "exit_message": "" + "exit_message": "" } - },{ + }, + { "name": "integrations-papermill", "priority": "P1", "path": "examples/papermill_plugin", + "examples": [ + ["papermill_plugin.simple.nb_to_python_wf", { "f": 3.1415926535 }] + ], "exitCondition": { "exit_success": true, - "exit_message": "" + "exit_message": "" } - },{ + }, + { "name": "integrations-greatexpectations", "priority": "P1", "path": "examples/greatexpectations_plugin", + "examples": [ + ["greatexpectations_plugin.task_example.simple_wf", {}], + ["greatexpectations_plugin.task_example.file_wf", {}], + ["greatexpectations_plugin.task_example.schema_wf", {}], + 
["greatexpectations_plugin.task_example.runtime_wf", {}] + ], "exitCondition": { "exit_success": true, - "exit_message": "" + "exit_message": "" } - },{ - "name": "integrations-sagemaker-pytorch", - "priority": "P1", - "path": "examples/sagemaker_pytorch_plugin", + }, + { + "name": "case-studies-house-price-prediction", + "priority": "P2", + "path": "examples/house_price_prediction", "exitCondition": { "exit_success": true, - "exit_message": "" + "exit_message": "" } - },{ - "name": "integrations-sagemaker-training", - "priority": "P1", - "path": "examples/sagemaker_training_plugin", + }, + { + "name": "case-studies-pima-diabetes", + "priority": "P2", + "path": "examples/pima_diabetes", "exitCondition": { "exit_success": true, - "exit_message": "" + "exit_message": "" } - },{ - "name": "case-studies-house-price-prediction", - "priority": "P2", - "path": "examples/house_price_prediction", - "exitCondition": { - "exit_success": true, - "exit_message": "" - } - },{ - "name": "case-studies-pima-diabetes", - "priority": "P2", - "path": "examples/pima_diabetes", - "exitCondition": { - "exit_success": true, - "exit_message": "" - } - },{ - "name": "case-studies-mnist-classifier", - "priority": "P2", - "path": "examples/mnist_classifier", - "exitCondition": { - "exit_success": true, - "exit_message": "" - } - },{ + }, + { + "name": "case-studies-mnist-classifier", + "priority": "P2", + "path": "examples/mnist_classifier", + "exitCondition": { + "exit_success": true, + "exit_message": "" + } + }, + { "name": "case-studies-eda", "priority": "P2", "path": "examples/eda", "exitCondition": { "exit_success": true, "exit_message": "" - } - },{ + } + }, + { "name": "case-studies-feast-integration", "priority": "P2", "path": "examples/feast_integration", "exitCondition": { "exit_success": true, "exit_message": "" - } - },{ + } + }, + { "name": "case-studies-blast", "priority": "P2", "path": "examples/blast", "exitCondition": { "exit_success": true, "exit_message": "" + } } -}] +] 
"""Validate the standard test list against the test manifest (flyte_tests_validate.py).

For every file listed in ``flyte_tests.txt`` this script checks that:

1. ``flyte_tests_manifest.json`` contains at least one example whose dotted
   name starts with the file's ``<parent_dir>.<module>`` path;
2. each such workflow name appears in the output of ``pyflyte run <file>``;
3. every input named in the manifest for that workflow appears as an option
   in the output of ``pyflyte run <file> <workflow> --help``.

Any violation raises ``ValidationError`` so the process exits non-zero.
"""
import json
import os
import re
import subprocess

MANIFEST_FILE = "flyte_tests_manifest.json"
TESTS_FILE = "flyte_tests.txt"

# Colour / cursor-column escape sequences emitted by the pyflyte CLI.
ANSI_ESCAPE = re.compile(r"\x1b\[[0-9;]*[mG]")


class ValidationError(Exception):
    """Raised when a listed test file disagrees with the manifest or with pyflyte."""


def load_examples(manifest: list) -> list[tuple[str, dict]]:
    """Flatten the manifest into ``(dotted_workflow_name, params)`` pairs.

    Entries without an ``"examples"`` key contribute nothing.  An example
    that carries only a name (no parameter mapping) is treated as taking no
    parameters instead of raising ``IndexError``.
    """
    examples = []
    for entry in manifest:
        for example in entry.get("examples", []):
            if not example:
                continue
            params = example[1] if len(example) > 1 else {}
            examples.append((example[0], params))
    return examples


def module_path(file_name: str) -> str:
    """Return ``<immediate parent directory>.<file stem>`` for *file_name*.

    ``examples/basics/basics/workflow.py`` becomes ``basics.workflow``.
    """
    parent = os.path.basename(os.path.dirname(file_name))
    stem = os.path.splitext(os.path.basename(file_name))[0]
    return f"{parent}.{stem}"


def workflows_for(module: str, examples: list[tuple[str, dict]]) -> list[tuple[str, dict]]:
    """Select the examples that live in *module*.

    The match is anchored on the dotted prefix so that ``basics.workflow``
    does not also pick up ``basics.workflow_extra.some_wf``.
    """
    prefix = module + "."
    return [(name, params) for name, params in examples if name.startswith(prefix)]


def strip_ansi(text: str) -> str:
    """Remove ANSI colour/column escape sequences from *text*."""
    return ANSI_ESCAPE.sub("", text)


def missing_params(params, help_text: str) -> list[str]:
    """Return the names in *params* that are not offered as ``--<name>`` in *help_text*.

    Every parameter must be present (not just any one of them), names are
    regex-escaped, and the match is delimited so that ``x`` is not satisfied
    by an unrelated word containing that letter.  Matching is case-insensitive.

    NOTE(review): assumes pyflyte renders each workflow input as a
    ``--<input_name>`` option in its help output and does not truncate long
    option names - confirm against the flytekit version used in CI.
    """
    cleaned = strip_ansi(help_text)
    return [
        name
        for name in params
        if not re.search(rf"(?<![\w-])--{re.escape(name)}(?![\w-])", cleaned, re.IGNORECASE)
    ]


def run_pyflyte(*args: str) -> subprocess.CompletedProcess:
    """Run ``pyflyte`` with *args*, capturing stdout/stderr as text."""
    return subprocess.run(["pyflyte", *args], capture_output=True, text=True)


def validate_file(file_name: str, examples: list[tuple[str, dict]]) -> None:
    """Validate one test file against the manifest examples.

    Raises:
        ValidationError: if the manifest has no workflow for the file, a
            workflow is not listed by ``pyflyte run``, or a manifest
            parameter is not accepted by the workflow.
    """
    print(f"Processing file: {file_name}")

    module = module_path(file_name)
    workflows = workflows_for(module, examples)
    if not workflows:
        raise ValidationError(
            f"The file does not contain any workflows. (file: {file_name}, expected manifest prefix: {module})"
        )

    # The listing depends only on the file, so fetch it once per file rather
    # than once per workflow.
    listing = run_pyflyte("run", file_name)
    available = strip_ansi(listing.stdout).split()

    for workflow, params in workflows:
        workflow_name = workflow.rsplit(".", 1)[-1]
        if workflow_name not in available:
            raise ValidationError(
                "Workflow not found in the pyflyte run output. "
                f"(file: {file_name}, workflow: {workflow_name}, stderr: {listing.stderr.strip()})"
            )
        print("Workflow found in the pyflyte run output.")

        if not params:
            print("No parameters found.")
            continue

        help_result = run_pyflyte("run", file_name, workflow_name, "--help")
        missing = missing_params(params, help_result.stdout)
        if missing:
            raise ValidationError(
                "There's a mismatch between the values accepted by the workflow and the ones you provided. "
                f"(file: {file_name}, workflow: {workflow_name}, missing: {', '.join(missing)})"
            )
        print("All parameters found.")


def main() -> None:
    """Validate every non-blank line of the tests list."""
    with open(MANIFEST_FILE, "r") as manifest_file:
        examples = load_examples(json.load(manifest_file))

    with open(TESTS_FILE, "r") as tests_file:
        for line in tests_file:
            file_name = line.strip()
            # A blank line would otherwise map to the module path "." and
            # match every manifest example.
            if file_name:
                validate_file(file_name, examples)


if __name__ == "__main__":
    main()