diff --git a/core_collection/varia_collection/data_axs.json b/core_collection/varia_collection/data_axs.json index 6226cb70..94f26f11 100644 --- a/core_collection/varia_collection/data_axs.json +++ b/core_collection/varia_collection/data_axs.json @@ -14,6 +14,7 @@ "sysinfo": "sysinfo", "rule_matching_demonstrator": "rule_matching_demonstrator", "rule_matching_producer": "rule_matching_producer", - "rule_matching_advertiser": "rule_matching_advertiser" + "rule_matching_advertiser": "rule_matching_advertiser", + "graph" : "graph" } } diff --git a/core_collection/varia_collection/graph/README.md b/core_collection/varia_collection/graph/README.md new file mode 100644 index 00000000..f4a3c091 --- /dev/null +++ b/core_collection/varia_collection/graph/README.md @@ -0,0 +1,146 @@ +## Generating Hierarchy Graph +Set the target entry that we would like to draw a dependency graph for, e.g., image_classification_using_tf_py +``` +export TARGET=image_classification_using_tf_py +``` +### `AXS` command to generate the graph: +``` +axs byquery graph_output,target=${TARGET} +``` +![Alt text](image.png) +The figure shows a dependency graph generated for `image_classification_using_tf_py` entry. + +- Red : target entry +- Lightcoral: Parents of the target entry +- Blue: output node +- Lightblue: Parents of the output node + +If the run is successful, it should print `Graph is generated!` +``` +saved to '/home/saheli/work_collection/generated_by_graph_on_draw_a579763d98044530962cc967ac659b28/data_axs.json' +byname_entries: ['base_imagenet_experiment'] +Graph is generated! +['^', 'byname', 'generated_by_graph_on_draw_generated_by_graph_on_draw_a579763d98044530962cc967ac659b28'] +``` +The image of the graph rendered in vector graphics, `image.svg` and `.dot` file are generated under `axs byquery graph_output,target=${target} , get_path`. Open `image.svg` with a GUI interface to view the generated graph. + +The structure of the `.dot` file will look something like this: `image`. 
It can be interpreted as a normal text file. +``` +digraph { + node [shape=ellipse] + dpi=400 + subgraph cluster_0 { + style=dotted + label="Entry and Its Parent(s)" + image_classification_using_tf_py [color=red style=filled] + python_script -> image_classification_using_tf_py + base_benchmark_program -> image_classification_using_tf_py + base_benchmark_program [color=lightcoral style=filled] + python_script [color=lightcoral style=filled] + python_in_shell -> python_script + python_in_shell [color=lightcoral style=filled] + shell -> python_in_shell + shell [color=lightcoral style=filled] + } + output [color=blue style=filled] + image_classification_using_tf_py -> output + base_imagenet_experiment -> output + subgraph cluster_1 { + style=dotted + label="Parent(s) of the Output Entry" + base_imagenet_experiment [color=lightblue style=filled] + base_experiment -> base_imagenet_experiment + base_experiment [color=lightblue style=filled] + } +} +``` +### `AXS` command to print the hierarchy tree: +``` +axs byname graph , print_hierarchy llmcb_using_qaic_kilt +``` + +
+saheli@chai:~/axs/core_collection/varia_collection/graph$ axs byname graph , print_hierarchy llmcb_using_qaic_kilt
+output_entries_list: ['AS^IS', ['^', 'byname', 'base_llama2_loadgen_experiment'], ['^', 'byname', 'base_qaic_experiment'], ['^', 'byname', 'traced_kilt_entry']]
+{'traced_kilt_entry', 'base_llama2_loadgen_experiment', 'base_qaic_experiment'}
+output_entries_list: ['AS^IS', ['^', 'byname', 'base_mixtral_loadgen_experiment'], ['^', 'byname', 'base_qaic_experiment'], ['^', 'byname', 'traced_kilt_entry']]
+{'traced_kilt_entry', 'base_llama2_loadgen_experiment', 'base_mixtral_loadgen_experiment', 'base_qaic_experiment'}
+llmcb_using_qaic_kilt
+|
++-/home/saheli/axs/core_collection/essentials_collection
+    shell
+|
++-/home/saheli/work_collection/axs2kilt-dev
+    base_llmcb_kilt_program
+    |
+    +-/home/saheli/work_collection/axs2kilt-dev
+        base_kilt_program
+        |
+        +-/home/saheli/work_collection/axs2mlperf
+            base_loadgen_program
+            |
+            +-/home/saheli/work_collection/axs2mlperf
+                cpufreq_support
+|
++-/home/saheli/work_collection/axs2mlperf
+    base_loadgen_program
+    |
+    +-/home/saheli/work_collection/axs2mlperf
+        cpufreq_support
+|
++-/home/saheli/work_collection/axs2qaic-dev
+    base_qaic_program
+    |
+    +-/home/saheli/work_collection/axs2mlperf
+        base_loadgen_program
+        |
+        +-/home/saheli/work_collection/axs2mlperf
+            cpufreq_support
+|
++-/home/saheli/work_collection/axs2qaic-dev
+    qaic_sdk_info
+|
+-->/home/saheli/work_collection/axs2kilt-dev :: Output Parents
+    traced_kilt_entry
+|
+-->/home/saheli/work_collection/axs2mlperf :: Output Parents
+    base_llama2_loadgen_experiment
+    |
+    +-/home/saheli/work_collection/axs2mlperf
+        base_loadgen_experiment
+|
+-->/home/saheli/work_collection/axs2mlperf :: Output Parents
+    base_mixtral_loadgen_experiment
+    |
+    +-/home/saheli/work_collection/axs2mlperf
+        base_loadgen_experiment
+|
+-->/home/saheli/work_collection/axs2qaic-dev :: Output Parents
+    base_qaic_experiment
+Tree printed successfully!
+
+ +## Running tests +The next step is to run the tests. + +### Step 1: +The first step is to run `create_json.py`, which generates two JSON files. Each file contains a dictionary whose keys are the entries contained in the `bert`, `image_classification` and `object_detection` folders, and whose values are the corresponding `_parent_entries` and `output_parent_entries` for each entry. +``` +python create_json.py +``` +<pre>
+output_parent_entries_dict.json  parent_entries_dict.json  
+ +### Step 2: +Next, run the `test_parent_and_output_entries.py` file: +``` +pytest test_parent_and_output_entries.py +``` +<pre>
+==================================================================================== 2 passed in 0.01s =====================================================================================
+collected 2 items
+
+test_parent_and_output_entries.py::test_compare_dot_and_json_for_target PASSED
+test_parent_and_output_entries.py::test_compare_dot_and_json_for_target_output PASSED
+ 
+ diff --git a/core_collection/varia_collection/graph/code_axs.py b/core_collection/varia_collection/graph/code_axs.py new file mode 100644 index 00000000..a8ef4c4f --- /dev/null +++ b/core_collection/varia_collection/graph/code_axs.py @@ -0,0 +1,297 @@ +import re +import graphviz +import json +import networkx as nx +import subprocess +import os +from kernel import default_kernel as ak +from networkx.drawing.nx_pydot import read_dot + + + +initial_root_visited = False + +def draw(target, return_this_entry=None, __entry__=None): + + """ Generate Dependency Graph for a given entry. + + Usage examples: + axs byname graph , draw bert_using_onnxrt_py + axs byquery graph_output,target=image_classification_using_tf_py + """ + + global initial_root_visited + initial_root_visited = False + output = False + output_parents_data = "" + dest_dir = return_this_entry.get_path() + + target_entry = __entry__.get_kernel().byname(target) + + if target_entry: + get_path = target_entry.get_path() + file_path = f'{get_path}/data_axs.json' + target_data = target_entry.own_data() + output_entries = target_entry.get("output_entry_parents") + + if output_entries: + # Extract all 'byname' entries from "output_entry_parents" as objects to byname as key + byname_entries = extract_byname_entries(output_entries) + + for key, val in target_data.items(): + if "_parent_entries" in str(val): + output = True + output_parents_data = val + elif "tags" in str(val): + output = True + elif output_entries: + output = True + + f = graphviz.Digraph(format='png') + f.attr('node', shape='ellipse') + f.attr(dpi='400') + f.engine = 'dot' + + with f.subgraph(name='cluster_0') as c: + c.attr(style='dotted') + c.attr(label='Entry and Its Parent(s)') + dfs(target, c, __entry__, is_output=False) + + if output: + f.node("output", style='filled', color='blue') + f.edge(target, "output") + + if output_parents_data: + info = find_parent(output_parents_data) + output_parents = find_byname(file_path,obj=info) + 
#print("output_parents", output_parents) + for output_parent in output_parents: + with f.subgraph(name='cluster_1') as c: + c.attr(style='dotted') + c.attr(label='Parent(s) of the Output Entry') + dfs(output_parent, c, __entry__, is_output=True) + f.edge(output_parent, "output") + target_entry = output_parent + else: + target_entry = None + + elif output_entries and byname_entries: + for byname_entry in byname_entries: + with f.subgraph(name=f'cluster_1') as c: + c.attr(style='dotted') + c.attr(label=f'Parent(s) of the Output Entry') + dfs(byname_entry, c, __entry__, is_output=True) + f.edge(byname_entry, "output") + + f.render(filename=f"{dest_dir}/image", view=False, cleanup=False) + print("Graph is generated!") + + return return_this_entry + else: + print("ERROR! Provide correct entry name!") + +def dfs(root, f, __entry__, is_output=False): + + """ Depth First Search(DFS) for a given node. + """ + + global initial_root_visited + stack = [] + visited = set() + + cur_target_entry = __entry__.get_kernel().byname(root) + if not cur_target_entry: + print("ERROR!") + return + + stack.append((cur_target_entry, True)) # Using True to signify that this is the initial root node + + while stack: + cur_target_entry, is_initial_root = stack.pop() + cur_target = cur_target_entry.get_name() + cur_target_path = cur_target_entry.get_path() + + if cur_target in visited: + continue + + if not initial_root_visited: + color = 'red' + initial_root_visited = True + elif is_output: + color = 'lightblue' + else: + color = 'lightcoral' + + f.node(cur_target, label=f'{cur_target}\n{cur_target_path}', color=color, style='filled') + visited.add(cur_target) + + parents = cur_target_entry.get("_parent_entries") + if parents: + for parent in parents: + if isinstance(parent, str): + p = __entry__.get_kernel().byname(parent) + else: + p = parent + if not p: + continue + stack.append((p, False)) # Using False to signify that this is not the initial root node + f.edge(p.get_name(), cur_target) + 
parent_path = p.get_path() + f.node(p.get_name(), label=f'{p.get_name()}\n{parent_path}', style='filled') + + return f + +def find_parent(obj): + items = find_key(obj, "_parent_entries") + return items + +def find_byname(file_path, obj=None): + obj=process_json(file_path) + items = find_key(obj, "byname") + #print("items",items) + return [list(item)[2] for item in items] + +def find_key(obj, key): + matches = [] + if isinstance(obj, dict): + for k, v in obj.items(): + if k == key: + return v + matches.extend(find_key(v, key)) + result = find_key(v, key) + if result is not None: + return result + elif isinstance(obj, list): + if key == "_parent_entries" and re.search(r"^\[(?:'|\")_parent_entries(.*)", str(obj)): + matches.append(obj) + if key == "byname" and re.search(r"^\[(?:'|\")\^(?:'|\")(?:\s*),(?:\s*)(?:'|\")byname(.*)", str(obj)): + matches.append(obj) + for item in obj: + matches.extend(find_key(item, key)) + + return matches + +def process_json(file_path): + with open(file_path) as f: + obj = json.load(f) + required_data = {key: obj[key] for key in ['output_file_path', 'output_entry'] if key in obj} + parents = find_parent(required_data) + return parents + +def extract_byname_entries(output_entries): + byname_entries = [] + for item in output_entries: + if isinstance(item, list) and 'byname' in item: + index = item.index('byname') + 1 + if index < len(item): + byname_entries.append(item[index]) + return byname_entries + +def draw_collection(collection_name, __entry__=None): + """ Generate Dependency Graph for all entries in the collection. 
+ + Usage examples: + axs byname graph_collection, draw_collection work_collection + """ + collection_entry = __entry__.get_kernel().byname(collection_name) + + if collection_name!=None: + collection_path = collection_entry.get_path() + draw(collection_name, return_this_entry=collection_entry, __entry__=collection_entry) + image_path = f'{collection_path}/image.png' + dot_file_path = f'{collection_path}/image' + print("image_path: ", image_path) + print("dot_file_path: ", dot_file_path) + try: + cmd = ['graph-easy', dot_file_path] + result = subprocess.run(cmd, capture_output=True, text=True) + + # Check if the command was successful + if result.returncode == 0: + print("Graph-Easy output:") + print(result.stdout) + else: + print(f"Graph-Easy failed with error code {result.returncode}") + print(result.stderr) + + except FileNotFoundError: + print("graph-easy command not found. Please ensure it is installed.") + except Exception as e: + print(f"An error occurred: {e}") + + return collection_entry + +def print_hierarchy(entry_name, __entry__, indent_level=0, d=None, output=False): + """ + Recursive function to print the entry, parent hierarchy in a tree-like structure with max_depth 'd'. 
+ + Usage examples: + axs byname graph , print_hierarchy llmcb_using_qaic_kilt + + """ + entry = __entry__.get_kernel().byname(entry_name) + + if not entry: + print("Entry not found!") + return + + else: + data = entry.own_data() + # Collect all output-related fields + output_entries_fields = [key for key in data.keys() if key.startswith("output_entry_parents_")] + + output_entries = {} + for field in output_entries_fields: + output_entries[field] = entry.get(field) + + for key, val in data.items(): + if "_parent_entries" in str(val): + output = True + elif "tags" in str(val): + output = True + elif output_entries: + output = True + + byname_entries = set() # For now, only unique parent entries are kept in the set + if output_entries: + for field, output_entries_list in output_entries.items(): + print(f"output_entries_list: {output_entries_list}") + byname_entries.update(extract_byname_entries(output_entries_list)) + print(byname_entries) + + + base_indent = " " * indent_level + output_indent = "--> " * indent_level if output_entries else base_indent + + if d is not None and indent_level >= d: + return + + print(f"{base_indent}{entry_name}") + + parents = entry.get("_parent_entries") + + # If there are parents, recursively print them + if parents: + for parent in parents: + parent_name = parent if isinstance(parent, str) else parent.get_name() + parent_path = parent.get_path() + parent_dir = os.path.dirname(parent_path) + print(f"{base_indent}|") + print(f"{base_indent}+-{parent_dir}") + print_hierarchy(parent_name, __entry__, indent_level + 1, d=d) + + + + # If there are outputs, print their hierarchy + if output: + if output_entries: + for entry in byname_entries: + output_name = entry if isinstance(entry, str) else entry.get_name() + output_entry = __entry__.get_kernel().byname(entry) + output_path = output_entry.get_path() + output_dir = os.path.dirname(output_path) + print(f"{output_indent}|") + print(f"{output_indent}-->{output_dir} :: Output Parents") + 
print_hierarchy(output_name, __entry__, indent_level + 1, d=d, output=True) + + return "Tree printed successfully!" diff --git a/core_collection/varia_collection/graph/create_json.py b/core_collection/varia_collection/graph/create_json.py new file mode 100644 index 00000000..809ce14a --- /dev/null +++ b/core_collection/varia_collection/graph/create_json.py @@ -0,0 +1,88 @@ +import json +import os + +base_directory = os.path.expanduser('~/axs/core_collection/workflows_collection') + +def get_contained_entries_keys_from_multiple_locations(): + folders = ['bert', 'image_classification', 'object_detection'] + keys_dict = {} + workflows_directory = os.path.expanduser('~/axs/core_collection/workflows_collection') + + for folder in folders: + for root, dirs, files in os.walk(os.path.join(workflows_directory, folder)): + if 'data_axs.json' in files: + json_file_path = os.path.join(root, 'data_axs.json') + + print(f"Checking existence of {json_file_path}") # Debug line + + with open(json_file_path, 'r') as f: + data = json.load(f) + keys = list(data.get('contained_entries', {}).keys()) + + if folder in keys_dict: + keys_dict[folder].extend(keys) + else: + keys_dict[folder] = keys + else: + print(f"The JSON file at {os.path.join(root, 'data_axs.json')} doesn't exist.") + + return keys_dict + +contained_entries_keys_dict = get_contained_entries_keys_from_multiple_locations() + +key_json_paths = [] + +for folder, keys in contained_entries_keys_dict.items(): + for key in keys: + key_json_path = os.path.join(base_directory,folder, key, 'data_axs.json') + key_json_paths.append(key_json_path) + +def read_entries(key_json_paths, entry_key): + entries_dict = {} + for key_json_path in key_json_paths: + if os.path.exists(key_json_path): + with open(key_json_path, 'r') as f: + data = json.load(f) + key_name = os.path.basename(os.path.dirname(key_json_path)) + entries_dict[key_name] = [entry[-1] for entry in data.get('_parent_entries', [])] + else: + print(f"The JSON file at 
{key_json_path} doesn't exist.") + return entries_dict + +# Read parent entries +parent_entries_dict = read_entries(key_json_paths, '_parent_entries') + +def read_output_parent_entries(key_json_paths): + output_parent_entries_dict = {} + for key_json_path in key_json_paths: + if os.path.exists(key_json_path): + with open(key_json_path, 'r') as f: + data = json.load(f) + output_key_name = os.path.basename(os.path.dirname(key_json_path)) + + final_entries = [] + for entry in data.get('output_entry_parents', []): + if isinstance(entry, list) and 'byname' in entry: + index = entry.index('byname') + 1 + if index < len(entry): + final_entries.append(entry[index]) + + if final_entries: + output_parent_entries_dict[output_key_name] = final_entries + else: + output_parent_entries_dict[output_key_name] = data.get('output_entry_parents', []) + + else: + print(f"The JSON file at {key_json_path} doesn't exist.") + + return output_parent_entries_dict + +# Read output parent entries +output_parent_entries_dict = read_output_parent_entries(key_json_paths) + +# Save to a JSON file +with open('parent_entries_dict.json', 'w') as f: + json.dump(parent_entries_dict, f, indent=4) + +with open('output_parent_entries_dict.json', 'w') as f: + json.dump(output_parent_entries_dict, f, indent=4) diff --git a/core_collection/varia_collection/graph/data_axs.json b/core_collection/varia_collection/graph/data_axs.json new file mode 100644 index 00000000..f8705a7f --- /dev/null +++ b/core_collection/varia_collection/graph/data_axs.json @@ -0,0 +1,15 @@ +{ + "_producer_rules": [ + [ [ "graph_output" ], [["draw"]], { }, [ "target"]]], + "target" : "shell", + "return_this_entry": [ "^^", "execute", [[ + [ "get", "__record_entry__" ], + [ "attach", [ "^", "work_collection" ] ], + [ "plant", [ "^^", "substitute", [[ + "tags", [ "graph_output"], + "target", "#{target}#" + ]] ] ], + [ "save" ] + ]] ] + +} \ No newline at end of file diff --git a/core_collection/varia_collection/graph/image.png 
b/core_collection/varia_collection/graph/image.png new file mode 100644 index 00000000..a3121db9 Binary files /dev/null and b/core_collection/varia_collection/graph/image.png differ diff --git a/core_collection/varia_collection/graph/test_parent_and_output_entries.py b/core_collection/varia_collection/graph/test_parent_and_output_entries.py new file mode 100644 index 00000000..5fa24462 --- /dev/null +++ b/core_collection/varia_collection/graph/test_parent_and_output_entries.py @@ -0,0 +1,78 @@ +import re +import json +import pytest +import subprocess + +def read_dot_file(file_path): + with open(file_path, 'r') as f: + return f.read() + +def parse_dot_content(dot_content, target): + parent_nodes = [] + pattern = re.compile(r"(\w+)\s*->\s*(\w+)") + for match in pattern.findall(dot_content): + parent, child = match + if child == target: + parent_nodes.append(parent) + return parent_nodes + +def read_cluster_1_content(dot_content): + cluster_1_pattern = re.compile(r'subgraph cluster_1 {([\s\S]*?)}\n', re.MULTILINE) + match = cluster_1_pattern.search(dot_content) + if match: + return match.group(1) + else: + return None + +def parse_output_dot_content(cluster_1_content, target): + output_parent_nodes = [] + output_parent_nodes.append('output') + pattern = re.compile(r"(\w+)\s*->\s*(\w+)") + + for match in pattern.findall(cluster_1_content): # Using cluster_1_content here + parent, child = match + if child: + output_parent_nodes.append(child) + print("output",output_parent_nodes) + + return output_parent_nodes + +def get_actual_path(): + command = "axs byquery graph_output,target=image_classification_using_tf_py , get_path" + result = subprocess.run(command, stdout=subprocess.PIPE, shell=True, text=True) + return result.stdout.strip() + +dot_file_path = get_actual_path() + "/image" +dot_content = read_dot_file(dot_file_path) +cluster_1_content = read_cluster_1_content(dot_content) +print("cluster_1_content",cluster_1_content) + +json_file_path = 
"parent_entries_dict.json" +json_file_path_output = "output_parent_entries_dict.json" + +with open(json_file_path, 'r') as f: + json_data = json.load(f) + +with open(json_file_path_output, 'r') as f: + json_data_output_entries = json.load(f) + +target = "image_classification_using_tf_py" +dot_relationships = {target: parse_dot_content(dot_content, target)} +dot_output_relationships = {target: parse_output_dot_content(cluster_1_content, target)} + +def test_compare_dot_and_json_for_target(): + assert set(json_data.get(target, [])) == set(dot_relationships.get(target, [])) + +def check_output_child_exists(dot_content): + pattern = re.compile(r"output\s+\[color=blue style=filled\]") + match = pattern.search(dot_content) + return bool(match) + +def test_compare_dot_and_json_for_target_output(): + assert set(json_data_output_entries.get(target, [])).issubset(set(dot_output_relationships.get(target, []))) + + # Assertion to ensure 'output' exists in cluster_1 if output_parent_entries are present. + if json_data_output_entries.get(target, []): + assert check_output_child_exists(dot_content) + +pytest.main(["-v", "-s"])