|
| 1 | +import os |
| 2 | +import sys |
| 3 | +import logging |
| 4 | +import torch |
| 5 | +import numpy as np |
| 6 | +from dgl.data import LegacyTUDataset |
| 7 | +import json |
| 8 | + |
| 9 | + |
| 10 | +def _load_check_mark(path:str): |
| 11 | + if os.path.exists(path): |
| 12 | + with open(path, 'r') as f: |
| 13 | + return json.load(f) |
| 14 | + else: |
| 15 | + return {} |
| 16 | + |
| 17 | +def _save_check_mark(path:str, marks:dict): |
| 18 | + with open(path, 'w') as f: |
| 19 | + json.dump(marks, f) |
| 20 | + |
| 21 | + |
def node_label_as_feature(dataset: "LegacyTUDataset", mode="concat", save=True):
    """
    Description
    -----------
    Add node labels (one-hot encoded) to the node feature dict of every
    graph in the dataset. Graphs are modified in place.

    Parameters
    ----------
    dataset : LegacyTUDataset
        The dataset object
    mode : str, optional
        How to add node label to the graph (case-insensitive). Valid
        options are "add", "replace" and "concat".
        - "add": Store the labels under the separate key "node_label".
        - "concat": Concatenate "feat" and the labels along dim 1. Falls
          back to "replace" when the first graph has no "feat".
        - "replace": Use the labels as "feat"
        Any other value is treated as "replace" (a warning is logged).
        Default: :obj:`"concat"`
    save : bool, optional
        Save the result dataset and record a cache marker next to it.
        Default: :obj:`True`

    Returns
    -------
    LegacyTUDataset
        The same ``dataset`` object that was passed in.

    Notes
    -----
    The cache marker does not record ``mode``: once a marker exists (and
    the dataset is not force-reloaded) the dataset is returned unchanged,
    whatever ``mode`` is requested.
    """
    # check if node label is not available
    label_path = dataset._file_path("node_labels")
    if not os.path.exists(label_path) or len(dataset) == 0:
        logging.warning("No Node Label Data")
        return dataset

    # check if has cached value
    check_mark_name = "node_label_as_feature"
    check_mark_path = os.path.join(
        dataset.save_path, "info_{}_{}.json".format(dataset.name, dataset.hash))
    check_mark = _load_check_mark(check_mark_path)
    if check_mark.get(check_mark_name) and not dataset._force_reload:
        logging.warning("Using cached value in node_label_as_feature")
        return dataset
    logging.warning("Adding node labels into node features..., mode={}".format(mode))

    # normalize once instead of lower-casing for every graph
    mode = mode.lower()
    if mode not in ("add", "concat", "replace"):
        # unknown modes have always behaved like "replace"; keep that, but say so
        logging.warning("Unknown mode '%s', falling back to 'replace'", mode)
        mode = "replace"

    # check if graph has "feat"
    if "feat" not in dataset[0][0].ndata:
        logging.warning("Dataset has no node feature 'feat'")
        if mode == "concat":
            # nothing to concatenate with, so the labels become the features
            mode = "replace"

    # first read node labels
    DS_node_labels = dataset._idx_from_zero(
        np.loadtxt(label_path, dtype=int))
    one_hot_node_labels = dataset._to_onehot(DS_node_labels)

    # read graph idx: entry k holds the graph id of node k
    DS_indicator = dataset._idx_from_zero(
        np.genfromtxt(dataset._file_path("graph_indicator"), dtype=int))
    node_idx_list = [
        np.where(DS_indicator == graph_id)[0]
        for graph_id in range(np.max(DS_indicator) + 1)
    ]

    # add to node feature dict
    for node_idx, g in zip(node_idx_list, dataset.graph_lists):
        node_labels_tensor = torch.tensor(one_hot_node_labels[node_idx, :])
        if mode == "concat":
            g.ndata["feat"] = torch.cat(
                (g.ndata["feat"], node_labels_tensor), dim=1)
        elif mode == "add":
            g.ndata["node_label"] = node_labels_tensor
        else:  # replace
            g.ndata["feat"] = node_labels_tensor

    if save:
        # Persist the dataset before recording the marker, so a failed save
        # can never leave a marker that claims the cached data is ready.
        dataset.save()
        check_mark[check_mark_name] = True
        _save_check_mark(check_mark_path, check_mark)
    return dataset
| 95 | + |
| 96 | + |
def degree_as_feature(dataset: "LegacyTUDataset", save=True):
    """
    Description
    -----------
    Use node in-degree (in one-hot format) as node feature "feat".
    Graphs are modified in place; an existing "feat" is overwritten.

    The one-hot width is ``max_degree - min_degree + 1``, where the range
    is taken over every node of every graph in the dataset.

    Parameters
    ----------
    dataset : LegacyTUDataset
        The dataset object

    save : bool, optional
        Save the result dataset and record a cache marker next to it.
        Default: :obj:`True`

    Returns
    -------
    LegacyTUDataset
        The same ``dataset`` object that was passed in.
    """
    # first check if already have such feature
    check_mark_name = "degree_as_feat"
    feat_name = "feat"
    check_mark_path = os.path.join(
        dataset.save_path, "info_{}_{}.json".format(dataset.name, dataset.hash))
    check_mark = _load_check_mark(check_mark_path)

    if check_mark.get(check_mark_name) and not dataset._force_reload:
        logging.warning("Using cached value in 'degree_as_feature'")
        return dataset

    logging.warning("Adding node degree into node features...")
    graphs = [dataset.graph_lists[i] for i in range(len(dataset))]
    # compute the degrees once and reuse them for both the range scan and
    # the encoding pass
    all_degrees = [g.in_degrees() for g in graphs]

    # min()/max() raise on an empty tensor, so graphs without nodes are
    # left out of the range scan
    populated = [d for d in all_degrees if d.numel() > 0]
    if populated:
        min_degree = min(d.min().item() for d in populated)
        max_degree = max(d.max().item() for d in populated)
    else:
        # no nodes anywhere: fall back to a width-1 encoding
        min_degree = max_degree = 0

    vec_len = max_degree - min_degree + 1
    for g, degrees in zip(graphs, all_degrees):
        num_nodes = g.num_nodes()
        node_feat = torch.zeros((num_nodes, vec_len))
        # row k gets a single 1 in the column of node k's shifted degree
        node_feat[torch.arange(num_nodes), degrees - min_degree] = 1.
        g.ndata[feat_name] = node_feat

    if save:
        # dataset first, marker second: the marker must only exist once the
        # saved data is actually on disk
        check_mark[check_mark_name] = True
        dataset.save()
        _save_check_mark(check_mark_path, check_mark)
    return dataset
0 commit comments