From 8f9e6393b3f4bfcb8e2c5c18b78c65ddeaa17ef2 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 14 Aug 2024 02:35:23 +0000 Subject: [PATCH 01/39] change a variable --- python/dgl/distributed/partition.py | 93 ++++++++++++++++++----------- 1 file changed, 57 insertions(+), 36 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 73ea48959597..fd0fcae9d9c2 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1105,7 +1105,7 @@ def get_homogeneous(g, balance_ntypes): inner_node_mask = _get_inner_node_mask(parts[i], ntype_id) val.append( F.as_scalar(F.sum(F.astype(inner_node_mask, F.int64), 0)) - ) + )#note inner_node_mask(tensor[n,bool])->tensor[n,int64]->sum->scalar, compute the num of one partition inner_nids = F.boolean_mask( parts[i].ndata[NID], inner_node_mask ) @@ -1115,7 +1115,7 @@ def get_homogeneous(g, balance_ntypes): int(F.as_scalar(inner_nids[-1])) + 1, ] ) - val = np.cumsum(val).tolist() + val = np.cumsum(val).tolist()# note computing the cumulative sum of array elements. 
assert val[-1] == g.num_nodes(ntype) for etype in g.canonical_etypes: etype_id = g.get_etype_id(etype) @@ -1135,7 +1135,7 @@ def get_homogeneous(g, balance_ntypes): [int(inner_eids[0]), int(inner_eids[-1]) + 1] ) val = np.cumsum(val).tolist() - assert val[-1] == g.num_edges(etype) + assert val[-1] == g.num_edges(etype)# note assure the tot graph can be used else: node_map_val = {} edge_map_val = {} @@ -1305,32 +1305,52 @@ def get_homogeneous(g, balance_ntypes): part_dir = os.path.join(out_path, "part" + str(part_id)) node_feat_file = os.path.join(part_dir, "node_feat.dgl") edge_feat_file = os.path.join(part_dir, "edge_feat.dgl") - part_graph_file = os.path.join(part_dir, "graph.dgl") - part_metadata["part-{}".format(part_id)] = { - "node_feats": os.path.relpath(node_feat_file, out_path), - "edge_feats": os.path.relpath(edge_feat_file, out_path), - "part_graph": os.path.relpath(part_graph_file, out_path), - } + os.makedirs(part_dir, mode=0o775, exist_ok=True) save_tensors(node_feat_file, node_feats) save_tensors(edge_feat_file, edge_feats) - sort_etypes = len(g.etypes) > 1 - _save_graphs( - part_graph_file, - [part], - formats=graph_formats, - sort_etypes=sort_etypes, + #save + if use_graphbolt: + part_metadata["part-{}".format(part_id)] = { + "node_feats": os.path.relpath(node_feat_file, out_path), + "edge_feats": os.path.relpath(edge_feat_file, out_path), + } + else: + part_graph_file = os.path.join(part_dir, "graph.dgl") + + part_metadata["part-{}".format(part_id)] = { + "node_feats": os.path.relpath(node_feat_file, out_path), + "edge_feats": os.path.relpath(edge_feat_file, out_path), + "part_graph": os.path.relpath(part_graph_file, out_path), + } + sort_etypes = len(g.etypes) > 1 + _save_graphs( + part_graph_file, + [part], + formats=graph_formats, + sort_etypes=sort_etypes, + ) + + + part_config = os.path.join(out_path, graph_name + ".json") + if use_graphbolt: + kwargs["graph_formats"] = graph_formats + dgl_partition_to_graphbolt( + part_config, + parts=parts, 
+ part_meta=part_metadata, + **kwargs, ) + else: + _dump_part_config(part_config, part_metadata) + print( "Save partitions: {:.3f} seconds, peak memory: {:.3f} GB".format( time.time() - start, get_peak_mem() ) ) - part_config = os.path.join(out_path, graph_name + ".json") - _dump_part_config(part_config, part_metadata) - num_cuts = sim_g.num_edges() - tot_num_inner_edges if num_parts == 1: num_cuts = 0 @@ -1340,13 +1360,6 @@ def get_homogeneous(g, balance_ntypes): ) ) - if use_graphbolt: - kwargs["graph_formats"] = graph_formats - dgl_partition_to_graphbolt( - part_config, - **kwargs, - ) - if return_mapping: return orig_nids, orig_eids @@ -1392,9 +1405,9 @@ def init_type_per_edge(graph, gpb): etype_ids = gpb.map_to_per_etype(graph.edata[EID])[0] return etype_ids - -def gb_convert_single_dgl_partition( +def gb_convert_single_dgl_partition(# TODO change this part_id, + parts, graph_formats, part_config, store_eids, @@ -1427,14 +1440,18 @@ def gb_convert_single_dgl_partition( "Running in debug mode which means all attributes of DGL partitions" " will be saved to the new format." ) - + part_meta = _load_part_config(part_config) num_parts = part_meta["num_parts"] - graph, _, _, gpb, _, _, _ = load_partition( - part_config, part_id, load_feats=False - ) - _, _, ntypes, etypes = load_partition_book(part_config, part_id) + if parts!=None: + assert len(parts)==num_parts + graph=parts[part_id] + else: + graph, _, _, gpb, _, _, _ = load_partition( + part_config, part_id, load_feats=False + ) + gpb, _, ntypes, etypes = load_partition_book(part_config, part_id) is_homo = is_homogeneous(ntypes, etypes) node_type_to_id = ( None if is_homo else {ntype: ntid for ntid, ntype in enumerate(ntypes)} @@ -1503,7 +1520,7 @@ def gb_convert_single_dgl_partition( indptr, dtype=indices.dtype ) - # Cast various data to minimum dtype. + # Cast various data to minimum dtype.#note convert to minimun dtype # Cast 1: indptr. 
indptr = _cast_to_minimum_dtype(graph.num_edges(), indptr) # Cast 2: indices. @@ -1552,7 +1569,6 @@ def gb_convert_single_dgl_partition( return os.path.relpath(csc_graph_path, os.path.dirname(part_config)) # Update graph path. - def dgl_partition_to_graphbolt( part_config, *, @@ -1561,7 +1577,10 @@ def dgl_partition_to_graphbolt( store_inner_edge=False, graph_formats=None, n_jobs=1, -): + parts=None, + part_meta=None +):# note + """Convert partitions of dgl to FusedCSCSamplingGraph of GraphBolt. This API converts `DGLGraph` partitions to `FusedCSCSamplingGraph` which is @@ -1598,7 +1617,8 @@ def dgl_partition_to_graphbolt( "Running in debug mode which means all attributes of DGL partitions" " will be saved to the new format." ) - part_meta = _load_part_config(part_config) + if part_meta==None: + part_meta = _load_part_config(part_config) new_part_meta = copy.deepcopy(part_meta) num_parts = part_meta["num_parts"] @@ -1615,6 +1635,7 @@ def dgl_partition_to_graphbolt( convert_with_format = partial( gb_convert_single_dgl_partition, graph_formats=graph_formats, + parts=parts, part_config=part_config, store_eids=store_eids, store_inner_node=store_inner_node, From bfeb3b454e4dc928ed6e69d2e5f38976c9d83968 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 20 Aug 2024 09:35:36 +0000 Subject: [PATCH 02/39] modify partition test case --- tests/distributed/test_partition.py | 641 +++++++++++++++++++++------- 1 file changed, 485 insertions(+), 156 deletions(-) diff --git a/tests/distributed/test_partition.py b/tests/distributed/test_partition.py index 5fb121750e01..0f2425cb054d 100644 --- a/tests/distributed/test_partition.py +++ b/tests/distributed/test_partition.py @@ -5,10 +5,12 @@ import dgl import dgl.backend as F +import dgl.sparse as dglsp import numpy as np import pytest import torch as th from dgl import function as fn +from dgl.base import NTYPE from dgl.distributed import ( dgl_partition_to_graphbolt, load_partition, @@ -35,12 +37,19 @@ from utils import reset_envs 
-def _verify_partition_data_types(part_g): - for k, dtype in RESERVED_FIELD_DTYPE.items(): - if k in part_g.ndata: - assert part_g.ndata[k].dtype == dtype - if k in part_g.edata: - assert part_g.edata[k].dtype == dtype +def _verify_partition_data_types(part_g, use_graphbolt=False): + if not use_graphbolt: + for k, dtype in RESERVED_FIELD_DTYPE.items(): + if k in part_g.ndata: + assert part_g.ndata[k].dtype == dtype + if k in part_g.edata: + assert part_g.edata[k].dtype == dtype + else: + for k, dtype in RESERVED_FIELD_DTYPE.items(): + if k in part_g.node_attributes: + assert part_g.node_attributes[k].dtype == dtype + if k in part_g.edge_attributes: + assert part_g.edge_attributes[k].dtype == dtype def _verify_partition_formats(part_g, formats): @@ -81,11 +90,58 @@ def create_random_hetero(): return dgl.heterograph(edges, num_nodes) -def verify_hetero_graph(g, parts): +def verify_hetero_graph(g, parts, use_graphbolt=False): + if use_graphbolt: + num_nodes = {ntype: 0 for ntype in g.ntypes} + num_edges = {etype: 0 for etype in g.canonical_etypes} + for part in parts: + for etype in g.canonical_etypes: + etype_id = g.get_etype_id(etype) + inner_edge_mask = _get_inner_edge_mask( + part, etype_id, use_graphbolt + ) + num_inner_edges = F.sum(F.astype(inner_edge_mask, F.int64), 0) + num_edges[etype] += num_inner_edges + + # Verify the number of edges are correct. 
+ for etype in g.canonical_etypes: + print( + "edge {}: {}, {}".format( + etype, g.num_edges(etype), num_edges[etype] + ) + ) + assert g.num_edges(etype) == num_edges[etype] + + nids = {ntype: [] for ntype in g.ntypes} + eids = {etype: [] for etype in g.canonical_etypes} + for part in parts: + eid = th.arange(len(part.edge_attributes[dgl.EID])) + etype_arr = F.gather_row(part.type_per_edge, eid) + eid_type = F.gather_row(part.edge_attributes[dgl.EID], eid) + for etype in g.canonical_etypes: + etype_id = g.get_etype_id(etype) + eids[etype].append( + F.boolean_mask(eid_type, etype_arr == etype_id) + ) + # Make sure edge Ids fall into a range. + inner_edge_mask = _get_inner_edge_mask( + part, etype_id, use_graphbolt + ) + inner_eids = np.sort( + F.asnumpy( + F.boolean_mask( + part.edge_attributes[dgl.EID], inner_edge_mask + ) + ) + ) + assert np.all( + inner_eids == np.arange(inner_eids[0], inner_eids[-1] + 1) + ) + return + num_nodes = {ntype: 0 for ntype in g.ntypes} num_edges = {etype: 0 for etype in g.canonical_etypes} for part in parts: - assert len(g.ntypes) == len(F.unique(part.ndata[dgl.NTYPE])) assert len(g.canonical_etypes) == len(F.unique(part.edata[dgl.ETYPE])) for ntype in g.ntypes: ntype_id = g.get_ntype_id(ntype) @@ -161,47 +217,107 @@ def verify_hetero_graph(g, parts): def verify_graph_feats( - g, gpb, part, node_feats, edge_feats, orig_nids, orig_eids + g, + gpb, + part, + node_feats, + edge_feats, + orig_nids, + orig_eids, + use_graphbolt=False, ): - for ntype in g.ntypes: - ntype_id = g.get_ntype_id(ntype) - inner_node_mask = _get_inner_node_mask(part, ntype_id) - inner_nids = F.boolean_mask(part.ndata[dgl.NID], inner_node_mask) - ntype_ids, inner_type_nids = gpb.map_to_per_ntype(inner_nids) - partid = gpb.nid2partid(inner_type_nids, ntype) - assert np.all(F.asnumpy(ntype_ids) == ntype_id) - assert np.all(F.asnumpy(partid) == gpb.partid) - - orig_id = orig_nids[ntype][inner_type_nids] - local_nids = gpb.nid2localnid(inner_type_nids, gpb.partid, 
ntype) - - for name in g.nodes[ntype].data: - if name in [dgl.NID, "inner_node"]: - continue - true_feats = F.gather_row(g.nodes[ntype].data[name], orig_id) - ndata = F.gather_row(node_feats[ntype + "/" + name], local_nids) - assert np.all(F.asnumpy(ndata == true_feats)) + if use_graphbolt: + for ntype in g.ntypes: + ntype_id = g.get_ntype_id(ntype) + inner_node_mask = _get_inner_node_mask( + part, ntype_id, use_graphbolt + ) + inner_nids = F.boolean_mask( + part.node_attributes[dgl.NID], inner_node_mask + ) + ntype_ids, inner_type_nids = gpb.map_to_per_ntype(inner_nids) + partid = gpb.nid2partid(inner_type_nids, ntype) + assert np.all(F.asnumpy(ntype_ids) == ntype_id) + assert np.all(F.asnumpy(partid) == gpb.partid) - for etype in g.canonical_etypes: - etype_id = g.get_etype_id(etype) - inner_edge_mask = _get_inner_edge_mask(part, etype_id) - inner_eids = F.boolean_mask(part.edata[dgl.EID], inner_edge_mask) - etype_ids, inner_type_eids = gpb.map_to_per_etype(inner_eids) - partid = gpb.eid2partid(inner_type_eids, etype) - assert np.all(F.asnumpy(etype_ids) == etype_id) - assert np.all(F.asnumpy(partid) == gpb.partid) - - orig_id = orig_eids[etype][inner_type_eids] - local_eids = gpb.eid2localeid(inner_type_eids, gpb.partid, etype) - - for name in g.edges[etype].data: - if name in [dgl.EID, "inner_edge"]: - continue - true_feats = F.gather_row(g.edges[etype].data[name], orig_id) - edata = F.gather_row( - edge_feats[_etype_tuple_to_str(etype) + "/" + name], local_eids + orig_id = orig_nids[ntype][inner_type_nids] + local_nids = gpb.nid2localnid(inner_type_nids, gpb.partid, ntype) + + for name in g.nodes[ntype].data: + if name in [dgl.NID, "inner_node"]: + continue + true_feats = F.gather_row(g.nodes[ntype].data[name], orig_id) + ndata = F.gather_row(node_feats[ntype + "/" + name], local_nids) + assert np.all(F.asnumpy(ndata == true_feats)) + + for etype in g.canonical_etypes: + etype_id = g.get_etype_id(etype) + inner_edge_mask = _get_inner_edge_mask( + part, 
etype_id, use_graphbolt ) - assert np.all(F.asnumpy(edata == true_feats)) + inner_eids = F.boolean_mask( + part.edge_attributes[dgl.EID], inner_edge_mask + ) + etype_ids, inner_type_eids = gpb.map_to_per_etype(inner_eids) + partid = gpb.eid2partid(inner_type_eids, etype) + assert np.all(F.asnumpy(etype_ids) == etype_id) + assert np.all(F.asnumpy(partid) == gpb.partid) + + orig_id = orig_eids[etype][inner_type_eids] + local_eids = gpb.eid2localeid(inner_type_eids, gpb.partid, etype) + + for name in g.edges[etype].data: + if name in [dgl.EID, "inner_edge"]: + continue + true_feats = F.gather_row(g.edges[etype].data[name], orig_id) + edata = F.gather_row( + edge_feats[_etype_tuple_to_str(etype) + "/" + name], + local_eids, + ) + assert np.all(F.asnumpy(edata == true_feats)) + else: + for ntype in g.ntypes: + ntype_id = g.get_ntype_id(ntype) + inner_node_mask = _get_inner_node_mask( + part, ntype_id, use_graphbolt + ) + inner_nids = F.boolean_mask(part.ndata[dgl.NID], inner_node_mask) + ntype_ids, inner_type_nids = gpb.map_to_per_ntype(inner_nids) + partid = gpb.nid2partid(inner_type_nids, ntype) + assert np.all(F.asnumpy(ntype_ids) == ntype_id) + assert np.all(F.asnumpy(partid) == gpb.partid) + + orig_id = orig_nids[ntype][inner_type_nids] + local_nids = gpb.nid2localnid(inner_type_nids, gpb.partid, ntype) + + for name in g.nodes[ntype].data: + if name in [dgl.NID, "inner_node"]: + continue + true_feats = F.gather_row(g.nodes[ntype].data[name], orig_id) + ndata = F.gather_row(node_feats[ntype + "/" + name], local_nids) + assert np.all(F.asnumpy(ndata == true_feats)) + + for etype in g.canonical_etypes: + etype_id = g.get_etype_id(etype) + inner_edge_mask = _get_inner_edge_mask(part, etype_id) + inner_eids = F.boolean_mask(part.edata[dgl.EID], inner_edge_mask) + etype_ids, inner_type_eids = gpb.map_to_per_etype(inner_eids) + partid = gpb.eid2partid(inner_type_eids, etype) + assert np.all(F.asnumpy(etype_ids) == etype_id) + assert np.all(F.asnumpy(partid) == gpb.partid) 
+ + orig_id = orig_eids[etype][inner_type_eids] + local_eids = gpb.eid2localeid(inner_type_eids, gpb.partid, etype) + + for name in g.edges[etype].data: + if name in [dgl.EID, "inner_edge"]: + continue + true_feats = F.gather_row(g.edges[etype].data[name], orig_id) + edata = F.gather_row( + edge_feats[_etype_tuple_to_str(etype) + "/" + name], + local_eids, + ) + assert np.all(F.asnumpy(edata == true_feats)) def check_hetero_partition( @@ -245,7 +361,7 @@ def check_hetero_partition( shuffled_labels = [] shuffled_elabels = [] for i in range(num_parts): - part_g, node_feats, edge_feats, gpb, _, ntypes, etypes = load_partition( + part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( "/tmp/partition/test.json", i, load_feats=load_feats ) _verify_partition_data_types(part_g) @@ -1075,17 +1191,12 @@ def test_not_sorted_node_edge_map(): @pytest.mark.parametrize("part_method", ["metis", "random"]) @pytest.mark.parametrize("num_parts", [1, 4]) -@pytest.mark.parametrize("store_eids", [True, False]) -@pytest.mark.parametrize("store_inner_node", [True, False]) -@pytest.mark.parametrize("store_inner_edge", [True, False]) @pytest.mark.parametrize("debug_mode", [True, False]) def test_partition_graph_graphbolt_homo( part_method, num_parts, - store_eids, - store_inner_node, - store_inner_edge, debug_mode, + num_trainers_per_machine=1, ): reset_envs() if debug_mode: @@ -1093,148 +1204,369 @@ def test_partition_graph_graphbolt_homo( with tempfile.TemporaryDirectory() as test_dir: g = create_random_graph(1000) graph_name = "test" - partition_graph( + g.ndata["labels"] = F.arange(0, g.num_nodes()) + g.ndata["feats"] = F.tensor( + np.random.randn(g.num_nodes(), 10), F.float32 + ) + g.edata["feats"] = F.tensor( + np.random.randn(g.num_edges(), 10), F.float32 + ) + g.update_all(fn.copy_u("feats", "msg"), fn.sum("msg", "h")) + g.update_all(fn.copy_e("feats", "msg"), fn.sum("msg", "eh")) + + orig_nids, orig_eids = partition_graph( g, graph_name, num_parts, test_dir, 
part_method=part_method, use_graphbolt=True, - store_eids=store_eids, - store_inner_node=store_inner_node, - store_inner_edge=store_inner_edge, + store_eids=True, + store_inner_node=True, + store_inner_edge=True, + return_mapping=True, ) + part_sizes = [] + shuffled_labels = [] + shuffled_edata = [] part_config = os.path.join(test_dir, f"{graph_name}.json") - for part_id in range(num_parts): - orig_g = dgl.load_graphs( - os.path.join(test_dir, f"part{part_id}/graph.dgl") - )[0][0] - new_g = load_partition( - part_config, part_id, load_feats=False, use_graphbolt=True - )[0] - orig_indptr, orig_indices, orig_eids = orig_g.adj().csc() - assert th.equal(orig_indptr, new_g.csc_indptr) - assert th.equal(orig_indices, new_g.indices) - assert new_g.node_type_offset is None - assert th.equal( - orig_g.ndata[dgl.NID], new_g.node_attributes[dgl.NID] + for i in range(num_parts): + part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( + part_config, i, load_feats=True, use_graphbolt=True ) - if store_inner_node or debug_mode: - assert th.equal( - orig_g.ndata["inner_node"], - new_g.node_attributes["inner_node"], - ) - else: - assert "inner_node" not in new_g.node_attributes - if store_eids or debug_mode: - assert th.equal( - orig_g.edata[dgl.EID][orig_eids], - new_g.edge_attributes[dgl.EID], - ) - else: - assert dgl.EID not in new_g.edge_attributes - if store_inner_edge or debug_mode: - assert th.equal( - orig_g.edata["inner_edge"][orig_eids], - new_g.edge_attributes["inner_edge"], - ) - else: - assert "inner_edge" not in new_g.edge_attributes - assert new_g.type_per_edge is None - assert new_g.node_type_to_id is None - assert new_g.edge_type_to_id is None + if num_trainers_per_machine > 1: + for ntype in g.ntypes: + name = ntype + "/trainer_id" + assert name in node_feats + part_ids = F.floor_div( + node_feats[name], num_trainers_per_machine + ) + assert np.all(F.asnumpy(part_ids) == i) + + for etype in g.canonical_etypes: + name = _etype_tuple_to_str(etype) + 
"/trainer_id" + assert name in edge_feats + part_ids = F.floor_div( + edge_feats[name], num_trainers_per_machine + ) + assert np.all(F.asnumpy(part_ids) == i) + + # Check the metadata + assert gpb._num_nodes() == g.num_nodes() + assert gpb._num_edges() == g.num_edges() + + assert gpb.num_partitions() == num_parts + gpb_meta = gpb.metadata() + assert len(gpb_meta) == num_parts + assert len(gpb.partid2nids(i)) == gpb_meta[i]["num_nodes"] + assert len(gpb.partid2eids(i)) == gpb_meta[i]["num_edges"] + part_sizes.append( + (gpb_meta[i]["num_nodes"], gpb_meta[i]["num_edges"]) + ) + + nid = F.boolean_mask( + part_g.node_attributes[dgl.NID], + part_g.node_attributes["inner_node"], + ) + local_nid = gpb.nid2localnid(nid, i) + assert F.dtype(local_nid) in (F.int64, F.int32) + assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid))) + eid = F.boolean_mask( + part_g.edge_attributes[dgl.EID], + part_g.edge_attributes["inner_edge"], + ) + local_eid = gpb.eid2localeid(eid, i) + assert F.dtype(local_eid) in (F.int64, F.int32) + assert np.all( + np.sort(F.asnumpy(local_eid)) == np.arange(0, len(local_eid)) + ) + + # Check the node map. + local_nodes = F.boolean_mask( + part_g.node_attributes[dgl.NID], + part_g.node_attributes["inner_node"], + ) + llocal_nodes = F.nonzero_1d(part_g.node_attributes["inner_node"]) + local_nodes1 = gpb.partid2nids(i) + assert F.dtype(local_nodes1) in (F.int32, F.int64) + assert np.all( + np.sort(F.asnumpy(local_nodes)) + == np.sort(F.asnumpy(local_nodes1)) + ) + assert np.all( + F.asnumpy(llocal_nodes) == np.arange(len(llocal_nodes)) + ) + + # Check the edge map. 
+ local_edges = F.boolean_mask( + part_g.edge_attributes[dgl.EID], + part_g.edge_attributes["inner_edge"], + ) + llocal_edges = F.nonzero_1d(part_g.edge_attributes["inner_edge"]) + local_edges1 = gpb.partid2eids(i) + assert F.dtype(local_edges1) in (F.int32, F.int64) + assert np.all( + np.sort(F.asnumpy(local_edges)) + == np.sort(F.asnumpy(local_edges1)) + ) + assert np.all( + F.asnumpy(llocal_edges) == np.arange(len(llocal_edges)) + ) + + # Verify the mapping between the reshuffled IDs and the original IDs. + indices, indptr = part_g.indices, part_g.csc_indptr + adj_matrix = dglsp.from_csc(indptr, indices) + part_src_ids, part_dst_ids = adj_matrix.coo() + part_src_ids = F.gather_row( + part_g.node_attributes[dgl.NID], part_src_ids + ) + part_dst_ids = F.gather_row( + part_g.node_attributes[dgl.NID], part_dst_ids + ) + part_eids = part_g.edge_attributes[dgl.EID] + orig_src_ids = F.gather_row(orig_nids, part_src_ids) + orig_dst_ids = F.gather_row(orig_nids, part_dst_ids) + orig_eids1 = F.gather_row(orig_eids, part_eids) + orig_eids2 = g.edge_ids(orig_src_ids, orig_dst_ids) + assert F.shape(orig_eids1)[0] == F.shape(orig_eids2)[0] + assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) + + local_orig_nids = orig_nids[part_g.node_attributes[dgl.NID]] + local_orig_eids = orig_eids[part_g.edge_attributes[dgl.EID]] + part_g.node_attributes["feats"] = F.gather_row( + g.ndata["feats"], local_orig_nids + ) + part_g.edge_attributes["feats"] = F.gather_row( + g.edata["feats"], local_orig_eids + ) + local_nodes = orig_nids[local_nodes] + local_edges = orig_eids[local_edges] + + # part_g.update_all(fn.copy_u("feats", "msg"), fn.sum("msg", "h")) + # part_g.update_all(fn.copy_e("feats", "msg"), fn.sum("msg", "eh")) + # part_g.node_attributes["h"] = adj_matrix@part_g.node_attributes["h"] + + # assert F.allclose( + # F.gather_row(g.ndata["h"], local_nodes), + # F.gather_row(part_g.node_attributes["h"], llocal_nodes), + # ) + # assert F.allclose( + # 
F.gather_row(g.ndata["eh"], local_nodes), + # F.gather_row(part_g.node_attributes["eh"], llocal_nodes), + # ) + + for name in ["labels", "feats"]: + assert "_N/" + name in node_feats + assert node_feats["_N/" + name].shape[0] == len(local_nodes) + true_feats = F.gather_row(g.ndata[name], local_nodes) + ndata = F.gather_row(node_feats["_N/" + name], local_nid) + assert np.all(F.asnumpy(true_feats) == F.asnumpy(ndata)) + for name in ["feats"]: + efeat_name = _etype_tuple_to_str(DEFAULT_ETYPE) + "/" + name + assert efeat_name in edge_feats + assert edge_feats[efeat_name].shape[0] == len(local_edges) + true_feats = F.gather_row(g.edata[name], local_edges) + edata = F.gather_row(edge_feats[efeat_name], local_eid) + assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata)) + + # This only works if node/edge IDs are shuffled. + shuffled_labels.append(node_feats["_N/labels"]) + shuffled_edata.append(edge_feats["_N:_E:_N/feats"]) + + # Verify that we can reconstruct node/edge data for original IDs. 
+ shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) + shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0)) + orig_labels = np.zeros( + shuffled_labels.shape, dtype=shuffled_labels.dtype + ) + orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype) + orig_labels[F.asnumpy(orig_nids)] = shuffled_labels + orig_edata[F.asnumpy(orig_eids)] = shuffled_edata + assert np.all(orig_labels == F.asnumpy(g.ndata["labels"])) + assert np.all(orig_edata == F.asnumpy(g.edata["feats"])) + + node_map = [] + edge_map = [] + for i, (num_nodes, num_edges) in enumerate(part_sizes): + node_map.append(np.ones(num_nodes) * i) + edge_map.append(np.ones(num_edges) * i) + node_map = np.concatenate(node_map) + edge_map = np.concatenate(edge_map) + nid2pid = gpb.nid2partid(F.arange(0, len(node_map))) + assert F.dtype(nid2pid) in (F.int32, F.int64) + assert np.all(F.asnumpy(nid2pid) == node_map) + eid2pid = gpb.eid2partid(F.arange(0, len(edge_map))) + assert F.dtype(eid2pid) in (F.int32, F.int64) + assert np.all(F.asnumpy(eid2pid) == edge_map) @pytest.mark.parametrize("part_method", ["metis", "random"]) @pytest.mark.parametrize("num_parts", [1, 4]) -@pytest.mark.parametrize("store_eids", [True, False]) -@pytest.mark.parametrize("store_inner_node", [True, False]) -@pytest.mark.parametrize("store_inner_edge", [True, False]) @pytest.mark.parametrize("debug_mode", [True, False]) def test_partition_graph_graphbolt_hetero( part_method, num_parts, - store_eids, - store_inner_node, - store_inner_edge, debug_mode, n_jobs=1, + num_trainers_per_machine=1, ): + test_ntype = "n1" + test_etype = ("n1", "r1", "n2") reset_envs() if debug_mode: os.environ["DGL_DIST_DEBUG"] = "1" with tempfile.TemporaryDirectory() as test_dir: - g = create_random_hetero() + hg = create_random_hetero() graph_name = "test" - partition_graph( - g, + hg.nodes[test_ntype].data["labels"] = F.arange( + 0, hg.num_nodes(test_ntype) + ) + hg.nodes[test_ntype].data["feats"] = F.tensor( + 
np.random.randn(hg.num_nodes(test_ntype), 10), F.float32 + ) + hg.edges[test_etype].data["feats"] = F.tensor( + np.random.randn(hg.num_edges(test_etype), 10), F.float32 + ) + hg.edges[test_etype].data["labels"] = F.arange( + 0, hg.num_edges(test_etype) + ) + num_hops = 1 + orig_nids, orig_eids = partition_graph( + hg, graph_name, num_parts, test_dir, part_method=part_method, + return_mapping=True, + num_trainers_per_machine=1, use_graphbolt=True, - store_eids=store_eids, - store_inner_node=store_inner_node, - store_inner_edge=store_inner_edge, + store_eids=True, + store_inner_node=True, + store_inner_edge=True, n_jobs=n_jobs, ) + assert len(orig_nids) == len(hg.ntypes) + assert len(orig_eids) == len(hg.canonical_etypes) + for ntype in hg.ntypes: + assert len(orig_nids[ntype]) == hg.num_nodes(ntype) + for etype in hg.canonical_etypes: + assert len(orig_eids[etype]) == hg.num_edges(etype) + parts = [] + shuffled_labels = [] + shuffled_elabels = [] part_config = os.path.join(test_dir, f"{graph_name}.json") for part_id in range(num_parts): - orig_g = dgl.load_graphs( - os.path.join(test_dir, f"part{part_id}/graph.dgl") - )[0][0] - new_g = load_partition( - part_config, part_id, load_feats=False, use_graphbolt=True - )[0] - orig_indptr, orig_indices, orig_eids = orig_g.adj().csc() - assert th.equal(orig_indptr, new_g.csc_indptr) - assert th.equal(orig_indices, new_g.indices) - assert th.equal( - orig_g.ndata[dgl.NID], new_g.node_attributes[dgl.NID] + part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( + part_config, part_id, load_feats=True, use_graphbolt=True ) - if store_inner_node or debug_mode: - assert th.equal( - orig_g.ndata["inner_node"], - new_g.node_attributes["inner_node"], + if num_trainers_per_machine > 1: + for ntype in hg.ntypes: + name = ntype + "/trainer_id" + assert name in node_feats + part_ids = F.floor_div( + node_feats[name], num_trainers_per_machine + ) + assert np.all(F.asnumpy(part_ids) == part_id) + + for etype in 
hg.canonical_etypes: + name = _etype_tuple_to_str(etype) + "/trainer_id" + assert name in edge_feats + part_ids = F.floor_div( + edge_feats[name], num_trainers_per_machine + ) + assert np.all(F.asnumpy(part_ids) == part_id) + + # Verify the mapping between the reshuffled IDs and the original IDs. + # These are partition-local IDs. + indices, indptr = part_g.indices, part_g.csc_indptr + csc_matrix = dglsp.from_csc(indptr, indices) + part_src_ids, part_dst_ids = csc_matrix.coo() + # These are reshuffled global homogeneous IDs. + part_src_ids = F.gather_row( + part_g.node_attributes[dgl.NID], part_src_ids + ) + part_dst_ids = F.gather_row( + part_g.node_attributes[dgl.NID], part_dst_ids + ) + part_eids = part_g.edge_attributes[dgl.EID] + # These are reshuffled per-type IDs. + src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids) + dst_ntype_ids, part_dst_ids = gpb.map_to_per_ntype(part_dst_ids) + etype_ids, part_eids = gpb.map_to_per_etype(part_eids) + # `IdMap` is in int64 by default. + assert src_ntype_ids.dtype == F.int64 + assert dst_ntype_ids.dtype == F.int64 + assert etype_ids.dtype == F.int64 + with pytest.raises(dgl.utils.internal.InconsistentDtypeException): + gpb.map_to_per_ntype(F.tensor([0], F.int32)) + with pytest.raises(dgl.utils.internal.InconsistentDtypeException): + gpb.map_to_per_etype(F.tensor([0], F.int32)) + # These are original per-type IDs. 
+ for etype_id, etype in enumerate(hg.canonical_etypes): + part_src_ids1 = F.boolean_mask( + part_src_ids, etype_ids == etype_id ) - else: - assert "inner_node" not in new_g.node_attributes - if debug_mode: - assert th.equal( - orig_g.ndata[dgl.NTYPE], new_g.node_attributes[dgl.NTYPE] + src_ntype_ids1 = F.boolean_mask( + src_ntype_ids, etype_ids == etype_id ) - else: - assert dgl.NTYPE not in new_g.node_attributes - if store_eids or debug_mode: - assert th.equal( - orig_g.edata[dgl.EID][orig_eids], - new_g.edge_attributes[dgl.EID], + part_dst_ids1 = F.boolean_mask( + part_dst_ids, etype_ids == etype_id ) - else: - assert dgl.EID not in new_g.edge_attributes - if store_inner_edge or debug_mode: - assert th.equal( - orig_g.edata["inner_edge"], - new_g.edge_attributes["inner_edge"], + dst_ntype_ids1 = F.boolean_mask( + dst_ntype_ids, etype_ids == etype_id ) - else: - assert "inner_edge" not in new_g.edge_attributes - if debug_mode: - assert th.equal( - orig_g.edata[dgl.ETYPE][orig_eids], - new_g.edge_attributes[dgl.ETYPE], + part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id) + assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0])) + assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0])) + src_ntype = hg.ntypes[F.as_scalar(src_ntype_ids1[0])] + dst_ntype = hg.ntypes[F.as_scalar(dst_ntype_ids1[0])] + orig_src_ids1 = F.gather_row( + orig_nids[src_ntype], part_src_ids1 ) - else: - assert dgl.ETYPE not in new_g.edge_attributes - assert th.equal( - orig_g.edata[dgl.ETYPE][orig_eids], new_g.type_per_edge + orig_dst_ids1 = F.gather_row( + orig_nids[dst_ntype], part_dst_ids1 + ) + orig_eids1 = F.gather_row(orig_eids[etype], part_eids1) + orig_eids2 = hg.edge_ids( + orig_src_ids1, orig_dst_ids1, etype=etype + ) + assert len(orig_eids1) == len(orig_eids2) + assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) + parts.append(part_g) + if NTYPE in part_g.node_attributes: + verify_graph_feats( + hg, + gpb, + part_g, + node_feats, + edge_feats, + 
orig_nids, + orig_eids, + use_graphbolt=True, + ) + + shuffled_labels.append(node_feats[test_ntype + "/labels"]) + shuffled_elabels.append( + edge_feats[_etype_tuple_to_str(test_etype) + "/labels"] ) + verify_hetero_graph(hg, parts, True) - for node_type, type_id in new_g.node_type_to_id.items(): - assert g.get_ntype_id(node_type) == type_id - for edge_type, type_id in new_g.edge_type_to_id.items(): - assert g.get_etype_id(_etype_str_to_tuple(edge_type)) == type_id - assert new_g.node_type_offset is None + shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) + shuffled_elabels = F.asnumpy(F.cat(shuffled_elabels, 0)) + orig_labels = np.zeros( + shuffled_labels.shape, dtype=shuffled_labels.dtype + ) + orig_elabels = np.zeros( + shuffled_elabels.shape, dtype=shuffled_elabels.dtype + ) + orig_labels[F.asnumpy(orig_nids[test_ntype])] = shuffled_labels + orig_elabels[F.asnumpy(orig_eids[test_etype])] = shuffled_elabels + assert np.all( + orig_labels == F.asnumpy(hg.nodes[test_ntype].data["labels"]) + ) + assert np.all( + orig_elabels == F.asnumpy(hg.edges[test_etype].data["labels"]) + ) @pytest.mark.parametrize("part_method", ["metis", "random"]) @@ -1461,9 +1793,6 @@ def test_partition_graph_graphbolt_hetero_multi( part_method="random", num_parts=num_parts, n_jobs=4, - store_eids=True, - store_inner_node=True, - store_inner_edge=True, debug_mode=False, ) From bec2af3f84d468a9ffb2603710adf3f4405f393e Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 20 Aug 2024 09:45:50 +0000 Subject: [PATCH 03/39] change pr --- python/dgl/distributed/partition.py | 95 ++--- tests/distributed/test_partition.py | 641 +++++++++++++++++++++------- 2 files changed, 522 insertions(+), 214 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index fd0fcae9d9c2..ab5cf670d743 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1105,7 +1105,7 @@ def get_homogeneous(g, balance_ntypes): inner_node_mask = 
_get_inner_node_mask(parts[i], ntype_id) val.append( F.as_scalar(F.sum(F.astype(inner_node_mask, F.int64), 0)) - )#note inner_node_mask(tensor[n,bool])->tensor[n,int64]->sum->scalar, compute the num of one partition + ) inner_nids = F.boolean_mask( parts[i].ndata[NID], inner_node_mask ) @@ -1115,7 +1115,7 @@ def get_homogeneous(g, balance_ntypes): int(F.as_scalar(inner_nids[-1])) + 1, ] ) - val = np.cumsum(val).tolist()# note computing the cumulative sum of array elements. + val = np.cumsum(val).tolist() assert val[-1] == g.num_nodes(ntype) for etype in g.canonical_etypes: etype_id = g.get_etype_id(etype) @@ -1135,7 +1135,7 @@ def get_homogeneous(g, balance_ntypes): [int(inner_eids[0]), int(inner_eids[-1]) + 1] ) val = np.cumsum(val).tolist() - assert val[-1] == g.num_edges(etype)# note assure the tot graph can be used + assert val[-1] == g.num_edges(etype) else: node_map_val = {} edge_map_val = {} @@ -1305,52 +1305,32 @@ def get_homogeneous(g, balance_ntypes): part_dir = os.path.join(out_path, "part" + str(part_id)) node_feat_file = os.path.join(part_dir, "node_feat.dgl") edge_feat_file = os.path.join(part_dir, "edge_feat.dgl") - - os.makedirs(part_dir, mode=0o775, exist_ok=True) - save_tensors(node_feat_file, node_feats) - save_tensors(edge_feat_file, edge_feats) - - #save - if use_graphbolt: - part_metadata["part-{}".format(part_id)] = { - "node_feats": os.path.relpath(node_feat_file, out_path), - "edge_feats": os.path.relpath(edge_feat_file, out_path), - } - else: - part_graph_file = os.path.join(part_dir, "graph.dgl") - - part_metadata["part-{}".format(part_id)] = { + part_graph_file = os.path.join(part_dir, "graph.dgl") + part_metadata["part-{}".format(part_id)] = { "node_feats": os.path.relpath(node_feat_file, out_path), "edge_feats": os.path.relpath(edge_feat_file, out_path), "part_graph": os.path.relpath(part_graph_file, out_path), } - sort_etypes = len(g.etypes) > 1 - _save_graphs( - part_graph_file, - [part], - formats=graph_formats, - 
sort_etypes=sort_etypes, - ) - - - part_config = os.path.join(out_path, graph_name + ".json") - if use_graphbolt: - kwargs["graph_formats"] = graph_formats - dgl_partition_to_graphbolt( - part_config, - parts=parts, - part_meta=part_metadata, - **kwargs, + os.makedirs(part_dir, mode=0o775, exist_ok=True) + save_tensors(node_feat_file, node_feats) + save_tensors(edge_feat_file, edge_feats) + + sort_etypes = len(g.etypes) > 1 + _save_graphs( + part_graph_file, + [part], + formats=graph_formats, + sort_etypes=sort_etypes, ) - else: - _dump_part_config(part_config, part_metadata) - print( "Save partitions: {:.3f} seconds, peak memory: {:.3f} GB".format( time.time() - start, get_peak_mem() ) ) + part_config = os.path.join(out_path, graph_name + ".json") + _dump_part_config(part_config, part_metadata) + num_cuts = sim_g.num_edges() - tot_num_inner_edges if num_parts == 1: num_cuts = 0 @@ -1360,6 +1340,13 @@ def get_homogeneous(g, balance_ntypes): ) ) + if use_graphbolt: + kwargs["graph_formats"] = graph_formats + dgl_partition_to_graphbolt( + part_config, + **kwargs, + ) + if return_mapping: return orig_nids, orig_eids @@ -1405,9 +1392,9 @@ def init_type_per_edge(graph, gpb): etype_ids = gpb.map_to_per_etype(graph.edata[EID])[0] return etype_ids -def gb_convert_single_dgl_partition(# TODO change this + +def gb_convert_single_dgl_partition( part_id, - parts, graph_formats, part_config, store_eids, @@ -1440,18 +1427,14 @@ def gb_convert_single_dgl_partition(# TODO change this "Running in debug mode which means all attributes of DGL partitions" " will be saved to the new format." 
) - + part_meta = _load_part_config(part_config) num_parts = part_meta["num_parts"] - if parts!=None: - assert len(parts)==num_parts - graph=parts[part_id] - else: - graph, _, _, gpb, _, _, _ = load_partition( - part_config, part_id, load_feats=False - ) - gpb, _, ntypes, etypes = load_partition_book(part_config, part_id) + graph, _, _, gpb, _, _, _ = load_partition( + part_config, part_id, load_feats=False + ) + _, _, ntypes, etypes = load_partition_book(part_config, part_id) is_homo = is_homogeneous(ntypes, etypes) node_type_to_id = ( None if is_homo else {ntype: ntid for ntid, ntype in enumerate(ntypes)} @@ -1520,7 +1503,7 @@ def gb_convert_single_dgl_partition(# TODO change this indptr, dtype=indices.dtype ) - # Cast various data to minimum dtype.#note convert to minimun dtype + # Cast various data to minimum dtype. # Cast 1: indptr. indptr = _cast_to_minimum_dtype(graph.num_edges(), indptr) # Cast 2: indices. @@ -1569,6 +1552,7 @@ def gb_convert_single_dgl_partition(# TODO change this return os.path.relpath(csc_graph_path, os.path.dirname(part_config)) # Update graph path. + def dgl_partition_to_graphbolt( part_config, *, @@ -1577,10 +1561,7 @@ def dgl_partition_to_graphbolt( store_inner_edge=False, graph_formats=None, n_jobs=1, - parts=None, - part_meta=None -):# note - +): """Convert partitions of dgl to FusedCSCSamplingGraph of GraphBolt. This API converts `DGLGraph` partitions to `FusedCSCSamplingGraph` which is @@ -1617,8 +1598,7 @@ def dgl_partition_to_graphbolt( "Running in debug mode which means all attributes of DGL partitions" " will be saved to the new format." 
) - if part_meta==None: - part_meta = _load_part_config(part_config) + part_meta = _load_part_config(part_config) new_part_meta = copy.deepcopy(part_meta) num_parts = part_meta["num_parts"] @@ -1635,7 +1615,6 @@ def dgl_partition_to_graphbolt( convert_with_format = partial( gb_convert_single_dgl_partition, graph_formats=graph_formats, - parts=parts, part_config=part_config, store_eids=store_eids, store_inner_node=store_inner_node, @@ -1675,4 +1654,4 @@ def dgl_partition_to_graphbolt( new_part_meta["edge_map_dtype"] = "int64" _dump_part_config(part_config, new_part_meta) - print(f"Converted partitions to GraphBolt format into {part_config}") + print(f"Converted partitions to GraphBolt format into {part_config}") \ No newline at end of file diff --git a/tests/distributed/test_partition.py b/tests/distributed/test_partition.py index 5fb121750e01..0f2425cb054d 100644 --- a/tests/distributed/test_partition.py +++ b/tests/distributed/test_partition.py @@ -5,10 +5,12 @@ import dgl import dgl.backend as F +import dgl.sparse as dglsp import numpy as np import pytest import torch as th from dgl import function as fn +from dgl.base import NTYPE from dgl.distributed import ( dgl_partition_to_graphbolt, load_partition, @@ -35,12 +37,19 @@ from utils import reset_envs -def _verify_partition_data_types(part_g): - for k, dtype in RESERVED_FIELD_DTYPE.items(): - if k in part_g.ndata: - assert part_g.ndata[k].dtype == dtype - if k in part_g.edata: - assert part_g.edata[k].dtype == dtype +def _verify_partition_data_types(part_g, use_graphbolt=False): + if not use_graphbolt: + for k, dtype in RESERVED_FIELD_DTYPE.items(): + if k in part_g.ndata: + assert part_g.ndata[k].dtype == dtype + if k in part_g.edata: + assert part_g.edata[k].dtype == dtype + else: + for k, dtype in RESERVED_FIELD_DTYPE.items(): + if k in part_g.node_attributes: + assert part_g.node_attributes[k].dtype == dtype + if k in part_g.edge_attributes: + assert part_g.edge_attributes[k].dtype == dtype def 
_verify_partition_formats(part_g, formats): @@ -81,11 +90,58 @@ def create_random_hetero(): return dgl.heterograph(edges, num_nodes) -def verify_hetero_graph(g, parts): +def verify_hetero_graph(g, parts, use_graphbolt=False): + if use_graphbolt: + num_nodes = {ntype: 0 for ntype in g.ntypes} + num_edges = {etype: 0 for etype in g.canonical_etypes} + for part in parts: + for etype in g.canonical_etypes: + etype_id = g.get_etype_id(etype) + inner_edge_mask = _get_inner_edge_mask( + part, etype_id, use_graphbolt + ) + num_inner_edges = F.sum(F.astype(inner_edge_mask, F.int64), 0) + num_edges[etype] += num_inner_edges + + # Verify the number of edges are correct. + for etype in g.canonical_etypes: + print( + "edge {}: {}, {}".format( + etype, g.num_edges(etype), num_edges[etype] + ) + ) + assert g.num_edges(etype) == num_edges[etype] + + nids = {ntype: [] for ntype in g.ntypes} + eids = {etype: [] for etype in g.canonical_etypes} + for part in parts: + eid = th.arange(len(part.edge_attributes[dgl.EID])) + etype_arr = F.gather_row(part.type_per_edge, eid) + eid_type = F.gather_row(part.edge_attributes[dgl.EID], eid) + for etype in g.canonical_etypes: + etype_id = g.get_etype_id(etype) + eids[etype].append( + F.boolean_mask(eid_type, etype_arr == etype_id) + ) + # Make sure edge Ids fall into a range. 
+ inner_edge_mask = _get_inner_edge_mask( + part, etype_id, use_graphbolt + ) + inner_eids = np.sort( + F.asnumpy( + F.boolean_mask( + part.edge_attributes[dgl.EID], inner_edge_mask + ) + ) + ) + assert np.all( + inner_eids == np.arange(inner_eids[0], inner_eids[-1] + 1) + ) + return + num_nodes = {ntype: 0 for ntype in g.ntypes} num_edges = {etype: 0 for etype in g.canonical_etypes} for part in parts: - assert len(g.ntypes) == len(F.unique(part.ndata[dgl.NTYPE])) assert len(g.canonical_etypes) == len(F.unique(part.edata[dgl.ETYPE])) for ntype in g.ntypes: ntype_id = g.get_ntype_id(ntype) @@ -161,47 +217,107 @@ def verify_hetero_graph(g, parts): def verify_graph_feats( - g, gpb, part, node_feats, edge_feats, orig_nids, orig_eids + g, + gpb, + part, + node_feats, + edge_feats, + orig_nids, + orig_eids, + use_graphbolt=False, ): - for ntype in g.ntypes: - ntype_id = g.get_ntype_id(ntype) - inner_node_mask = _get_inner_node_mask(part, ntype_id) - inner_nids = F.boolean_mask(part.ndata[dgl.NID], inner_node_mask) - ntype_ids, inner_type_nids = gpb.map_to_per_ntype(inner_nids) - partid = gpb.nid2partid(inner_type_nids, ntype) - assert np.all(F.asnumpy(ntype_ids) == ntype_id) - assert np.all(F.asnumpy(partid) == gpb.partid) - - orig_id = orig_nids[ntype][inner_type_nids] - local_nids = gpb.nid2localnid(inner_type_nids, gpb.partid, ntype) - - for name in g.nodes[ntype].data: - if name in [dgl.NID, "inner_node"]: - continue - true_feats = F.gather_row(g.nodes[ntype].data[name], orig_id) - ndata = F.gather_row(node_feats[ntype + "/" + name], local_nids) - assert np.all(F.asnumpy(ndata == true_feats)) + if use_graphbolt: + for ntype in g.ntypes: + ntype_id = g.get_ntype_id(ntype) + inner_node_mask = _get_inner_node_mask( + part, ntype_id, use_graphbolt + ) + inner_nids = F.boolean_mask( + part.node_attributes[dgl.NID], inner_node_mask + ) + ntype_ids, inner_type_nids = gpb.map_to_per_ntype(inner_nids) + partid = gpb.nid2partid(inner_type_nids, ntype) + assert 
np.all(F.asnumpy(ntype_ids) == ntype_id) + assert np.all(F.asnumpy(partid) == gpb.partid) - for etype in g.canonical_etypes: - etype_id = g.get_etype_id(etype) - inner_edge_mask = _get_inner_edge_mask(part, etype_id) - inner_eids = F.boolean_mask(part.edata[dgl.EID], inner_edge_mask) - etype_ids, inner_type_eids = gpb.map_to_per_etype(inner_eids) - partid = gpb.eid2partid(inner_type_eids, etype) - assert np.all(F.asnumpy(etype_ids) == etype_id) - assert np.all(F.asnumpy(partid) == gpb.partid) - - orig_id = orig_eids[etype][inner_type_eids] - local_eids = gpb.eid2localeid(inner_type_eids, gpb.partid, etype) - - for name in g.edges[etype].data: - if name in [dgl.EID, "inner_edge"]: - continue - true_feats = F.gather_row(g.edges[etype].data[name], orig_id) - edata = F.gather_row( - edge_feats[_etype_tuple_to_str(etype) + "/" + name], local_eids + orig_id = orig_nids[ntype][inner_type_nids] + local_nids = gpb.nid2localnid(inner_type_nids, gpb.partid, ntype) + + for name in g.nodes[ntype].data: + if name in [dgl.NID, "inner_node"]: + continue + true_feats = F.gather_row(g.nodes[ntype].data[name], orig_id) + ndata = F.gather_row(node_feats[ntype + "/" + name], local_nids) + assert np.all(F.asnumpy(ndata == true_feats)) + + for etype in g.canonical_etypes: + etype_id = g.get_etype_id(etype) + inner_edge_mask = _get_inner_edge_mask( + part, etype_id, use_graphbolt ) - assert np.all(F.asnumpy(edata == true_feats)) + inner_eids = F.boolean_mask( + part.edge_attributes[dgl.EID], inner_edge_mask + ) + etype_ids, inner_type_eids = gpb.map_to_per_etype(inner_eids) + partid = gpb.eid2partid(inner_type_eids, etype) + assert np.all(F.asnumpy(etype_ids) == etype_id) + assert np.all(F.asnumpy(partid) == gpb.partid) + + orig_id = orig_eids[etype][inner_type_eids] + local_eids = gpb.eid2localeid(inner_type_eids, gpb.partid, etype) + + for name in g.edges[etype].data: + if name in [dgl.EID, "inner_edge"]: + continue + true_feats = F.gather_row(g.edges[etype].data[name], orig_id) + edata 
= F.gather_row( + edge_feats[_etype_tuple_to_str(etype) + "/" + name], + local_eids, + ) + assert np.all(F.asnumpy(edata == true_feats)) + else: + for ntype in g.ntypes: + ntype_id = g.get_ntype_id(ntype) + inner_node_mask = _get_inner_node_mask( + part, ntype_id, use_graphbolt + ) + inner_nids = F.boolean_mask(part.ndata[dgl.NID], inner_node_mask) + ntype_ids, inner_type_nids = gpb.map_to_per_ntype(inner_nids) + partid = gpb.nid2partid(inner_type_nids, ntype) + assert np.all(F.asnumpy(ntype_ids) == ntype_id) + assert np.all(F.asnumpy(partid) == gpb.partid) + + orig_id = orig_nids[ntype][inner_type_nids] + local_nids = gpb.nid2localnid(inner_type_nids, gpb.partid, ntype) + + for name in g.nodes[ntype].data: + if name in [dgl.NID, "inner_node"]: + continue + true_feats = F.gather_row(g.nodes[ntype].data[name], orig_id) + ndata = F.gather_row(node_feats[ntype + "/" + name], local_nids) + assert np.all(F.asnumpy(ndata == true_feats)) + + for etype in g.canonical_etypes: + etype_id = g.get_etype_id(etype) + inner_edge_mask = _get_inner_edge_mask(part, etype_id) + inner_eids = F.boolean_mask(part.edata[dgl.EID], inner_edge_mask) + etype_ids, inner_type_eids = gpb.map_to_per_etype(inner_eids) + partid = gpb.eid2partid(inner_type_eids, etype) + assert np.all(F.asnumpy(etype_ids) == etype_id) + assert np.all(F.asnumpy(partid) == gpb.partid) + + orig_id = orig_eids[etype][inner_type_eids] + local_eids = gpb.eid2localeid(inner_type_eids, gpb.partid, etype) + + for name in g.edges[etype].data: + if name in [dgl.EID, "inner_edge"]: + continue + true_feats = F.gather_row(g.edges[etype].data[name], orig_id) + edata = F.gather_row( + edge_feats[_etype_tuple_to_str(etype) + "/" + name], + local_eids, + ) + assert np.all(F.asnumpy(edata == true_feats)) def check_hetero_partition( @@ -245,7 +361,7 @@ def check_hetero_partition( shuffled_labels = [] shuffled_elabels = [] for i in range(num_parts): - part_g, node_feats, edge_feats, gpb, _, ntypes, etypes = load_partition( + part_g, 
node_feats, edge_feats, gpb, _, _, _ = load_partition( "/tmp/partition/test.json", i, load_feats=load_feats ) _verify_partition_data_types(part_g) @@ -1075,17 +1191,12 @@ def test_not_sorted_node_edge_map(): @pytest.mark.parametrize("part_method", ["metis", "random"]) @pytest.mark.parametrize("num_parts", [1, 4]) -@pytest.mark.parametrize("store_eids", [True, False]) -@pytest.mark.parametrize("store_inner_node", [True, False]) -@pytest.mark.parametrize("store_inner_edge", [True, False]) @pytest.mark.parametrize("debug_mode", [True, False]) def test_partition_graph_graphbolt_homo( part_method, num_parts, - store_eids, - store_inner_node, - store_inner_edge, debug_mode, + num_trainers_per_machine=1, ): reset_envs() if debug_mode: @@ -1093,148 +1204,369 @@ def test_partition_graph_graphbolt_homo( with tempfile.TemporaryDirectory() as test_dir: g = create_random_graph(1000) graph_name = "test" - partition_graph( + g.ndata["labels"] = F.arange(0, g.num_nodes()) + g.ndata["feats"] = F.tensor( + np.random.randn(g.num_nodes(), 10), F.float32 + ) + g.edata["feats"] = F.tensor( + np.random.randn(g.num_edges(), 10), F.float32 + ) + g.update_all(fn.copy_u("feats", "msg"), fn.sum("msg", "h")) + g.update_all(fn.copy_e("feats", "msg"), fn.sum("msg", "eh")) + + orig_nids, orig_eids = partition_graph( g, graph_name, num_parts, test_dir, part_method=part_method, use_graphbolt=True, - store_eids=store_eids, - store_inner_node=store_inner_node, - store_inner_edge=store_inner_edge, + store_eids=True, + store_inner_node=True, + store_inner_edge=True, + return_mapping=True, ) + part_sizes = [] + shuffled_labels = [] + shuffled_edata = [] part_config = os.path.join(test_dir, f"{graph_name}.json") - for part_id in range(num_parts): - orig_g = dgl.load_graphs( - os.path.join(test_dir, f"part{part_id}/graph.dgl") - )[0][0] - new_g = load_partition( - part_config, part_id, load_feats=False, use_graphbolt=True - )[0] - orig_indptr, orig_indices, orig_eids = orig_g.adj().csc() - assert 
th.equal(orig_indptr, new_g.csc_indptr) - assert th.equal(orig_indices, new_g.indices) - assert new_g.node_type_offset is None - assert th.equal( - orig_g.ndata[dgl.NID], new_g.node_attributes[dgl.NID] + for i in range(num_parts): + part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( + part_config, i, load_feats=True, use_graphbolt=True ) - if store_inner_node or debug_mode: - assert th.equal( - orig_g.ndata["inner_node"], - new_g.node_attributes["inner_node"], - ) - else: - assert "inner_node" not in new_g.node_attributes - if store_eids or debug_mode: - assert th.equal( - orig_g.edata[dgl.EID][orig_eids], - new_g.edge_attributes[dgl.EID], - ) - else: - assert dgl.EID not in new_g.edge_attributes - if store_inner_edge or debug_mode: - assert th.equal( - orig_g.edata["inner_edge"][orig_eids], - new_g.edge_attributes["inner_edge"], - ) - else: - assert "inner_edge" not in new_g.edge_attributes - assert new_g.type_per_edge is None - assert new_g.node_type_to_id is None - assert new_g.edge_type_to_id is None + if num_trainers_per_machine > 1: + for ntype in g.ntypes: + name = ntype + "/trainer_id" + assert name in node_feats + part_ids = F.floor_div( + node_feats[name], num_trainers_per_machine + ) + assert np.all(F.asnumpy(part_ids) == i) + + for etype in g.canonical_etypes: + name = _etype_tuple_to_str(etype) + "/trainer_id" + assert name in edge_feats + part_ids = F.floor_div( + edge_feats[name], num_trainers_per_machine + ) + assert np.all(F.asnumpy(part_ids) == i) + + # Check the metadata + assert gpb._num_nodes() == g.num_nodes() + assert gpb._num_edges() == g.num_edges() + + assert gpb.num_partitions() == num_parts + gpb_meta = gpb.metadata() + assert len(gpb_meta) == num_parts + assert len(gpb.partid2nids(i)) == gpb_meta[i]["num_nodes"] + assert len(gpb.partid2eids(i)) == gpb_meta[i]["num_edges"] + part_sizes.append( + (gpb_meta[i]["num_nodes"], gpb_meta[i]["num_edges"]) + ) + + nid = F.boolean_mask( + part_g.node_attributes[dgl.NID], + 
part_g.node_attributes["inner_node"], + ) + local_nid = gpb.nid2localnid(nid, i) + assert F.dtype(local_nid) in (F.int64, F.int32) + assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid))) + eid = F.boolean_mask( + part_g.edge_attributes[dgl.EID], + part_g.edge_attributes["inner_edge"], + ) + local_eid = gpb.eid2localeid(eid, i) + assert F.dtype(local_eid) in (F.int64, F.int32) + assert np.all( + np.sort(F.asnumpy(local_eid)) == np.arange(0, len(local_eid)) + ) + + # Check the node map. + local_nodes = F.boolean_mask( + part_g.node_attributes[dgl.NID], + part_g.node_attributes["inner_node"], + ) + llocal_nodes = F.nonzero_1d(part_g.node_attributes["inner_node"]) + local_nodes1 = gpb.partid2nids(i) + assert F.dtype(local_nodes1) in (F.int32, F.int64) + assert np.all( + np.sort(F.asnumpy(local_nodes)) + == np.sort(F.asnumpy(local_nodes1)) + ) + assert np.all( + F.asnumpy(llocal_nodes) == np.arange(len(llocal_nodes)) + ) + + # Check the edge map. + local_edges = F.boolean_mask( + part_g.edge_attributes[dgl.EID], + part_g.edge_attributes["inner_edge"], + ) + llocal_edges = F.nonzero_1d(part_g.edge_attributes["inner_edge"]) + local_edges1 = gpb.partid2eids(i) + assert F.dtype(local_edges1) in (F.int32, F.int64) + assert np.all( + np.sort(F.asnumpy(local_edges)) + == np.sort(F.asnumpy(local_edges1)) + ) + assert np.all( + F.asnumpy(llocal_edges) == np.arange(len(llocal_edges)) + ) + + # Verify the mapping between the reshuffled IDs and the original IDs. 
+ indices, indptr = part_g.indices, part_g.csc_indptr + adj_matrix = dglsp.from_csc(indptr, indices) + part_src_ids, part_dst_ids = adj_matrix.coo() + part_src_ids = F.gather_row( + part_g.node_attributes[dgl.NID], part_src_ids + ) + part_dst_ids = F.gather_row( + part_g.node_attributes[dgl.NID], part_dst_ids + ) + part_eids = part_g.edge_attributes[dgl.EID] + orig_src_ids = F.gather_row(orig_nids, part_src_ids) + orig_dst_ids = F.gather_row(orig_nids, part_dst_ids) + orig_eids1 = F.gather_row(orig_eids, part_eids) + orig_eids2 = g.edge_ids(orig_src_ids, orig_dst_ids) + assert F.shape(orig_eids1)[0] == F.shape(orig_eids2)[0] + assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) + + local_orig_nids = orig_nids[part_g.node_attributes[dgl.NID]] + local_orig_eids = orig_eids[part_g.edge_attributes[dgl.EID]] + part_g.node_attributes["feats"] = F.gather_row( + g.ndata["feats"], local_orig_nids + ) + part_g.edge_attributes["feats"] = F.gather_row( + g.edata["feats"], local_orig_eids + ) + local_nodes = orig_nids[local_nodes] + local_edges = orig_eids[local_edges] + + # part_g.update_all(fn.copy_u("feats", "msg"), fn.sum("msg", "h")) + # part_g.update_all(fn.copy_e("feats", "msg"), fn.sum("msg", "eh")) + # part_g.node_attributes["h"] = adj_matrix@part_g.node_attributes["h"] + + # assert F.allclose( + # F.gather_row(g.ndata["h"], local_nodes), + # F.gather_row(part_g.node_attributes["h"], llocal_nodes), + # ) + # assert F.allclose( + # F.gather_row(g.ndata["eh"], local_nodes), + # F.gather_row(part_g.node_attributes["eh"], llocal_nodes), + # ) + + for name in ["labels", "feats"]: + assert "_N/" + name in node_feats + assert node_feats["_N/" + name].shape[0] == len(local_nodes) + true_feats = F.gather_row(g.ndata[name], local_nodes) + ndata = F.gather_row(node_feats["_N/" + name], local_nid) + assert np.all(F.asnumpy(true_feats) == F.asnumpy(ndata)) + for name in ["feats"]: + efeat_name = _etype_tuple_to_str(DEFAULT_ETYPE) + "/" + name + assert efeat_name in 
edge_feats + assert edge_feats[efeat_name].shape[0] == len(local_edges) + true_feats = F.gather_row(g.edata[name], local_edges) + edata = F.gather_row(edge_feats[efeat_name], local_eid) + assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata)) + + # This only works if node/edge IDs are shuffled. + shuffled_labels.append(node_feats["_N/labels"]) + shuffled_edata.append(edge_feats["_N:_E:_N/feats"]) + + # Verify that we can reconstruct node/edge data for original IDs. + shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) + shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0)) + orig_labels = np.zeros( + shuffled_labels.shape, dtype=shuffled_labels.dtype + ) + orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype) + orig_labels[F.asnumpy(orig_nids)] = shuffled_labels + orig_edata[F.asnumpy(orig_eids)] = shuffled_edata + assert np.all(orig_labels == F.asnumpy(g.ndata["labels"])) + assert np.all(orig_edata == F.asnumpy(g.edata["feats"])) + + node_map = [] + edge_map = [] + for i, (num_nodes, num_edges) in enumerate(part_sizes): + node_map.append(np.ones(num_nodes) * i) + edge_map.append(np.ones(num_edges) * i) + node_map = np.concatenate(node_map) + edge_map = np.concatenate(edge_map) + nid2pid = gpb.nid2partid(F.arange(0, len(node_map))) + assert F.dtype(nid2pid) in (F.int32, F.int64) + assert np.all(F.asnumpy(nid2pid) == node_map) + eid2pid = gpb.eid2partid(F.arange(0, len(edge_map))) + assert F.dtype(eid2pid) in (F.int32, F.int64) + assert np.all(F.asnumpy(eid2pid) == edge_map) @pytest.mark.parametrize("part_method", ["metis", "random"]) @pytest.mark.parametrize("num_parts", [1, 4]) -@pytest.mark.parametrize("store_eids", [True, False]) -@pytest.mark.parametrize("store_inner_node", [True, False]) -@pytest.mark.parametrize("store_inner_edge", [True, False]) @pytest.mark.parametrize("debug_mode", [True, False]) def test_partition_graph_graphbolt_hetero( part_method, num_parts, - store_eids, - store_inner_node, - store_inner_edge, debug_mode, 
n_jobs=1, + num_trainers_per_machine=1, ): + test_ntype = "n1" + test_etype = ("n1", "r1", "n2") reset_envs() if debug_mode: os.environ["DGL_DIST_DEBUG"] = "1" with tempfile.TemporaryDirectory() as test_dir: - g = create_random_hetero() + hg = create_random_hetero() graph_name = "test" - partition_graph( - g, + hg.nodes[test_ntype].data["labels"] = F.arange( + 0, hg.num_nodes(test_ntype) + ) + hg.nodes[test_ntype].data["feats"] = F.tensor( + np.random.randn(hg.num_nodes(test_ntype), 10), F.float32 + ) + hg.edges[test_etype].data["feats"] = F.tensor( + np.random.randn(hg.num_edges(test_etype), 10), F.float32 + ) + hg.edges[test_etype].data["labels"] = F.arange( + 0, hg.num_edges(test_etype) + ) + num_hops = 1 + orig_nids, orig_eids = partition_graph( + hg, graph_name, num_parts, test_dir, part_method=part_method, + return_mapping=True, + num_trainers_per_machine=1, use_graphbolt=True, - store_eids=store_eids, - store_inner_node=store_inner_node, - store_inner_edge=store_inner_edge, + store_eids=True, + store_inner_node=True, + store_inner_edge=True, n_jobs=n_jobs, ) + assert len(orig_nids) == len(hg.ntypes) + assert len(orig_eids) == len(hg.canonical_etypes) + for ntype in hg.ntypes: + assert len(orig_nids[ntype]) == hg.num_nodes(ntype) + for etype in hg.canonical_etypes: + assert len(orig_eids[etype]) == hg.num_edges(etype) + parts = [] + shuffled_labels = [] + shuffled_elabels = [] part_config = os.path.join(test_dir, f"{graph_name}.json") for part_id in range(num_parts): - orig_g = dgl.load_graphs( - os.path.join(test_dir, f"part{part_id}/graph.dgl") - )[0][0] - new_g = load_partition( - part_config, part_id, load_feats=False, use_graphbolt=True - )[0] - orig_indptr, orig_indices, orig_eids = orig_g.adj().csc() - assert th.equal(orig_indptr, new_g.csc_indptr) - assert th.equal(orig_indices, new_g.indices) - assert th.equal( - orig_g.ndata[dgl.NID], new_g.node_attributes[dgl.NID] + part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( + part_config, 
part_id, load_feats=True, use_graphbolt=True ) - if store_inner_node or debug_mode: - assert th.equal( - orig_g.ndata["inner_node"], - new_g.node_attributes["inner_node"], + if num_trainers_per_machine > 1: + for ntype in hg.ntypes: + name = ntype + "/trainer_id" + assert name in node_feats + part_ids = F.floor_div( + node_feats[name], num_trainers_per_machine + ) + assert np.all(F.asnumpy(part_ids) == part_id) + + for etype in hg.canonical_etypes: + name = _etype_tuple_to_str(etype) + "/trainer_id" + assert name in edge_feats + part_ids = F.floor_div( + edge_feats[name], num_trainers_per_machine + ) + assert np.all(F.asnumpy(part_ids) == part_id) + + # Verify the mapping between the reshuffled IDs and the original IDs. + # These are partition-local IDs. + indices, indptr = part_g.indices, part_g.csc_indptr + csc_matrix = dglsp.from_csc(indptr, indices) + part_src_ids, part_dst_ids = csc_matrix.coo() + # These are reshuffled global homogeneous IDs. + part_src_ids = F.gather_row( + part_g.node_attributes[dgl.NID], part_src_ids + ) + part_dst_ids = F.gather_row( + part_g.node_attributes[dgl.NID], part_dst_ids + ) + part_eids = part_g.edge_attributes[dgl.EID] + # These are reshuffled per-type IDs. + src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids) + dst_ntype_ids, part_dst_ids = gpb.map_to_per_ntype(part_dst_ids) + etype_ids, part_eids = gpb.map_to_per_etype(part_eids) + # `IdMap` is in int64 by default. + assert src_ntype_ids.dtype == F.int64 + assert dst_ntype_ids.dtype == F.int64 + assert etype_ids.dtype == F.int64 + with pytest.raises(dgl.utils.internal.InconsistentDtypeException): + gpb.map_to_per_ntype(F.tensor([0], F.int32)) + with pytest.raises(dgl.utils.internal.InconsistentDtypeException): + gpb.map_to_per_etype(F.tensor([0], F.int32)) + # These are original per-type IDs. 
+ for etype_id, etype in enumerate(hg.canonical_etypes): + part_src_ids1 = F.boolean_mask( + part_src_ids, etype_ids == etype_id ) - else: - assert "inner_node" not in new_g.node_attributes - if debug_mode: - assert th.equal( - orig_g.ndata[dgl.NTYPE], new_g.node_attributes[dgl.NTYPE] + src_ntype_ids1 = F.boolean_mask( + src_ntype_ids, etype_ids == etype_id ) - else: - assert dgl.NTYPE not in new_g.node_attributes - if store_eids or debug_mode: - assert th.equal( - orig_g.edata[dgl.EID][orig_eids], - new_g.edge_attributes[dgl.EID], + part_dst_ids1 = F.boolean_mask( + part_dst_ids, etype_ids == etype_id ) - else: - assert dgl.EID not in new_g.edge_attributes - if store_inner_edge or debug_mode: - assert th.equal( - orig_g.edata["inner_edge"], - new_g.edge_attributes["inner_edge"], + dst_ntype_ids1 = F.boolean_mask( + dst_ntype_ids, etype_ids == etype_id ) - else: - assert "inner_edge" not in new_g.edge_attributes - if debug_mode: - assert th.equal( - orig_g.edata[dgl.ETYPE][orig_eids], - new_g.edge_attributes[dgl.ETYPE], + part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id) + assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0])) + assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0])) + src_ntype = hg.ntypes[F.as_scalar(src_ntype_ids1[0])] + dst_ntype = hg.ntypes[F.as_scalar(dst_ntype_ids1[0])] + orig_src_ids1 = F.gather_row( + orig_nids[src_ntype], part_src_ids1 ) - else: - assert dgl.ETYPE not in new_g.edge_attributes - assert th.equal( - orig_g.edata[dgl.ETYPE][orig_eids], new_g.type_per_edge + orig_dst_ids1 = F.gather_row( + orig_nids[dst_ntype], part_dst_ids1 + ) + orig_eids1 = F.gather_row(orig_eids[etype], part_eids1) + orig_eids2 = hg.edge_ids( + orig_src_ids1, orig_dst_ids1, etype=etype + ) + assert len(orig_eids1) == len(orig_eids2) + assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) + parts.append(part_g) + if NTYPE in part_g.node_attributes: + verify_graph_feats( + hg, + gpb, + part_g, + node_feats, + edge_feats, + 
orig_nids, + orig_eids, + use_graphbolt=True, + ) + + shuffled_labels.append(node_feats[test_ntype + "/labels"]) + shuffled_elabels.append( + edge_feats[_etype_tuple_to_str(test_etype) + "/labels"] ) + verify_hetero_graph(hg, parts, True) - for node_type, type_id in new_g.node_type_to_id.items(): - assert g.get_ntype_id(node_type) == type_id - for edge_type, type_id in new_g.edge_type_to_id.items(): - assert g.get_etype_id(_etype_str_to_tuple(edge_type)) == type_id - assert new_g.node_type_offset is None + shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) + shuffled_elabels = F.asnumpy(F.cat(shuffled_elabels, 0)) + orig_labels = np.zeros( + shuffled_labels.shape, dtype=shuffled_labels.dtype + ) + orig_elabels = np.zeros( + shuffled_elabels.shape, dtype=shuffled_elabels.dtype + ) + orig_labels[F.asnumpy(orig_nids[test_ntype])] = shuffled_labels + orig_elabels[F.asnumpy(orig_eids[test_etype])] = shuffled_elabels + assert np.all( + orig_labels == F.asnumpy(hg.nodes[test_ntype].data["labels"]) + ) + assert np.all( + orig_elabels == F.asnumpy(hg.edges[test_etype].data["labels"]) + ) @pytest.mark.parametrize("part_method", ["metis", "random"]) @@ -1461,9 +1793,6 @@ def test_partition_graph_graphbolt_hetero_multi( part_method="random", num_parts=num_parts, n_jobs=4, - store_eids=True, - store_inner_node=True, - store_inner_edge=True, debug_mode=False, ) From 21b592da7f21677b4d9e8efd042a3f64b0e1619f Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Wed, 21 Aug 2024 03:55:43 +0000 Subject: [PATCH 04/39] change test_partition.py and partiton.py --- python/dgl/distributed/partition.py | 41 ++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 10 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 73ea48959597..7d81a15c8cfd 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -109,19 +109,40 @@ def _save_graphs(filename, g_list, formats=None, 
sort_etypes=False): save_graphs(filename, g_list, formats=formats) -def _get_inner_node_mask(graph, ntype_id): - if NTYPE in graph.ndata: - dtype = F.dtype(graph.ndata["inner_node"]) - return ( - graph.ndata["inner_node"] - * F.astype(graph.ndata[NTYPE] == ntype_id, dtype) - == 1 - ) +def _get_inner_node_mask(graph, ntype_id, use_graphbolt=False): + if use_graphbolt: + if NTYPE in graph.node_attributes: + dtype = F.dtype(graph.node_attributes["inner_node"]) + return ( + graph.node_attributes["inner_node"] + * F.astype(graph.node_attributes[NTYPE] == ntype_id, dtype) + == 1 + ) + else: + return graph.node_attributes["inner_node"] == 1 else: - return graph.ndata["inner_node"] == 1 + if NTYPE in graph.ndata: + dtype = F.dtype(graph.ndata["inner_node"]) + return ( + graph.ndata["inner_node"] + * F.astype(graph.ndata[NTYPE] == ntype_id, dtype) + == 1 + ) + else: + return graph.ndata["inner_node"] == 1 -def _get_inner_edge_mask(graph, etype_id): +def _get_inner_edge_mask(graph, etype_id, use_graphbolt=False): + if use_graphbolt: + if graph.type_per_edge is not None: + dtype = F.dtype(graph.edge_attributes["inner_edge"]) + return ( + graph.edge_attributes["inner_edge"] + * F.astype(graph.type_per_edge == etype_id, dtype) + == 1 + ) + else: + return graph.edge_attributes["inner_edge"] == 1 if ETYPE in graph.edata: dtype = F.dtype(graph.edata["inner_edge"]) return ( From 4ef95d5ae932c81a7d94fa1afc015969a39b2801 Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Wed, 21 Aug 2024 04:08:24 +0000 Subject: [PATCH 05/39] partition --- python/dgl/distributed/partition.py | 354 +++++++++++++++++++++------- 1 file changed, 270 insertions(+), 84 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 7d81a15c8cfd..2559f6ec943e 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -88,7 +88,7 @@ def _dump_part_config(part_config, part_metadata): json.dump(part_metadata, outfile, 
sort_keys=False, indent=4) -def _save_graphs(filename, g_list, formats=None, sort_etypes=False): +def _process_partitions(g_list, formats=None, sort_etypes=False): """Preprocess partitions before saving: 1. format data types. 2. sort csc/csr by tag. @@ -106,6 +106,13 @@ def _save_graphs(filename, g_list, formats=None, sort_etypes=False): g = sort_csr_by_tag(g, tag=g.edata[ETYPE], tag_type="edge") if "csc" in formats: g = sort_csc_by_tag(g, tag=g.edata[ETYPE], tag_type="edge") + return g_list + + +def _save_graphs(filename, g_list, formats=None, sort_etypes=False): + g_list = _process_partitions( + g_list, formats=formats, sort_etypes=sort_etypes + ) save_graphs(filename, g_list, formats=formats) @@ -336,9 +343,10 @@ def load_partition(part_config, part_id, load_feats=True, use_graphbolt=False): "part-{}".format(part_id) in part_metadata ), "part-{} does not exist".format(part_id) part_files = part_metadata["part-{}".format(part_id)] - part_graph_field = "part_graph" if use_graphbolt: part_graph_field = "part_graph_graphbolt" + else: + part_graph_field = "part_graph" assert ( part_graph_field in part_files ), f"the partition does not contain graph structure: {part_graph_field}" @@ -465,6 +473,105 @@ def load_partition_feats( return node_feats, edge_feats +def _load_partition_book_from_metadata(part_metadata, part_id): + assert "num_parts" in part_metadata, "num_parts does not exist." + assert ( + part_metadata["num_parts"] > part_id + ), "part {} is out of range (#parts: {})".format( + part_id, part_metadata["num_parts"] + ) + num_parts = part_metadata["num_parts"] + assert ( + "num_nodes" in part_metadata + ), "cannot get the number of nodes of the global graph." + assert ( + "num_edges" in part_metadata + ), "cannot get the number of edges of the global graph." + assert "node_map" in part_metadata, "cannot get the node map." + assert "edge_map" in part_metadata, "cannot get the edge map." 
+ assert "graph_name" in part_metadata, "cannot get the graph name" + + # If this is a range partitioning, node_map actually stores a list, whose elements + # indicate the boundary of range partitioning. Otherwise, node_map stores a filename + # that contains node map in a NumPy array. + node_map = part_metadata["node_map"] + edge_map = part_metadata["edge_map"] + if isinstance(node_map, dict): + for key in node_map: + is_range_part = isinstance(node_map[key], list) + break + elif isinstance(node_map, list): + is_range_part = True + node_map = {DEFAULT_NTYPE: node_map} + else: + is_range_part = False + if isinstance(edge_map, list): + edge_map = {DEFAULT_ETYPE: edge_map} + + ntypes = {DEFAULT_NTYPE: 0} + etypes = {DEFAULT_ETYPE: 0} + if "ntypes" in part_metadata: + ntypes = part_metadata["ntypes"] + if "etypes" in part_metadata: + etypes = part_metadata["etypes"] + + if isinstance(node_map, dict): + for key in node_map: + assert key in ntypes, "The node type {} is invalid".format(key) + if isinstance(edge_map, dict): + for key in edge_map: + assert key in etypes, "The edge type {} is invalid".format(key) + + if not is_range_part: + raise TypeError("Only RangePartitionBook is supported currently.") + + node_map = _get_part_ranges(node_map) + edge_map = _get_part_ranges(edge_map) + + # Format dtype of node/edge map if dtype is specified. + def _format_node_edge_map(part_metadata, map_type, data): + key = f"{map_type}_map_dtype" + if key not in part_metadata: + return data + dtype = part_metadata[key] + assert dtype in ["int32", "int64"], ( + f"The {map_type} map dtype should be either int32 or int64, " + f"but got {dtype}." + ) + for key in data: + data[key] = data[key].astype(dtype) + return data + + node_map = _format_node_edge_map(part_metadata, "node", node_map) + edge_map = _format_node_edge_map(part_metadata, "edge", edge_map) + + # Sort the node/edge maps by the node/edge type ID. 
+ node_map = dict(sorted(node_map.items(), key=lambda x: ntypes[x[0]])) + edge_map = dict(sorted(edge_map.items(), key=lambda x: etypes[x[0]])) + + def _assert_is_sorted(id_map): + id_ranges = np.array(list(id_map.values())) + ids = [] + for i in range(num_parts): + ids.append(id_ranges[:, i, :]) + ids = np.array(ids).flatten() + assert np.all( + ids[:-1] <= ids[1:] + ), f"The node/edge map is not sorted: {ids}" + + _assert_is_sorted(node_map) + _assert_is_sorted(edge_map) + + return ( + RangePartitionBook( + part_id, num_parts, node_map, edge_map, ntypes, etypes + ), + part_metadata["graph_name"], + ntypes, + etypes, + ) + + def load_partition_book(part_config, part_id): """Load a graph partition book from the partition config file. @@ -1326,31 +1433,41 @@ def get_homogeneous(g, balance_ntypes): part_dir = os.path.join(out_path, "part" + str(part_id)) node_feat_file = os.path.join(part_dir, "node_feat.dgl") edge_feat_file = os.path.join(part_dir, "edge_feat.dgl") - part_graph_file = os.path.join(part_dir, "graph.dgl") - part_metadata["part-{}".format(part_id)] = { - "node_feats": os.path.relpath(node_feat_file, out_path), - "edge_feats": os.path.relpath(edge_feat_file, out_path), - "part_graph": os.path.relpath(part_graph_file, out_path), - } + os.makedirs(part_dir, mode=0o775, exist_ok=True) save_tensors(node_feat_file, node_feats) save_tensors(edge_feat_file, edge_feats) + part_metadata["part-{}".format(part_id)] = { + "node_feats": os.path.relpath(node_feat_file, out_path), + "edge_feats": os.path.relpath(edge_feat_file, out_path), + } sort_etypes = len(g.etypes) > 1 - _save_graphs( - part_graph_file, - [part], - formats=graph_formats, - sort_etypes=sort_etypes, - ) - print( - "Save partitions: {:.3f} seconds, peak memory: {:.3f} GB".format( - time.time() - start, get_peak_mem() - ) - ) + if not use_graphbolt: + part_graph_file = os.path.join(part_dir, "graph.dgl") + part_metadata["part-{}".format(part_id)][ + "part_graph" + ] = os.path.relpath(part_graph_file, 
out_path) + _save_graphs( + part_graph_file, + [part], + formats=graph_formats, + sort_etypes=sort_etypes, + ) + else: + part = _process_partitions([part], graph_formats, sort_etypes)[0] part_config = os.path.join(out_path, graph_name + ".json") - _dump_part_config(part_config, part_metadata) + if use_graphbolt: + kwargs["graph_formats"] = graph_formats + _dgl_partition_to_graphbolt( + part_config, + parts=parts, + part_meta=part_metadata, + **kwargs, + ) + else: + _dump_part_config(part_config, part_metadata) num_cuts = sim_g.num_edges() - tot_num_inner_edges if num_parts == 1: @@ -1361,12 +1478,11 @@ def get_homogeneous(g, balance_ntypes): ) ) - if use_graphbolt: - kwargs["graph_formats"] = graph_formats - dgl_partition_to_graphbolt( - part_config, - **kwargs, + print( + "Save partitions: {:.3f} seconds, peak memory: {:.3f} GB".format( + time.time() - start, get_peak_mem() ) + ) if return_mapping: return orig_nids, orig_eids @@ -1414,8 +1530,21 @@ def init_type_per_edge(graph, gpb): return etype_ids +def _load_parts(part_config, part_id, parts): + """load parts from variable or dist.""" + if parts is None: + graph, _, _, _, _, _, _ = load_partition( + part_config, part_id, load_feats=False + ) + else: + graph = parts[part_id] + return graph + + def gb_convert_single_dgl_partition( part_id, + parts, + part_meta, graph_formats, part_config, store_eids, @@ -1448,14 +1577,18 @@ def gb_convert_single_dgl_partition( "Running in debug mode which means all attributes of DGL partitions" " will be saved to the new format." 
) - - part_meta = _load_part_config(part_config) + if part_meta is None: + part_meta = _load_part_config(part_config) num_parts = part_meta["num_parts"] - graph, _, _, gpb, _, _, _ = load_partition( - part_config, part_id, load_feats=False + graph = _load_parts(part_config, part_id, parts) + + gpb, _, ntypes, etypes = ( + load_partition_book(part_config, part_id) + if part_meta is None + else _load_partition_book_from_metadata(part_meta, part_id) ) - _, _, ntypes, etypes = load_partition_book(part_config, part_id) + is_homo = is_homogeneous(ntypes, etypes) node_type_to_id = ( None if is_homo else {ntype: ntid for ntid, ntype in enumerate(ntypes)} @@ -1561,12 +1694,12 @@ def gb_convert_single_dgl_partition( node_type_to_id=node_type_to_id, edge_type_to_id=edge_type_to_id, ) - orig_graph_path = os.path.join( + orig_feats_path = os.path.join( os.path.dirname(part_config), - part_meta[f"part-{part_id}"]["part_graph"], + part_meta[f"part-{part_id}"]["node_feats"], ) csc_graph_path = os.path.join( - os.path.dirname(orig_graph_path), "fused_csc_sampling_graph.pt" + os.path.dirname(orig_feats_path), "fused_csc_sampling_graph.pt" ) torch.save(csc_graph, csc_graph_path) @@ -1574,55 +1707,17 @@ def gb_convert_single_dgl_partition( # Update graph path. -def dgl_partition_to_graphbolt( +def convert_partition_to_graphbolt( + part_meta, + graph_formats, part_config, - *, - store_eids=True, - store_inner_node=False, - store_inner_edge=False, - graph_formats=None, - n_jobs=1, + store_eids, + store_inner_node, + store_inner_edge, + n_jobs, + num_parts, + parts=None, ): - """Convert partitions of dgl to FusedCSCSamplingGraph of GraphBolt. - - This API converts `DGLGraph` partitions to `FusedCSCSamplingGraph` which is - dedicated for sampling in `GraphBolt`. New graphs will be stored alongside - original graph as `fused_csc_sampling_graph.pt`. - - In the near future, partitions are supposed to be saved as - `FusedCSCSamplingGraph` directly. At that time, this API should be deprecated. 
- - Parameters - ---------- - part_config : str - The partition configuration JSON file. - store_eids : bool, optional - Whether to store edge IDs in the new graph. Default: True. - store_inner_node : bool, optional - Whether to store inner node mask in the new graph. Default: False. - store_inner_edge : bool, optional - Whether to store inner edge mask in the new graph. Default: False. - graph_formats : str or list[str], optional - Save partitions in specified formats. It could be any combination of - `coo`, `csc`. As `csc` format is mandatory for `FusedCSCSamplingGraph`, - it is not necessary to specify this argument. It's mainly for - specifying `coo` format to save edge ID mapping and destination node - IDs. If not specified, whether to save `coo` format is determined by - the availability of the format in DGL partitions. Default: None. - n_jobs: int - Number of parallel jobs to run during partition conversion. Max parallelism - is determined by the partition count. - """ - debug_mode = "DGL_DIST_DEBUG" in os.environ - if debug_mode: - dgl_warning( - "Running in debug mode which means all attributes of DGL partitions" - " will be saved to the new format." - ) - part_meta = _load_part_config(part_config) - new_part_meta = copy.deepcopy(part_meta) - num_parts = part_meta["num_parts"] - # [Rui] DGL partitions are always saved as homogeneous graphs even though # the original graph is heterogeneous. But heterogeneous information like # node/edge types are saved as node/edge data alongside with partitions. @@ -1635,6 +1730,8 @@ def dgl_partition_to_graphbolt( # Iterate over partitions. convert_with_format = partial( gb_convert_single_dgl_partition, + parts=parts, + part_meta=part_meta, graph_formats=graph_formats, part_config=part_config, store_eids=store_eids, @@ -1664,15 +1761,104 @@ def dgl_partition_to_graphbolt( for part_id in range(num_parts): # Update graph path. 
- new_part_meta[f"part-{part_id}"][ + part_meta[f"part-{part_id}"][ "part_graph_graphbolt" ] = rel_path_results[part_id] # Save dtype info into partition config. # [TODO][Rui] Always use int64_t for node/edge IDs in GraphBolt. See more # details in #7175. - new_part_meta["node_map_dtype"] = "int64" - new_part_meta["edge_map_dtype"] = "int64" + part_meta["node_map_dtype"] = "int64" + part_meta["edge_map_dtype"] = "int64" - _dump_part_config(part_config, new_part_meta) + _dump_part_config(part_config, part_meta) print(f"Converted partitions to GraphBolt format into {part_config}") + +def _dgl_partition_to_graphbolt( + part_config, + part_meta, + parts, + *, + store_eids=True, + store_inner_node=False, + store_inner_edge=False, + graph_formats=None, + n_jobs=1, +): + debug_mode = "DGL_DIST_DEBUG" in os.environ + if debug_mode: + dgl_warning( + "Running in debug mode which means all attributes of DGL partitions" + " will be saved to the new format." + ) + new_part_meta = copy.deepcopy(part_meta) + num_parts = part_meta["num_parts"] + convert_partition_to_graphbolt(new_part_meta, + graph_formats, + part_config, + store_eids, + store_inner_node, + store_inner_edge, + n_jobs, + num_parts, + parts=parts, + ) + + +def dgl_partition_to_graphbolt( + part_config, + *, + store_eids=True, + store_inner_node=False, + store_inner_edge=False, + graph_formats=None, + n_jobs=1, +): + """Convert partitions of dgl to FusedCSCSamplingGraph of GraphBolt. + + This API converts `DGLGraph` partitions to `FusedCSCSamplingGraph` which is + dedicated for sampling in `GraphBolt`. New graphs will be stored alongside + original graph as `fused_csc_sampling_graph.pt`. + + In the near future, partitions are supposed to be saved as + `FusedCSCSamplingGraph` directly. At that time, this API should be deprecated. + + Parameters + ---------- + part_config : str + The partition configuration JSON file. + store_eids : bool, optional + Whether to store edge IDs in the new graph. Default: True. 
+ store_inner_node : bool, optional + Whether to store inner node mask in the new graph. Default: False. + store_inner_edge : bool, optional + Whether to store inner edge mask in the new graph. Default: False. + graph_formats : str or list[str], optional + Save partitions in specified formats. It could be any combination of + `coo`, `csc`. As `csc` format is mandatory for `FusedCSCSamplingGraph`, + it is not necessary to specify this argument. It's mainly for + specifying `coo` format to save edge ID mapping and destination node + IDs. If not specified, whether to save `coo` format is determined by + the availability of the format in DGL partitions. Default: None. + n_jobs: int + Number of parallel jobs to run during partition conversion. Max parallelism + is determined by the partition count. + """ + debug_mode = "DGL_DIST_DEBUG" in os.environ + if debug_mode: + dgl_warning( + "Running in debug mode which means all attributes of DGL partitions" + " will be saved to the new format." + ) + part_meta = _load_part_config(part_config) + new_part_meta = copy.deepcopy(part_meta) + num_parts = part_meta["num_parts"] + convert_partition_to_graphbolt(new_part_meta, + graph_formats, + part_config, + store_eids, + store_inner_node, + store_inner_edge, + n_jobs, + num_parts, + ) \ No newline at end of file From 1074f85a8ef230e16a73497ce1251036bd603c5b Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Wed, 21 Aug 2024 04:22:27 +0000 Subject: [PATCH 06/39] change partition --- python/dgl/distributed/partition.py | 49 +++++++++++++++-------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 2559f6ec943e..c70e406691f1 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1761,9 +1761,9 @@ def convert_partition_to_graphbolt( for part_id in range(num_parts): # Update graph path. 
- part_meta[f"part-{part_id}"][ - "part_graph_graphbolt" - ] = rel_path_results[part_id] + part_meta[f"part-{part_id}"]["part_graph_graphbolt"] = rel_path_results[ + part_id + ] # Save dtype info into partition config. # [TODO][Rui] Always use int64_t for node/edge IDs in GraphBolt. See more @@ -1774,6 +1774,7 @@ def convert_partition_to_graphbolt( _dump_part_config(part_config, part_meta) print(f"Converted partitions to GraphBolt format into {part_config}") + def _dgl_partition_to_graphbolt( part_config, part_meta, @@ -1793,17 +1794,18 @@ def _dgl_partition_to_graphbolt( ) new_part_meta = copy.deepcopy(part_meta) num_parts = part_meta["num_parts"] - convert_partition_to_graphbolt(new_part_meta, - graph_formats, - part_config, - store_eids, - store_inner_node, - store_inner_edge, - n_jobs, - num_parts, - parts=parts, - ) - + convert_partition_to_graphbolt( + new_part_meta, + graph_formats, + part_config, + store_eids, + store_inner_node, + store_inner_edge, + n_jobs, + num_parts, + parts=parts, + ) + def dgl_partition_to_graphbolt( part_config, @@ -1853,12 +1855,13 @@ def dgl_partition_to_graphbolt( part_meta = _load_part_config(part_config) new_part_meta = copy.deepcopy(part_meta) num_parts = part_meta["num_parts"] - convert_partition_to_graphbolt(new_part_meta, - graph_formats, - part_config, - store_eids, - store_inner_node, - store_inner_edge, - n_jobs, - num_parts, - ) \ No newline at end of file + convert_partition_to_graphbolt( + new_part_meta, + graph_formats, + part_config, + store_eids, + store_inner_node, + store_inner_edge, + n_jobs, + num_parts, + ) From e03376dbd637dcce6b5179e3f49d991d17e0da5e Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Wed, 21 Aug 2024 04:53:31 +0000 Subject: [PATCH 07/39] change partition internal function --- python/dgl/distributed/partition.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 
c70e406691f1..0f5e317b1627 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1707,7 +1707,7 @@ def gb_convert_single_dgl_partition( # Update graph path. -def convert_partition_to_graphbolt( +def _convert_partition_to_graphbolt( part_meta, graph_formats, part_config, @@ -1794,7 +1794,7 @@ def _dgl_partition_to_graphbolt( ) new_part_meta = copy.deepcopy(part_meta) num_parts = part_meta["num_parts"] - convert_partition_to_graphbolt( + _convert_partition_to_graphbolt( new_part_meta, graph_formats, part_config, @@ -1855,7 +1855,7 @@ def dgl_partition_to_graphbolt( part_meta = _load_part_config(part_config) new_part_meta = copy.deepcopy(part_meta) num_parts = part_meta["num_parts"] - convert_partition_to_graphbolt( + _convert_partition_to_graphbolt( new_part_meta, graph_formats, part_config, From 090a4302fefa2e14f3603009ac328c822cc034e6 Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Fri, 23 Aug 2024 07:24:23 +0000 Subject: [PATCH 08/39] change partition --- python/dgl/distributed/partition.py | 416 ++++++++--- tests/distributed/test_partition.py | 1069 ++++++++++++++++----------- 2 files changed, 926 insertions(+), 559 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 7d81a15c8cfd..f5d81decea92 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -88,7 +88,7 @@ def _dump_part_config(part_config, part_metadata): json.dump(part_metadata, outfile, sort_keys=False, indent=4) -def _save_graphs(filename, g_list, formats=None, sort_etypes=False): +def _process_partitions(g_list, formats=None, sort_etypes=False): """Preprocess partitions before saving: 1. format data types. 2. sort csc/csr by tag. 
@@ -106,52 +106,40 @@ def _save_graphs(filename, g_list, formats=None, sort_etypes=False): g = sort_csr_by_tag(g, tag=g.edata[ETYPE], tag_type="edge") if "csc" in formats: g = sort_csc_by_tag(g, tag=g.edata[ETYPE], tag_type="edge") + return g_list + + +def _save_graphs(filename, g_list, formats=None, sort_etypes=False): + g_list = _process_partitions( + g_list, formats=formats, sort_etypes=sort_etypes + ) save_graphs(filename, g_list, formats=formats) def _get_inner_node_mask(graph, ntype_id, use_graphbolt=False): - if use_graphbolt: - if NTYPE in graph.node_attributes: - dtype = F.dtype(graph.node_attributes["inner_node"]) - return ( - graph.node_attributes["inner_node"] - * F.astype(graph.node_attributes[NTYPE] == ntype_id, dtype) - == 1 - ) - else: - return graph.node_attributes["inner_node"] == 1 + ndata = graph.node_attributes if use_graphbolt else graph.ndata + assert "inner_node" in ndata, '"inner_node" is not nodes\' data' + if NTYPE in ndata: + dtype = F.dtype(ndata["inner_node"]) + return ( + ndata["inner_node"] * F.astype(ndata[NTYPE] == ntype_id, dtype) == 1 + ) else: - if NTYPE in graph.ndata: - dtype = F.dtype(graph.ndata["inner_node"]) - return ( - graph.ndata["inner_node"] - * F.astype(graph.ndata[NTYPE] == ntype_id, dtype) - == 1 - ) - else: - return graph.ndata["inner_node"] == 1 + return ndata["inner_node"] == 1 def _get_inner_edge_mask(graph, etype_id, use_graphbolt=False): - if use_graphbolt: - if graph.type_per_edge is not None: - dtype = F.dtype(graph.edge_attributes["inner_edge"]) - return ( - graph.edge_attributes["inner_edge"] - * F.astype(graph.type_per_edge == etype_id, dtype) - == 1 - ) - else: - return graph.edge_attributes["inner_edge"] == 1 - if ETYPE in graph.edata: - dtype = F.dtype(graph.edata["inner_edge"]) - return ( - graph.edata["inner_edge"] - * F.astype(graph.edata[ETYPE] == etype_id, dtype) - == 1 - ) + edata = graph.edge_attributes if use_graphbolt else graph.edata + etype = ( + graph.type_per_edge + if use_graphbolt + 
else (graph.edata[ETYPE] if ETYPE in graph.edata else None) + ) + if etype is not None: + dtype = F.dtype(edata["inner_edge"]) + return edata["inner_edge"] * F.astype(etype == etype_id, dtype) == 1 else: - return graph.edata["inner_edge"] == 1 + return edata["inner_edge"] == 1 def _get_part_ranges(id_ranges): @@ -336,9 +324,10 @@ def load_partition(part_config, part_id, load_feats=True, use_graphbolt=False): "part-{}".format(part_id) in part_metadata ), "part-{} does not exist".format(part_id) part_files = part_metadata["part-{}".format(part_id)] - part_graph_field = "part_graph" if use_graphbolt: part_graph_field = "part_graph_graphbolt" + else: + part_graph_field = "part_graph" assert ( part_graph_field in part_files ), f"the partition does not contain graph structure: {part_graph_field}" @@ -465,6 +454,105 @@ def load_partition_feats( return node_feats, edge_feats +def _load_partition_book_from_metadata(part_metadata, part_id): + assert "num_parts" in part_metadata, "num_parts does not exist." + assert ( + part_metadata["num_parts"] > part_id + ), "part {} is out of range (#parts: {})".format( + part_id, part_metadata["num_parts"] + ) + num_parts = part_metadata["num_parts"] + assert ( + "num_nodes" in part_metadata + ), "cannot get the number of nodes of the global graph." + assert ( + "num_edges" in part_metadata + ), "cannot get the number of edges of the global graph." + assert "node_map" in part_metadata, "cannot get the node map." + assert "edge_map" in part_metadata, "cannot get the edge map." + assert "graph_name" in part_metadata, "cannot get the graph name" + + # If this is a range partitioning, node_map actually stores a list, whose elements + # indicate the boundary of range partitioning. Otherwise, node_map stores a filename + # that contains node map in a NumPy array. 
+ node_map = part_metadata["node_map"] + edge_map = part_metadata["edge_map"] + if isinstance(node_map, dict): + for key in node_map: + is_range_part = isinstance(node_map[key], list) + break + elif isinstance(node_map, list): + is_range_part = True + node_map = {DEFAULT_NTYPE: node_map} + else: + is_range_part = False + if isinstance(edge_map, list): + edge_map = {DEFAULT_ETYPE: edge_map} + + ntypes = {DEFAULT_NTYPE: 0} + etypes = {DEFAULT_ETYPE: 0} + if "ntypes" in part_metadata: + ntypes = part_metadata["ntypes"] + if "etypes" in part_metadata: + etypes = part_metadata["etypes"] + + if isinstance(node_map, dict): + for key in node_map: + assert key in ntypes, "The node type {} is invalid".format(key) + if isinstance(edge_map, dict): + for key in edge_map: + assert key in etypes, "The edge type {} is invalid".format(key) + + if not is_range_part: + raise TypeError("Only RangePartitionBook is supported currently.") + + node_map = _get_part_ranges(node_map) + edge_map = _get_part_ranges(edge_map) + + # Format dtype of node/edge map if dtype is specified. + def _format_node_edge_map(part_metadata, map_type, data): + key = f"{map_type}_map_dtype" + if key not in part_metadata: + return data + dtype = part_metadata[key] + assert dtype in ["int32", "int64"], ( + f"The {map_type} map dtype should be either int32 or int64, " + f"but got {dtype}." + ) + for key in data: + data[key] = data[key].astype(dtype) + return data + + node_map = _format_node_edge_map(part_metadata, "node", node_map) + edge_map = _format_node_edge_map(part_metadata, "edge", edge_map) + + # Sort the node/edge maps by the node/edge type ID. 
+ node_map = dict(sorted(node_map.items(), key=lambda x: ntypes[x[0]])) + edge_map = dict(sorted(edge_map.items(), key=lambda x: etypes[x[0]])) + + def _assert_is_sorted(id_map): + id_ranges = np.array(list(id_map.values())) + ids = [] + for i in range(num_parts): + ids.append(id_ranges[:, i, :]) + ids = np.array(ids).flatten() + assert np.all( + ids[:-1] <= ids[1:] + ), f"The node/edge map is not sorted: {ids}" + + _assert_is_sorted(node_map) + _assert_is_sorted(edge_map) + + return ( + RangePartitionBook( + part_id, num_parts, node_map, edge_map, ntypes, etypes + ), + part_metadata["graph_name"], + ntypes, + etypes, + ) + + def load_partition_book(part_config, part_id): """Load a graph partition book from the partition config file. @@ -1326,31 +1414,41 @@ def get_homogeneous(g, balance_ntypes): part_dir = os.path.join(out_path, "part" + str(part_id)) node_feat_file = os.path.join(part_dir, "node_feat.dgl") edge_feat_file = os.path.join(part_dir, "edge_feat.dgl") - part_graph_file = os.path.join(part_dir, "graph.dgl") - part_metadata["part-{}".format(part_id)] = { - "node_feats": os.path.relpath(node_feat_file, out_path), - "edge_feats": os.path.relpath(edge_feat_file, out_path), - "part_graph": os.path.relpath(part_graph_file, out_path), - } + os.makedirs(part_dir, mode=0o775, exist_ok=True) save_tensors(node_feat_file, node_feats) save_tensors(edge_feat_file, edge_feats) + part_metadata["part-{}".format(part_id)] = { + "node_feats": os.path.relpath(node_feat_file, out_path), + "edge_feats": os.path.relpath(edge_feat_file, out_path), + } sort_etypes = len(g.etypes) > 1 - _save_graphs( - part_graph_file, - [part], - formats=graph_formats, - sort_etypes=sort_etypes, - ) - print( - "Save partitions: {:.3f} seconds, peak memory: {:.3f} GB".format( - time.time() - start, get_peak_mem() - ) - ) + if not use_graphbolt: + part_graph_file = os.path.join(part_dir, "graph.dgl") + part_metadata["part-{}".format(part_id)][ + "part_graph" + ] = os.path.relpath(part_graph_file, 
out_path) + _save_graphs( + part_graph_file, + [part], + formats=graph_formats, + sort_etypes=sort_etypes, + ) + else: + part = _process_partitions([part], graph_formats, sort_etypes)[0] part_config = os.path.join(out_path, graph_name + ".json") - _dump_part_config(part_config, part_metadata) + if use_graphbolt: + kwargs["graph_formats"] = graph_formats + _dgl_partition_to_graphbolt( + part_config, + parts=parts, + part_meta=part_metadata, + **kwargs, + ) + else: + _dump_part_config(part_config, part_metadata) num_cuts = sim_g.num_edges() - tot_num_inner_edges if num_parts == 1: @@ -1361,12 +1459,11 @@ def get_homogeneous(g, balance_ntypes): ) ) - if use_graphbolt: - kwargs["graph_formats"] = graph_formats - dgl_partition_to_graphbolt( - part_config, - **kwargs, + print( + "Save partitions: {:.3f} seconds, peak memory: {:.3f} GB".format( + time.time() - start, get_peak_mem() ) + ) if return_mapping: return orig_nids, orig_eids @@ -1414,8 +1511,21 @@ def init_type_per_edge(graph, gpb): return etype_ids +def _load_parts(part_config, part_id, parts): + """load parts from variable or dist.""" + if parts is None: + graph, _, _, _, _, _, _ = load_partition( + part_config, part_id, load_feats=False + ) + else: + graph = parts[part_id] + return graph + + def gb_convert_single_dgl_partition( part_id, + parts, + part_meta, graph_formats, part_config, store_eids, @@ -1448,14 +1558,18 @@ def gb_convert_single_dgl_partition( "Running in debug mode which means all attributes of DGL partitions" " will be saved to the new format." 
) - - part_meta = _load_part_config(part_config) + if part_meta is None: + part_meta = _load_part_config(part_config) num_parts = part_meta["num_parts"] - graph, _, _, gpb, _, _, _ = load_partition( - part_config, part_id, load_feats=False + graph = _load_parts(part_config, part_id, parts) + + gpb, _, ntypes, etypes = ( + load_partition_book(part_config, part_id) + if part_meta is None + else _load_partition_book_from_metadata(part_meta, part_id) ) - _, _, ntypes, etypes = load_partition_book(part_config, part_id) + is_homo = is_homogeneous(ntypes, etypes) node_type_to_id = ( None if is_homo else {ntype: ntid for ntid, ntype in enumerate(ntypes)} @@ -1561,12 +1675,12 @@ def gb_convert_single_dgl_partition( node_type_to_id=node_type_to_id, edge_type_to_id=edge_type_to_id, ) - orig_graph_path = os.path.join( + orig_feats_path = os.path.join( os.path.dirname(part_config), - part_meta[f"part-{part_id}"]["part_graph"], + part_meta[f"part-{part_id}"]["node_feats"], ) csc_graph_path = os.path.join( - os.path.dirname(orig_graph_path), "fused_csc_sampling_graph.pt" + os.path.dirname(orig_feats_path), "fused_csc_sampling_graph.pt" ) torch.save(csc_graph, csc_graph_path) @@ -1574,55 +1688,17 @@ def gb_convert_single_dgl_partition( # Update graph path. -def dgl_partition_to_graphbolt( +def _convert_partition_to_graphbolt( + part_meta, + graph_formats, part_config, - *, - store_eids=True, - store_inner_node=False, - store_inner_edge=False, - graph_formats=None, - n_jobs=1, + store_eids, + store_inner_node, + store_inner_edge, + n_jobs, + num_parts, + parts=None, ): - """Convert partitions of dgl to FusedCSCSamplingGraph of GraphBolt. - - This API converts `DGLGraph` partitions to `FusedCSCSamplingGraph` which is - dedicated for sampling in `GraphBolt`. New graphs will be stored alongside - original graph as `fused_csc_sampling_graph.pt`. - - In the near future, partitions are supposed to be saved as - `FusedCSCSamplingGraph` directly. 
At that time, this API should be deprecated. - - Parameters - ---------- - part_config : str - The partition configuration JSON file. - store_eids : bool, optional - Whether to store edge IDs in the new graph. Default: True. - store_inner_node : bool, optional - Whether to store inner node mask in the new graph. Default: False. - store_inner_edge : bool, optional - Whether to store inner edge mask in the new graph. Default: False. - graph_formats : str or list[str], optional - Save partitions in specified formats. It could be any combination of - `coo`, `csc`. As `csc` format is mandatory for `FusedCSCSamplingGraph`, - it is not necessary to specify this argument. It's mainly for - specifying `coo` format to save edge ID mapping and destination node - IDs. If not specified, whether to save `coo` format is determined by - the availability of the format in DGL partitions. Default: None. - n_jobs: int - Number of parallel jobs to run during partition conversion. Max parallelism - is determined by the partition count. - """ - debug_mode = "DGL_DIST_DEBUG" in os.environ - if debug_mode: - dgl_warning( - "Running in debug mode which means all attributes of DGL partitions" - " will be saved to the new format." - ) - part_meta = _load_part_config(part_config) - new_part_meta = copy.deepcopy(part_meta) - num_parts = part_meta["num_parts"] - # [Rui] DGL partitions are always saved as homogeneous graphs even though # the original graph is heterogeneous. But heterogeneous information like # node/edge types are saved as node/edge data alongside with partitions. @@ -1635,6 +1711,8 @@ def dgl_partition_to_graphbolt( # Iterate over partitions. convert_with_format = partial( gb_convert_single_dgl_partition, + parts=parts, + part_meta=part_meta, graph_formats=graph_formats, part_config=part_config, store_eids=store_eids, @@ -1664,15 +1742,107 @@ def dgl_partition_to_graphbolt( for part_id in range(num_parts): # Update graph path. 
- new_part_meta[f"part-{part_id}"][ - "part_graph_graphbolt" - ] = rel_path_results[part_id] + part_meta[f"part-{part_id}"]["part_graph_graphbolt"] = rel_path_results[ + part_id + ] # Save dtype info into partition config. # [TODO][Rui] Always use int64_t for node/edge IDs in GraphBolt. See more # details in #7175. - new_part_meta["node_map_dtype"] = "int64" - new_part_meta["edge_map_dtype"] = "int64" + part_meta["node_map_dtype"] = "int64" + part_meta["edge_map_dtype"] = "int64" - _dump_part_config(part_config, new_part_meta) + _dump_part_config(part_config, part_meta) print(f"Converted partitions to GraphBolt format into {part_config}") + + +def _dgl_partition_to_graphbolt( + part_config, + part_meta, + parts, + *, + store_eids=True, + store_inner_node=False, + store_inner_edge=False, + graph_formats=None, + n_jobs=1, +): + debug_mode = "DGL_DIST_DEBUG" in os.environ + if debug_mode: + dgl_warning( + "Running in debug mode which means all attributes of DGL partitions" + " will be saved to the new format." + ) + new_part_meta = copy.deepcopy(part_meta) + num_parts = part_meta["num_parts"] + _convert_partition_to_graphbolt( + new_part_meta, + graph_formats, + part_config, + store_eids, + store_inner_node, + store_inner_edge, + n_jobs, + num_parts, + parts=parts, + ) + + +def dgl_partition_to_graphbolt( + part_config, + *, + store_eids=True, + store_inner_node=False, + store_inner_edge=False, + graph_formats=None, + n_jobs=1, +): + """Convert partitions of dgl to FusedCSCSamplingGraph of GraphBolt. + + This API converts `DGLGraph` partitions to `FusedCSCSamplingGraph` which is + dedicated for sampling in `GraphBolt`. New graphs will be stored alongside + original graph as `fused_csc_sampling_graph.pt`. + + In the near future, partitions are supposed to be saved as + `FusedCSCSamplingGraph` directly. At that time, this API should be deprecated. + + Parameters + ---------- + part_config : str + The partition configuration JSON file. 
+ store_eids : bool, optional + Whether to store edge IDs in the new graph. Default: True. + store_inner_node : bool, optional + Whether to store inner node mask in the new graph. Default: False. + store_inner_edge : bool, optional + Whether to store inner edge mask in the new graph. Default: False. + graph_formats : str or list[str], optional + Save partitions in specified formats. It could be any combination of + `coo`, `csc`. As `csc` format is mandatory for `FusedCSCSamplingGraph`, + it is not necessary to specify this argument. It's mainly for + specifying `coo` format to save edge ID mapping and destination node + IDs. If not specified, whether to save `coo` format is determined by + the availability of the format in DGL partitions. Default: None. + n_jobs: int + Number of parallel jobs to run during partition conversion. Max parallelism + is determined by the partition count. + """ + debug_mode = "DGL_DIST_DEBUG" in os.environ + if debug_mode: + dgl_warning( + "Running in debug mode which means all attributes of DGL partitions" + " will be saved to the new format." 
+ ) + part_meta = _load_part_config(part_config) + new_part_meta = copy.deepcopy(part_meta) + num_parts = part_meta["num_parts"] + _convert_partition_to_graphbolt( + new_part_meta, + graph_formats, + part_config, + store_eids, + store_inner_node, + store_inner_edge, + n_jobs, + num_parts, + ) diff --git a/tests/distributed/test_partition.py b/tests/distributed/test_partition.py index 0f2425cb054d..ab877c19f6f7 100644 --- a/tests/distributed/test_partition.py +++ b/tests/distributed/test_partition.py @@ -38,18 +38,18 @@ def _verify_partition_data_types(part_g, use_graphbolt=False): - if not use_graphbolt: - for k, dtype in RESERVED_FIELD_DTYPE.items(): - if k in part_g.ndata: - assert part_g.ndata[k].dtype == dtype - if k in part_g.edata: - assert part_g.edata[k].dtype == dtype - else: - for k, dtype in RESERVED_FIELD_DTYPE.items(): - if k in part_g.node_attributes: - assert part_g.node_attributes[k].dtype == dtype - if k in part_g.edge_attributes: - assert part_g.edge_attributes[k].dtype == dtype + """ + check list: + make sure nodes and edges have correct type. + """ + ndata = part_g.node_attributes if use_graphbolt else part_g.ndata + edata = part_g.edge_attributes if use_graphbolt else part_g.edata + + for k, dtype in RESERVED_FIELD_DTYPE.items(): + if k in ndata: + assert ndata[k].dtype == dtype + if k in edata: + assert edata[k].dtype == dtype def _verify_partition_formats(part_g, formats): @@ -90,11 +90,34 @@ def create_random_hetero(): return dgl.heterograph(edges, num_nodes) -def verify_hetero_graph(g, parts, use_graphbolt=False): - if use_graphbolt: - num_nodes = {ntype: 0 for ntype in g.ntypes} - num_edges = {etype: 0 for etype in g.canonical_etypes} - for part in parts: +def _verify_hetero_graph_elements_number( + g, + parts, + store_inner_node, + store_inner_edge, + use_graphbolt, +): + """ + check list: + make sure edge type are correct. + make sure the number of nodes in each node type are correct. 
+ make sure the argument store_inner_edge and store_inner_node work. + """ + num_nodes = {ntype: 0 for ntype in g.ntypes} + num_edges = {etype: 0 for etype in g.canonical_etypes} + for part in parts: + edata = part.edge_attributes if use_graphbolt else part.edata + if dgl.ETYPE in edata: + assert len(g.canonical_etypes) == len(F.unique(edata[dgl.ETYPE])) + if not use_graphbolt: + for ntype in g.ntypes: + ntype_id = g.get_ntype_id(ntype) + inner_node_mask = _get_inner_node_mask( + part, ntype_id, use_graphbolt + ) + num_inner_nodes = F.sum(F.astype(inner_node_mask, F.int64), 0) + num_nodes[ntype] += num_inner_nodes + if store_inner_edge or not use_graphbolt: for etype in g.canonical_etypes: etype_id = g.get_etype_id(etype) inner_edge_mask = _get_inner_edge_mask( @@ -103,7 +126,19 @@ def verify_hetero_graph(g, parts, use_graphbolt=False): num_inner_edges = F.sum(F.astype(inner_edge_mask, F.int64), 0) num_edges[etype] += num_inner_edges - # Verify the number of edges are correct. + # Verify the number of nodes are correct. + if not use_graphbolt: + for ntype in g.ntypes: + print( + "node {}: {}, {}".format( + ntype, g.num_nodes(ntype), num_nodes[ntype] + ) + ) + assert g.num_nodes(ntype) == num_nodes[ntype] + elif store_inner_node: + assert "inner_node" in parts[0].node_attributes + # Verify the number of edges are correct. + if store_inner_edge or not use_graphbolt: for etype in g.canonical_etypes: print( "edge {}: {}, {}".format( @@ -111,109 +146,112 @@ def verify_hetero_graph(g, parts, use_graphbolt=False): ) ) assert g.num_edges(etype) == num_edges[etype] + elif not store_inner_edge: + assert "inner_edge" not in parts[0].edge_attributes + +def _verify_hetero_graph_attributes( + g, + parts, + store_eids, + store_inner_edge, + use_graphbolt, +): + """ + check list: + make sure edge ids fall into a range. + make sure inner nodes have Ids fall into a range. + make sure all nodes is included. + make sure all edges is included. 
+ make sure store_eids performs its function. + """ + if store_eids or not use_graphbolt: nids = {ntype: [] for ntype in g.ntypes} eids = {etype: [] for etype in g.canonical_etypes} for part in parts: - eid = th.arange(len(part.edge_attributes[dgl.EID])) - etype_arr = F.gather_row(part.type_per_edge, eid) - eid_type = F.gather_row(part.edge_attributes[dgl.EID], eid) + edata = part.edge_attributes if use_graphbolt else part.edata + etype = part.type_per_edge if use_graphbolt else edata[dgl.ETYPE] + eid = th.arange(len(edata[dgl.EID])) + etype_arr = F.gather_row(etype, eid) + eid_arr = F.gather_row(edata[dgl.EID], eid) for etype in g.canonical_etypes: etype_id = g.get_etype_id(etype) eids[etype].append( - F.boolean_mask(eid_type, etype_arr == etype_id) + F.boolean_mask(eid_arr, etype_arr == etype_id) ) # Make sure edge Ids fall into a range. - inner_edge_mask = _get_inner_edge_mask( - part, etype_id, use_graphbolt - ) - inner_eids = np.sort( - F.asnumpy( - F.boolean_mask( - part.edge_attributes[dgl.EID], inner_edge_mask + if store_inner_edge or not use_graphbolt: + inner_edge_mask = _get_inner_edge_mask( + part, etype_id, use_graphbolt=use_graphbolt + ) + inner_eids = np.sort( + F.asnumpy( + F.boolean_mask(edata[dgl.EID], inner_edge_mask) ) ) - ) - assert np.all( - inner_eids == np.arange(inner_eids[0], inner_eids[-1] + 1) - ) - return - - num_nodes = {ntype: 0 for ntype in g.ntypes} - num_edges = {etype: 0 for etype in g.canonical_etypes} - for part in parts: - assert len(g.canonical_etypes) == len(F.unique(part.edata[dgl.ETYPE])) - for ntype in g.ntypes: - ntype_id = g.get_ntype_id(ntype) - inner_node_mask = _get_inner_node_mask(part, ntype_id) - num_inner_nodes = F.sum(F.astype(inner_node_mask, F.int64), 0) - num_nodes[ntype] += num_inner_nodes - for etype in g.canonical_etypes: - etype_id = g.get_etype_id(etype) - inner_edge_mask = _get_inner_edge_mask(part, etype_id) - num_inner_edges = F.sum(F.astype(inner_edge_mask, F.int64), 0) - num_edges[etype] += 
num_inner_edges - # Verify the number of nodes are correct. - for ntype in g.ntypes: - print( - "node {}: {}, {}".format( - ntype, g.num_nodes(ntype), num_nodes[ntype] - ) - ) - assert g.num_nodes(ntype) == num_nodes[ntype] - # Verify the number of edges are correct. - for etype in g.canonical_etypes: - print( - "edge {}: {}, {}".format( - etype, g.num_edges(etype), num_edges[etype] - ) - ) - assert g.num_edges(etype) == num_edges[etype] - - nids = {ntype: [] for ntype in g.ntypes} - eids = {etype: [] for etype in g.canonical_etypes} - for part in parts: - _, _, eid = part.edges(form="all") - etype_arr = F.gather_row(part.edata[dgl.ETYPE], eid) - eid_type = F.gather_row(part.edata[dgl.EID], eid) - for etype in g.canonical_etypes: - etype_id = g.get_etype_id(etype) - eids[etype].append(F.boolean_mask(eid_type, etype_arr == etype_id)) - # Make sure edge Ids fall into a range. - inner_edge_mask = _get_inner_edge_mask(part, etype_id) - inner_eids = np.sort( - F.asnumpy(F.boolean_mask(part.edata[dgl.EID], inner_edge_mask)) - ) - assert np.all( - inner_eids == np.arange(inner_eids[0], inner_eids[-1] + 1) - ) - - for ntype in g.ntypes: - ntype_id = g.get_ntype_id(ntype) - # Make sure inner nodes have Ids fall into a range. - inner_node_mask = _get_inner_node_mask(part, ntype_id) - inner_nids = F.boolean_mask(part.ndata[dgl.NID], inner_node_mask) - assert np.all( - F.asnumpy( - inner_nids - == F.arange( - F.as_scalar(inner_nids[0]), - F.as_scalar(inner_nids[-1]) + 1, + assert np.all( + inner_eids + == np.arange(inner_eids[0], inner_eids[-1] + 1) ) - ) - ) - nids[ntype].append(inner_nids) - for ntype in nids: - nids_type = F.cat(nids[ntype], 0) - uniq_ids = F.unique(nids_type) - # We should get all nodes. 
- assert len(uniq_ids) == g.num_nodes(ntype) - for etype in eids: - eids_type = F.cat(eids[etype], 0) - uniq_ids = F.unique(eids_type) - assert len(uniq_ids) == g.num_edges(etype) - # TODO(zhengda) this doesn't check 'part_id' + if not use_graphbolt: + for ntype in g.ntypes: + ntype_id = g.get_ntype_id(ntype) + # Make sure inner nodes have Ids fall into a range. + inner_node_mask = _get_inner_node_mask(part, ntype_id) + inner_nids = F.boolean_mask( + part.ndata[dgl.NID], inner_node_mask + ) + assert np.all( + F.asnumpy( + inner_nids + == F.arange( + F.as_scalar(inner_nids[0]), + F.as_scalar(inner_nids[-1]) + 1, + ) + ) + ) + nids[ntype].append(inner_nids) + + if not use_graphbolt: + for ntype in nids: + nids_type = F.cat(nids[ntype], 0) + uniq_ids = F.unique(nids_type) + # We should get all nodes. + assert len(uniq_ids) == g.num_nodes(ntype) + + for etype in eids: + eids_type = F.cat(eids[etype], 0) + uniq_ids = F.unique(eids_type) + # We should get all nodes. + assert len(uniq_ids) == g.num_edges(etype) + # TODO(zhengda) this doesn't check 'part_id' + elif not store_eids: + assert dgl.EID not in parts[0].edge_attributes + + +def _verify_hetero_graph( + g, + parts, + use_graphbolt=False, + store_eids=False, + store_inner_node=False, + store_inner_edge=False, +): + _verify_hetero_graph_elements_number( + g, + parts, + store_inner_edge=store_inner_edge, + store_inner_node=store_inner_node, + use_graphbolt=use_graphbolt, + ) + _verify_hetero_graph_attributes( + g, + parts, + store_eids=store_eids, + store_inner_edge=store_inner_edge, + use_graphbolt=use_graphbolt, + ) def verify_graph_feats( @@ -224,23 +262,33 @@ def verify_graph_feats( edge_feats, orig_nids, orig_eids, + store_eids=False, + store_inner_edge=False, + store_inner_node=False, use_graphbolt=False, + is_homo=False, ): - if use_graphbolt: + """ + check list: + make sure the feats of nodes and edges are correct + """ + if (is_homo and store_inner_node) or not use_graphbolt: for ntype in g.ntypes: + ndata = 
part.node_attributes if use_graphbolt else part.ndata ntype_id = g.get_ntype_id(ntype) inner_node_mask = _get_inner_node_mask( part, ntype_id, use_graphbolt ) - inner_nids = F.boolean_mask( - part.node_attributes[dgl.NID], inner_node_mask - ) + inner_nids = F.boolean_mask(ndata[dgl.NID], inner_node_mask) ntype_ids, inner_type_nids = gpb.map_to_per_ntype(inner_nids) partid = gpb.nid2partid(inner_type_nids, ntype) assert np.all(F.asnumpy(ntype_ids) == ntype_id) assert np.all(F.asnumpy(partid) == gpb.partid) - orig_id = orig_nids[ntype][inner_type_nids] + if is_homo: + orig_id = orig_nids[inner_type_nids] + else: + orig_id = orig_nids[ntype][inner_type_nids] local_nids = gpb.nid2localnid(inner_type_nids, gpb.partid, ntype) for name in g.nodes[ntype].data: @@ -250,63 +298,23 @@ def verify_graph_feats( ndata = F.gather_row(node_feats[ntype + "/" + name], local_nids) assert np.all(F.asnumpy(ndata == true_feats)) + if (store_inner_edge and store_eids) or not use_graphbolt: for etype in g.canonical_etypes: + edata = part.edge_attributes if use_graphbolt else part.edata etype_id = g.get_etype_id(etype) inner_edge_mask = _get_inner_edge_mask( part, etype_id, use_graphbolt ) - inner_eids = F.boolean_mask( - part.edge_attributes[dgl.EID], inner_edge_mask - ) + inner_eids = F.boolean_mask(edata[dgl.EID], inner_edge_mask) etype_ids, inner_type_eids = gpb.map_to_per_etype(inner_eids) partid = gpb.eid2partid(inner_type_eids, etype) assert np.all(F.asnumpy(etype_ids) == etype_id) assert np.all(F.asnumpy(partid) == gpb.partid) - orig_id = orig_eids[etype][inner_type_eids] - local_eids = gpb.eid2localeid(inner_type_eids, gpb.partid, etype) - - for name in g.edges[etype].data: - if name in [dgl.EID, "inner_edge"]: - continue - true_feats = F.gather_row(g.edges[etype].data[name], orig_id) - edata = F.gather_row( - edge_feats[_etype_tuple_to_str(etype) + "/" + name], - local_eids, - ) - assert np.all(F.asnumpy(edata == true_feats)) - else: - for ntype in g.ntypes: - ntype_id = 
g.get_ntype_id(ntype) - inner_node_mask = _get_inner_node_mask( - part, ntype_id, use_graphbolt - ) - inner_nids = F.boolean_mask(part.ndata[dgl.NID], inner_node_mask) - ntype_ids, inner_type_nids = gpb.map_to_per_ntype(inner_nids) - partid = gpb.nid2partid(inner_type_nids, ntype) - assert np.all(F.asnumpy(ntype_ids) == ntype_id) - assert np.all(F.asnumpy(partid) == gpb.partid) - - orig_id = orig_nids[ntype][inner_type_nids] - local_nids = gpb.nid2localnid(inner_type_nids, gpb.partid, ntype) - - for name in g.nodes[ntype].data: - if name in [dgl.NID, "inner_node"]: - continue - true_feats = F.gather_row(g.nodes[ntype].data[name], orig_id) - ndata = F.gather_row(node_feats[ntype + "/" + name], local_nids) - assert np.all(F.asnumpy(ndata == true_feats)) - - for etype in g.canonical_etypes: - etype_id = g.get_etype_id(etype) - inner_edge_mask = _get_inner_edge_mask(part, etype_id) - inner_eids = F.boolean_mask(part.edata[dgl.EID], inner_edge_mask) - etype_ids, inner_type_eids = gpb.map_to_per_etype(inner_eids) - partid = gpb.eid2partid(inner_type_eids, etype) - assert np.all(F.asnumpy(etype_ids) == etype_id) - assert np.all(F.asnumpy(partid) == gpb.partid) - - orig_id = orig_eids[etype][inner_type_eids] + if is_homo: + orig_id = orig_eids[inner_type_eids] + else: + orig_id = orig_eids[etype][inner_type_eids] local_eids = gpb.eid2localeid(inner_type_eids, gpb.partid, etype) for name in g.edges[etype].data: @@ -437,7 +445,7 @@ def check_hetero_partition( shuffled_elabels.append( edge_feats[_etype_tuple_to_str(test_etype) + "/labels"] ) - verify_hetero_graph(hg, parts) + _verify_hetero_graph(hg, parts) shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) shuffled_elabels = F.asnumpy(F.cat(shuffled_elabels, 0)) @@ -1189,15 +1197,194 @@ def test_not_sorted_node_edge_map(): assert gpb.local_etype_offset == [0, 500, 1100, 1800, 2600] +def _verify_metadata( + g, + gpb, + part_g, + num_parts, + part_sizes, + part_i, + store_inner_node, + store_inner_edge, + store_eids, +): + 
""" + # Check the metadata + check list: + make sure gpb have correct node and edge number. + make sure gpb have correct number of partitions. + make sure gpb have correct number of nodes and edges in each partition. + make sure local nid and eid have correct dtype. + make sure local nid have correct order + """ + assert gpb._num_nodes() == g.num_nodes() + assert gpb._num_edges() == g.num_edges() + + assert gpb.num_partitions() == num_parts + gpb_meta = gpb.metadata() + assert len(gpb_meta) == num_parts + assert len(gpb.partid2nids(part_i)) == gpb_meta[part_i]["num_nodes"] + assert len(gpb.partid2eids(part_i)) == gpb_meta[part_i]["num_edges"] + part_sizes.append( + (gpb_meta[part_i]["num_nodes"], gpb_meta[part_i]["num_edges"]) + ) + + if store_inner_node and store_inner_edge and store_eids: + nid = F.boolean_mask( + part_g.node_attributes[dgl.NID], + part_g.node_attributes["inner_node"], + ) + local_nid = gpb.nid2localnid(nid, part_i) + assert F.dtype(local_nid) in (F.int64, F.int32) + assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid))) + eid = F.boolean_mask( + part_g.edge_attributes[dgl.EID], + part_g.edge_attributes["inner_edge"], + ) + local_eid = gpb.eid2localeid(eid, part_i) + assert F.dtype(local_eid) in (F.int64, F.int32) + assert np.all( + np.sort(F.asnumpy(local_eid)) == np.arange(0, len(local_eid)) + ) + return local_eid, local_nid + else: + return None, None + + +def _verify_mapping( + g, + part_g, + part_i, + gpb, + orig_nids, + orig_eids, + node_feats, + edge_feats, + local_nid=None, + local_eid=None, + store_inner_node=False, + store_inner_edge=False, + store_eids=False, +): + """ + check list: + make sure nodes and edges's data type are correct. + make sure nodes and edges's ID in correct order. + make sure the number of nodes and edges's ID are correct. + """ + if store_inner_node and store_inner_edge and store_eids: + # Check the node map. 
+ local_nodes = F.boolean_mask( + part_g.node_attributes[dgl.NID], + part_g.node_attributes["inner_node"], + ) + inner_node_index = F.nonzero_1d(part_g.node_attributes["inner_node"]) + mapping_nodes = gpb.partid2nids(part_i) + assert F.dtype(mapping_nodes) in (F.int32, F.int64) + assert np.all( + np.sort(F.asnumpy(local_nodes)) == np.sort(F.asnumpy(mapping_nodes)) + ) + assert np.all( + F.asnumpy(inner_node_index) == np.arange(len(inner_node_index)) + ) + + # Check the edge map. + + local_edges = F.boolean_mask( + part_g.edge_attributes[dgl.EID], + part_g.edge_attributes["inner_edge"], + ) + inner_edge_index = F.nonzero_1d(part_g.edge_attributes["inner_edge"]) + mapping_edges = gpb.partid2eids(part_i) + assert F.dtype(mapping_edges) in (F.int32, F.int64) + assert np.all( + np.sort(F.asnumpy(local_edges)) == np.sort(F.asnumpy(mapping_edges)) + ) + assert np.all( + F.asnumpy(inner_edge_index) == np.arange(len(inner_edge_index)) + ) + + local_nodes = orig_nids[local_nodes] + local_edges = orig_eids[local_edges] + + for name in ["labels", "feats"]: + assert "_N/" + name in node_feats + assert node_feats["_N/" + name].shape[0] == len(local_nodes) + true_feats = F.gather_row(g.ndata[name], local_nodes) + ndata = F.gather_row(node_feats["_N/" + name], local_nid) + assert np.all(F.asnumpy(true_feats) == F.asnumpy(ndata)) + for name in ["feats"]: + efeat_name = _etype_tuple_to_str(DEFAULT_ETYPE) + "/" + name + assert efeat_name in edge_feats + assert edge_feats[efeat_name].shape[0] == len(local_edges) + true_feats = F.gather_row(g.edata[name], local_edges) + edata = F.gather_row(edge_feats[efeat_name], local_eid) + assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata)) + + if store_eids: + # Verify the mapping between the reshuffled IDs and the original IDs. 
+ indices, indptr = part_g.indices, part_g.csc_indptr + adj_matrix = dglsp.from_csc(indptr, indices) + part_src_ids, part_dst_ids = adj_matrix.coo() + part_src_ids = F.gather_row( + part_g.node_attributes[dgl.NID], part_src_ids + ) + part_dst_ids = F.gather_row( + part_g.node_attributes[dgl.NID], part_dst_ids + ) + part_eids = part_g.edge_attributes[dgl.EID] + orig_src_ids = F.gather_row(orig_nids, part_src_ids) + orig_dst_ids = F.gather_row(orig_nids, part_dst_ids) + orig_eids1 = F.gather_row(orig_eids, part_eids) + orig_eids2 = g.edge_ids(orig_src_ids, orig_dst_ids) + assert F.shape(orig_eids1)[0] == F.shape(orig_eids2)[0] + assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) + + local_orig_nids = orig_nids[part_g.node_attributes[dgl.NID]] + local_orig_eids = orig_eids[part_g.edge_attributes[dgl.EID]] + part_g.node_attributes["feats"] = F.gather_row( + g.ndata["feats"], local_orig_nids + ) + part_g.edge_attributes["feats"] = F.gather_row( + g.edata["feats"], local_orig_eids + ) + else: + assert dgl.EID not in part_g.edge_attributes + + return node_feats["_N/labels"], edge_feats["_N:_E:_N/feats"] + + @pytest.mark.parametrize("part_method", ["metis", "random"]) @pytest.mark.parametrize("num_parts", [1, 4]) +@pytest.mark.parametrize("store_eids", [True, False]) +@pytest.mark.parametrize("store_inner_node", [True, False]) +@pytest.mark.parametrize("store_inner_edge", [True, False]) @pytest.mark.parametrize("debug_mode", [True, False]) def test_partition_graph_graphbolt_homo( part_method, num_parts, + store_eids, + store_inner_node, + store_inner_edge, debug_mode, - num_trainers_per_machine=1, ): + """ + check list: + _verify_metadata: + number of edges, nodes, partitions for all + number of edges, nodes in each partitions + order and data type of local nid and eid + + _verify_mapping: + data type, ID's order and ID's number of edges and nodes + + verify_graph_feats: + graph's feats + + _verify_reconstrunt_IDs: + check if feats and IDs can be reconstructed + 
+ """ reset_envs() if debug_mode: os.environ["DGL_DIST_DEBUG"] = "1" @@ -1211,8 +1398,6 @@ def test_partition_graph_graphbolt_homo( g.edata["feats"] = F.tensor( np.random.randn(g.num_edges(), 10), F.float32 ) - g.update_all(fn.copy_u("feats", "msg"), fn.sum("msg", "h")) - g.update_all(fn.copy_e("feats", "msg"), fn.sum("msg", "eh")) orig_nids, orig_eids = partition_graph( g, @@ -1221,195 +1406,271 @@ def test_partition_graph_graphbolt_homo( test_dir, part_method=part_method, use_graphbolt=True, - store_eids=True, - store_inner_node=True, - store_inner_edge=True, + store_eids=store_eids, + store_inner_node=store_inner_node, + store_inner_edge=store_inner_edge, return_mapping=True, ) + if debug_mode: + store_eids = True + store_inner_node = True + store_inner_edge = True part_sizes = [] shuffled_labels = [] shuffled_edata = [] part_config = os.path.join(test_dir, f"{graph_name}.json") - for i in range(num_parts): + for part_i in range(num_parts): part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( - part_config, i, load_feats=True, use_graphbolt=True + part_config, part_i, load_feats=True, use_graphbolt=True ) - if num_trainers_per_machine > 1: - for ntype in g.ntypes: - name = ntype + "/trainer_id" - assert name in node_feats - part_ids = F.floor_div( - node_feats[name], num_trainers_per_machine - ) - assert np.all(F.asnumpy(part_ids) == i) - for etype in g.canonical_etypes: - name = _etype_tuple_to_str(etype) + "/trainer_id" - assert name in edge_feats - part_ids = F.floor_div( - edge_feats[name], num_trainers_per_machine - ) - assert np.all(F.asnumpy(part_ids) == i) - - # Check the metadata - assert gpb._num_nodes() == g.num_nodes() - assert gpb._num_edges() == g.num_edges() - - assert gpb.num_partitions() == num_parts - gpb_meta = gpb.metadata() - assert len(gpb_meta) == num_parts - assert len(gpb.partid2nids(i)) == gpb_meta[i]["num_nodes"] - assert len(gpb.partid2eids(i)) == gpb_meta[i]["num_edges"] - part_sizes.append( - (gpb_meta[i]["num_nodes"], 
gpb_meta[i]["num_edges"]) + local_eid, local_nid = _verify_metadata( + g, + gpb, + part_g, + num_parts, + part_sizes, + part_i, + store_inner_node, + store_inner_edge, + store_eids, ) - nid = F.boolean_mask( - part_g.node_attributes[dgl.NID], - part_g.node_attributes["inner_node"], - ) - local_nid = gpb.nid2localnid(nid, i) - assert F.dtype(local_nid) in (F.int64, F.int32) - assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid))) - eid = F.boolean_mask( - part_g.edge_attributes[dgl.EID], - part_g.edge_attributes["inner_edge"], + node_feat, edge_feat = _verify_mapping( + g, + part_g, + part_i, + gpb, + orig_nids, + orig_eids, + node_feats, + edge_feats, + local_nid=local_nid, + local_eid=local_eid, + store_inner_node=store_inner_node, + store_inner_edge=store_inner_edge, + store_eids=store_eids, ) - local_eid = gpb.eid2localeid(eid, i) - assert F.dtype(local_eid) in (F.int64, F.int32) - assert np.all( - np.sort(F.asnumpy(local_eid)) == np.arange(0, len(local_eid)) + shuffled_labels.append(node_feat) + shuffled_edata.append(edge_feat) + + verify_graph_feats( + g, + gpb, + part_g, + node_feats, + edge_feats, + orig_nids, + orig_eids, + store_eids=store_eids, + store_inner_edge=store_inner_edge, + store_inner_node=store_inner_node, + use_graphbolt=True, + is_homo=True, ) - # Check the node map. - local_nodes = F.boolean_mask( - part_g.node_attributes[dgl.NID], - part_g.node_attributes["inner_node"], - ) - llocal_nodes = F.nonzero_1d(part_g.node_attributes["inner_node"]) - local_nodes1 = gpb.partid2nids(i) - assert F.dtype(local_nodes1) in (F.int32, F.int64) - assert np.all( - np.sort(F.asnumpy(local_nodes)) - == np.sort(F.asnumpy(local_nodes1)) - ) - assert np.all( - F.asnumpy(llocal_nodes) == np.arange(len(llocal_nodes)) - ) + _verify_reconstrunt_data( + g, + gpb, + orig_nids, + orig_eids, + part_sizes, + shuffled_labels, + shuffled_edata, + ) - # Check the edge map. 
- local_edges = F.boolean_mask( - part_g.edge_attributes[dgl.EID], - part_g.edge_attributes["inner_edge"], - ) - llocal_edges = F.nonzero_1d(part_g.edge_attributes["inner_edge"]) - local_edges1 = gpb.partid2eids(i) - assert F.dtype(local_edges1) in (F.int32, F.int64) - assert np.all( - np.sort(F.asnumpy(local_edges)) - == np.sort(F.asnumpy(local_edges1)) - ) - assert np.all( - F.asnumpy(llocal_edges) == np.arange(len(llocal_edges)) - ) - # Verify the mapping between the reshuffled IDs and the original IDs. - indices, indptr = part_g.indices, part_g.csc_indptr - adj_matrix = dglsp.from_csc(indptr, indices) - part_src_ids, part_dst_ids = adj_matrix.coo() - part_src_ids = F.gather_row( - part_g.node_attributes[dgl.NID], part_src_ids +def _vertify_original_IDs(g, orig_nids, orig_eids): + """ + check list: + make sure nodes and edges' data types are correct + make sure nodes and edges' number in each type is correct + """ + assert len(orig_nids) == len(g.ntypes) + assert len(orig_eids) == len(g.canonical_etypes) + for ntype in g.ntypes: + assert len(orig_nids[ntype]) == g.num_nodes(ntype) + for etype in g.canonical_etypes: + assert len(orig_eids[etype]) == g.num_edges(etype) + + +def _verify_reconstrunt_data( + g, gpb, orig_nids, orig_eids, part_sizes, shuffled_labels, shuffled_edata +): + """ + check list: + make sure labels and feats are correct. + make sure nodes and edges' id are correct. + make sure node and edges' part + """ + # Verify that we can reconstruct node/edge data for original IDs. 
+    shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0))
+    shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0))
+    orig_labels = np.zeros(shuffled_labels.shape, dtype=shuffled_labels.dtype)
+    orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype)
+    orig_labels[F.asnumpy(orig_nids)] = shuffled_labels
+    orig_edata[F.asnumpy(orig_eids)] = shuffled_edata
+    assert np.all(orig_labels == F.asnumpy(g.ndata["labels"]))
+    assert np.all(orig_edata == F.asnumpy(g.edata["feats"]))
+
+    node_map = []
+    edge_map = []
+    for part_i, (num_nodes, num_edges) in enumerate(part_sizes):
+        node_map.append(np.ones(num_nodes) * part_i)
+        edge_map.append(np.ones(num_edges) * part_i)
+    node_map = np.concatenate(node_map)
+    edge_map = np.concatenate(edge_map)
+    nid2pid = gpb.nid2partid(F.arange(0, len(node_map)))
+    assert F.dtype(nid2pid) in (F.int32, F.int64)
+    assert np.all(F.asnumpy(nid2pid) == node_map)
+    eid2pid = gpb.eid2partid(F.arange(0, len(edge_map)))
+    assert F.dtype(eid2pid) in (F.int32, F.int64)
+    assert np.all(F.asnumpy(eid2pid) == edge_map)
+
+
+def _verify_graphbolt_mapping_IDs(
+    g,
+    part_g,
+    gpb,
+    orig_nids,
+    orig_eids,
+    node_feats,
+    edge_feats,
+    test_ntype,
+    test_etype,
+    store_eids,
+    store_inner_node,
+    store_inner_edge,
+):
+    """
+    check list:
+    make sure nodes and edges' ids have correct type.
+    make sure nodes and edges have correct map ids.
+    """
+    # Verify the mapping between the reshuffled IDs and the original IDs.
+    # These are partition-local IDs.
+    indices, indptr = part_g.indices, part_g.csc_indptr
+    csc_matrix = dglsp.from_csc(indptr, indices)
+    part_src_ids, part_dst_ids = csc_matrix.coo()
+    # These are reshuffled global homogeneous IDs.
+    part_src_ids = F.gather_row(part_g.node_attributes[dgl.NID], part_src_ids)
+    part_dst_ids = F.gather_row(part_g.node_attributes[dgl.NID], part_dst_ids)
+    # These are reshuffled per-type IDs. 
+ src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids) + dst_ntype_ids, part_dst_ids = gpb.map_to_per_ntype(part_dst_ids) + # `IdMap` is in int64 by default. + assert src_ntype_ids.dtype == F.int64 + assert dst_ntype_ids.dtype == F.int64 + + with pytest.raises(dgl.utils.internal.InconsistentDtypeException): + gpb.map_to_per_ntype(F.tensor([0], F.int32)) + with pytest.raises(dgl.utils.internal.InconsistentDtypeException): + gpb.map_to_per_etype(F.tensor([0], F.int32)) + + if store_eids: + part_eids = part_g.edge_attributes[dgl.EID] + etype_ids, part_eids = gpb.map_to_per_etype(part_eids) + # `IdMap` is in int64 by default. + assert etype_ids.dtype == F.int64 + + # These are original per-type IDs. + for etype_id, etype in enumerate(g.canonical_etypes): + part_src_ids1 = F.boolean_mask(part_src_ids, etype_ids == etype_id) + src_ntype_ids1 = F.boolean_mask( + src_ntype_ids, etype_ids == etype_id ) - part_dst_ids = F.gather_row( - part_g.node_attributes[dgl.NID], part_dst_ids + part_dst_ids1 = F.boolean_mask(part_dst_ids, etype_ids == etype_id) + dst_ntype_ids1 = F.boolean_mask( + dst_ntype_ids, etype_ids == etype_id ) - part_eids = part_g.edge_attributes[dgl.EID] - orig_src_ids = F.gather_row(orig_nids, part_src_ids) - orig_dst_ids = F.gather_row(orig_nids, part_dst_ids) - orig_eids1 = F.gather_row(orig_eids, part_eids) - orig_eids2 = g.edge_ids(orig_src_ids, orig_dst_ids) - assert F.shape(orig_eids1)[0] == F.shape(orig_eids2)[0] + part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id) + assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0])) + assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0])) + src_ntype = g.ntypes[F.as_scalar(src_ntype_ids1[0])] + dst_ntype = g.ntypes[F.as_scalar(dst_ntype_ids1[0])] + orig_src_ids1 = F.gather_row(orig_nids[src_ntype], part_src_ids1) + orig_dst_ids1 = F.gather_row(orig_nids[dst_ntype], part_dst_ids1) + orig_eids1 = F.gather_row(orig_eids[etype], part_eids1) + orig_eids2 = g.edge_ids(orig_src_ids1, 
orig_dst_ids1, etype=etype) + assert len(orig_eids1) == len(orig_eids2) assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) + else: + assert dgl.EID not in part_g.edge_attributes + verify_graph_feats( + g, + gpb, + part_g, + node_feats, + edge_feats, + orig_nids, + orig_eids, + store_eids, + store_inner_edge=store_inner_edge, + store_inner_node=store_inner_node, + use_graphbolt=True, + ) - local_orig_nids = orig_nids[part_g.node_attributes[dgl.NID]] - local_orig_eids = orig_eids[part_g.edge_attributes[dgl.EID]] - part_g.node_attributes["feats"] = F.gather_row( - g.ndata["feats"], local_orig_nids - ) - part_g.edge_attributes["feats"] = F.gather_row( - g.edata["feats"], local_orig_eids - ) - local_nodes = orig_nids[local_nodes] - local_edges = orig_eids[local_edges] - - # part_g.update_all(fn.copy_u("feats", "msg"), fn.sum("msg", "h")) - # part_g.update_all(fn.copy_e("feats", "msg"), fn.sum("msg", "eh")) - # part_g.node_attributes["h"] = adj_matrix@part_g.node_attributes["h"] - - # assert F.allclose( - # F.gather_row(g.ndata["h"], local_nodes), - # F.gather_row(part_g.node_attributes["h"], llocal_nodes), - # ) - # assert F.allclose( - # F.gather_row(g.ndata["eh"], local_nodes), - # F.gather_row(part_g.node_attributes["eh"], llocal_nodes), - # ) - - for name in ["labels", "feats"]: - assert "_N/" + name in node_feats - assert node_feats["_N/" + name].shape[0] == len(local_nodes) - true_feats = F.gather_row(g.ndata[name], local_nodes) - ndata = F.gather_row(node_feats["_N/" + name], local_nid) - assert np.all(F.asnumpy(true_feats) == F.asnumpy(ndata)) - for name in ["feats"]: - efeat_name = _etype_tuple_to_str(DEFAULT_ETYPE) + "/" + name - assert efeat_name in edge_feats - assert edge_feats[efeat_name].shape[0] == len(local_edges) - true_feats = F.gather_row(g.edata[name], local_edges) - edata = F.gather_row(edge_feats[efeat_name], local_eid) - assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata)) - - # This only works if node/edge IDs are shuffled. 
- shuffled_labels.append(node_feats["_N/labels"]) - shuffled_edata.append(edge_feats["_N:_E:_N/feats"]) - - # Verify that we can reconstruct node/edge data for original IDs. - shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) - shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0)) - orig_labels = np.zeros( - shuffled_labels.shape, dtype=shuffled_labels.dtype - ) - orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype) - orig_labels[F.asnumpy(orig_nids)] = shuffled_labels - orig_edata[F.asnumpy(orig_eids)] = shuffled_edata - assert np.all(orig_labels == F.asnumpy(g.ndata["labels"])) - assert np.all(orig_edata == F.asnumpy(g.edata["feats"])) - - node_map = [] - edge_map = [] - for i, (num_nodes, num_edges) in enumerate(part_sizes): - node_map.append(np.ones(num_nodes) * i) - edge_map.append(np.ones(num_edges) * i) - node_map = np.concatenate(node_map) - edge_map = np.concatenate(edge_map) - nid2pid = gpb.nid2partid(F.arange(0, len(node_map))) - assert F.dtype(nid2pid) in (F.int32, F.int64) - assert np.all(F.asnumpy(nid2pid) == node_map) - eid2pid = gpb.eid2partid(F.arange(0, len(edge_map))) - assert F.dtype(eid2pid) in (F.int32, F.int64) - assert np.all(F.asnumpy(eid2pid) == edge_map) + shuffled_label = node_feats[test_ntype + "/labels"] + shuffled_elabel = edge_feats[_etype_tuple_to_str(test_etype) + "/labels"] + return part_g, shuffled_label, shuffled_elabel + + +def _verify_labels( + g, + shuffled_labels, + shuffled_elabels, + orig_nids, + orig_eids, + test_ntype, + test_etype, +): + """ + check list: + make sure node labels are correct. + make sure edge labels are correct. 
+ """ + shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) + shuffled_elabels = F.asnumpy(F.cat(shuffled_elabels, 0)) + orig_labels = np.zeros(shuffled_labels.shape, dtype=shuffled_labels.dtype) + orig_elabels = np.zeros( + shuffled_elabels.shape, dtype=shuffled_elabels.dtype + ) + orig_labels[F.asnumpy(orig_nids[test_ntype])] = shuffled_labels + orig_elabels[F.asnumpy(orig_eids[test_etype])] = shuffled_elabels + assert np.all(orig_labels == F.asnumpy(g.nodes[test_ntype].data["labels"])) + assert np.all(orig_elabels == F.asnumpy(g.edges[test_etype].data["labels"])) @pytest.mark.parametrize("part_method", ["metis", "random"]) @pytest.mark.parametrize("num_parts", [1, 4]) +@pytest.mark.parametrize("store_eids", [True, False]) +@pytest.mark.parametrize("store_inner_node", [True, False]) +@pytest.mark.parametrize("store_inner_edge", [True, False]) @pytest.mark.parametrize("debug_mode", [True, False]) def test_partition_graph_graphbolt_hetero( part_method, num_parts, + store_eids, + store_inner_node, + store_inner_edge, debug_mode, n_jobs=1, - num_trainers_per_machine=1, ): + """ + check list: + _vertify_original_IDs: + number of edges and nodes' type and number of them in each type + + _verify_graphbolt_mapping_IDs: + mapping node and edge IDs + feats in graph + + _verify_hetero_graph: + number, order of elements in hetero graph + + _verify_labels: + labels of nodes and edges + """ test_ntype = "n1" test_etype = ("n1", "r1", "n2") reset_envs() @@ -1417,6 +1678,7 @@ def test_partition_graph_graphbolt_hetero( os.environ["DGL_DIST_DEBUG"] = "1" with tempfile.TemporaryDirectory() as test_dir: hg = create_random_hetero() + # TODO create graph data graph_name = "test" hg.nodes[test_ntype].data["labels"] = F.arange( 0, hg.num_nodes(test_ntype) @@ -1430,7 +1692,6 @@ def test_partition_graph_graphbolt_hetero( hg.edges[test_etype].data["labels"] = F.arange( 0, hg.num_edges(test_etype) ) - num_hops = 1 orig_nids, orig_eids = partition_graph( hg, graph_name, @@ -1440,132 
+1701,65 @@ def test_partition_graph_graphbolt_hetero( return_mapping=True, num_trainers_per_machine=1, use_graphbolt=True, - store_eids=True, - store_inner_node=True, - store_inner_edge=True, + store_eids=store_eids, + store_inner_node=store_inner_node, + store_inner_edge=store_inner_edge, n_jobs=n_jobs, ) - assert len(orig_nids) == len(hg.ntypes) - assert len(orig_eids) == len(hg.canonical_etypes) - for ntype in hg.ntypes: - assert len(orig_nids[ntype]) == hg.num_nodes(ntype) - for etype in hg.canonical_etypes: - assert len(orig_eids[etype]) == hg.num_edges(etype) + _vertify_original_IDs(hg, orig_nids, orig_eids) + + if debug_mode: + store_eids = True + store_inner_node = True + store_inner_edge = True + parts = [] shuffled_labels = [] shuffled_elabels = [] part_config = os.path.join(test_dir, f"{graph_name}.json") + # test each part for part_id in range(num_parts): part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( part_config, part_id, load_feats=True, use_graphbolt=True ) - if num_trainers_per_machine > 1: - for ntype in hg.ntypes: - name = ntype + "/trainer_id" - assert name in node_feats - part_ids = F.floor_div( - node_feats[name], num_trainers_per_machine - ) - assert np.all(F.asnumpy(part_ids) == part_id) - - for etype in hg.canonical_etypes: - name = _etype_tuple_to_str(etype) + "/trainer_id" - assert name in edge_feats - part_ids = F.floor_div( - edge_feats[name], num_trainers_per_machine - ) - assert np.all(F.asnumpy(part_ids) == part_id) - - # Verify the mapping between the reshuffled IDs and the original IDs. - # These are partition-local IDs. - indices, indptr = part_g.indices, part_g.csc_indptr - csc_matrix = dglsp.from_csc(indptr, indices) - part_src_ids, part_dst_ids = csc_matrix.coo() - # These are reshuffled global homogeneous IDs. 
- part_src_ids = F.gather_row( - part_g.node_attributes[dgl.NID], part_src_ids - ) - part_dst_ids = F.gather_row( - part_g.node_attributes[dgl.NID], part_dst_ids + # TODO verify mapping IDs + ( + part_g, + shuffled_label, + shuffled_elabel, + ) = _verify_graphbolt_mapping_IDs( + hg, + part_g, + gpb, + orig_nids, + orig_eids, + node_feats, + edge_feats, + test_ntype, + test_etype, + store_eids=store_eids, + store_inner_node=store_inner_node, + store_inner_edge=store_inner_edge, ) - part_eids = part_g.edge_attributes[dgl.EID] - # These are reshuffled per-type IDs. - src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids) - dst_ntype_ids, part_dst_ids = gpb.map_to_per_ntype(part_dst_ids) - etype_ids, part_eids = gpb.map_to_per_etype(part_eids) - # `IdMap` is in int64 by default. - assert src_ntype_ids.dtype == F.int64 - assert dst_ntype_ids.dtype == F.int64 - assert etype_ids.dtype == F.int64 - with pytest.raises(dgl.utils.internal.InconsistentDtypeException): - gpb.map_to_per_ntype(F.tensor([0], F.int32)) - with pytest.raises(dgl.utils.internal.InconsistentDtypeException): - gpb.map_to_per_etype(F.tensor([0], F.int32)) - # These are original per-type IDs. 
- for etype_id, etype in enumerate(hg.canonical_etypes): - part_src_ids1 = F.boolean_mask( - part_src_ids, etype_ids == etype_id - ) - src_ntype_ids1 = F.boolean_mask( - src_ntype_ids, etype_ids == etype_id - ) - part_dst_ids1 = F.boolean_mask( - part_dst_ids, etype_ids == etype_id - ) - dst_ntype_ids1 = F.boolean_mask( - dst_ntype_ids, etype_ids == etype_id - ) - part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id) - assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0])) - assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0])) - src_ntype = hg.ntypes[F.as_scalar(src_ntype_ids1[0])] - dst_ntype = hg.ntypes[F.as_scalar(dst_ntype_ids1[0])] - orig_src_ids1 = F.gather_row( - orig_nids[src_ntype], part_src_ids1 - ) - orig_dst_ids1 = F.gather_row( - orig_nids[dst_ntype], part_dst_ids1 - ) - orig_eids1 = F.gather_row(orig_eids[etype], part_eids1) - orig_eids2 = hg.edge_ids( - orig_src_ids1, orig_dst_ids1, etype=etype - ) - assert len(orig_eids1) == len(orig_eids2) - assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) parts.append(part_g) - if NTYPE in part_g.node_attributes: - verify_graph_feats( - hg, - gpb, - part_g, - node_feats, - edge_feats, - orig_nids, - orig_eids, - use_graphbolt=True, - ) - - shuffled_labels.append(node_feats[test_ntype + "/labels"]) - shuffled_elabels.append( - edge_feats[_etype_tuple_to_str(test_etype) + "/labels"] - ) - verify_hetero_graph(hg, parts, True) - - shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) - shuffled_elabels = F.asnumpy(F.cat(shuffled_elabels, 0)) - orig_labels = np.zeros( - shuffled_labels.shape, dtype=shuffled_labels.dtype - ) - orig_elabels = np.zeros( - shuffled_elabels.shape, dtype=shuffled_elabels.dtype - ) - orig_labels[F.asnumpy(orig_nids[test_ntype])] = shuffled_labels - orig_elabels[F.asnumpy(orig_eids[test_etype])] = shuffled_elabels - assert np.all( - orig_labels == F.asnumpy(hg.nodes[test_ntype].data["labels"]) + shuffled_labels.append(shuffled_label) + 
shuffled_elabels.append(shuffled_elabel) + _verify_hetero_graph( + hg, + parts, + True, + store_eids=store_eids, + store_inner_node=store_inner_node, + store_inner_edge=store_inner_edge, ) - assert np.all( - orig_elabels == F.asnumpy(hg.edges[test_etype].data["labels"]) + _verify_labels( + hg, + shuffled_labels, + shuffled_elabels, + orig_nids, + orig_eids, + test_ntype, + test_etype, ) @@ -1793,6 +1987,9 @@ def test_partition_graph_graphbolt_hetero_multi( part_method="random", num_parts=num_parts, n_jobs=4, + store_eids=True, + store_inner_node=True, + store_inner_edge=True, debug_mode=False, ) From 46afb4bc59545d89cdd1bc79bdccfb9c07f55c9b Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Fri, 23 Aug 2024 09:37:45 +0000 Subject: [PATCH 09/39] change partition --- python/dgl/distributed/partition.py | 118 ++++++++++++++-------------- 1 file changed, 61 insertions(+), 57 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 0f5e317b1627..e15b723775f8 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -109,7 +109,7 @@ def _process_partitions(g_list, formats=None, sort_etypes=False): return g_list -def _save_graphs(filename, g_list, formats=None, sort_etypes=False): +def _save_dgl_graphs(filename, g_list, formats=None, sort_etypes=False): g_list = _process_partitions( g_list, formats=formats, sort_etypes=sort_etypes ) @@ -117,48 +117,29 @@ def _save_graphs(filename, g_list, formats=None, sort_etypes=False): def _get_inner_node_mask(graph, ntype_id, use_graphbolt=False): - if use_graphbolt: - if NTYPE in graph.node_attributes: - dtype = F.dtype(graph.node_attributes["inner_node"]) - return ( - graph.node_attributes["inner_node"] - * F.astype(graph.node_attributes[NTYPE] == ntype_id, dtype) - == 1 - ) - else: - return graph.node_attributes["inner_node"] == 1 + ndata = graph.node_attributes if use_graphbolt else graph.ndata + assert "inner_node" in ndata, 
'"inner_node" is not nodes\' data' + if NTYPE in ndata: + dtype = F.dtype(ndata["inner_node"]) + return ( + ndata["inner_node"] * F.astype(ndata[NTYPE] == ntype_id, dtype) == 1 + ) else: - if NTYPE in graph.ndata: - dtype = F.dtype(graph.ndata["inner_node"]) - return ( - graph.ndata["inner_node"] - * F.astype(graph.ndata[NTYPE] == ntype_id, dtype) - == 1 - ) - else: - return graph.ndata["inner_node"] == 1 + return ndata["inner_node"] == 1 def _get_inner_edge_mask(graph, etype_id, use_graphbolt=False): - if use_graphbolt: - if graph.type_per_edge is not None: - dtype = F.dtype(graph.edge_attributes["inner_edge"]) - return ( - graph.edge_attributes["inner_edge"] - * F.astype(graph.type_per_edge == etype_id, dtype) - == 1 - ) - else: - return graph.edge_attributes["inner_edge"] == 1 - if ETYPE in graph.edata: - dtype = F.dtype(graph.edata["inner_edge"]) - return ( - graph.edata["inner_edge"] - * F.astype(graph.edata[ETYPE] == etype_id, dtype) - == 1 - ) + edata = graph.edge_attributes if use_graphbolt else graph.edata + etype = ( + graph.type_per_edge + if use_graphbolt + else (graph.edata[ETYPE] if ETYPE in graph.edata else None) + ) + if etype is not None: + dtype = F.dtype(edata["inner_edge"]) + return edata["inner_edge"] * F.astype(etype == etype_id, dtype) == 1 else: - return graph.edata["inner_edge"] == 1 + return edata["inner_edge"] == 1 def _get_part_ranges(id_ranges): @@ -1311,6 +1292,7 @@ def get_homogeneous(g, balance_ntypes): "ntypes": ntypes, "etypes": etypes, } + part_config = os.path.join(out_path, graph_name + ".json") for part_id in range(num_parts): part = parts[part_id] @@ -1443,31 +1425,52 @@ def get_homogeneous(g, balance_ntypes): "edge_feats": os.path.relpath(edge_feat_file, out_path), } sort_etypes = len(g.etypes) > 1 - if not use_graphbolt: + # save graph + if use_graphbolt: + + def _partition_to_graphbolt( + part_config, + part_meta, + parts, + *, + store_eids=True, + store_inner_node=False, + store_inner_edge=False, + graph_formats=None, + 
n_jobs=1, + ): + rel_path_result = gb_convert_single_dgl_partition( + part_id, + parts, + part_metadata, + part_config=part_config, + store_eids=store_eids, + store_inner_edge=store_inner_edge, + store_inner_node=store_inner_node, + graph_formats=graph_formats, + ) + part_meta[f"part-{part_id}"][ + "part_graph_graphbolt" + ] = rel_path_result + + part = _process_partitions([part], graph_formats, sort_etypes)[0] + # save FusedCSCSamplingGraph + kwargs["graph_formats"] = graph_formats + _partition_to_graphbolt(part_config, part_metadata, parts, **kwargs) + else: part_graph_file = os.path.join(part_dir, "graph.dgl") part_metadata["part-{}".format(part_id)][ "part_graph" ] = os.path.relpath(part_graph_file, out_path) - _save_graphs( + # save DGLGraph + _save_dgl_graphs( part_graph_file, [part], formats=graph_formats, sort_etypes=sort_etypes, ) - else: - part = _process_partitions([part], graph_formats, sort_etypes)[0] - part_config = os.path.join(out_path, graph_name + ".json") - if use_graphbolt: - kwargs["graph_formats"] = graph_formats - _dgl_partition_to_graphbolt( - part_config, - parts=parts, - part_meta=part_metadata, - **kwargs, - ) - else: - _dump_part_config(part_config, part_metadata) + _dump_part_config(part_config, part_metadata) num_cuts = sim_g.num_edges() - tot_num_inner_edges if num_parts == 1: @@ -1771,8 +1774,7 @@ def _convert_partition_to_graphbolt( part_meta["node_map_dtype"] = "int64" part_meta["edge_map_dtype"] = "int64" - _dump_part_config(part_config, part_meta) - print(f"Converted partitions to GraphBolt format into {part_config}") + return part_meta def _dgl_partition_to_graphbolt( @@ -1794,7 +1796,7 @@ def _dgl_partition_to_graphbolt( ) new_part_meta = copy.deepcopy(part_meta) num_parts = part_meta["num_parts"] - _convert_partition_to_graphbolt( + part_meta = _convert_partition_to_graphbolt( new_part_meta, graph_formats, part_config, @@ -1805,6 +1807,7 @@ def _dgl_partition_to_graphbolt( num_parts, parts=parts, ) + return part_meta def 
dgl_partition_to_graphbolt( @@ -1855,7 +1858,7 @@ def dgl_partition_to_graphbolt( part_meta = _load_part_config(part_config) new_part_meta = copy.deepcopy(part_meta) num_parts = part_meta["num_parts"] - _convert_partition_to_graphbolt( + part_meta = _convert_partition_to_graphbolt( new_part_meta, graph_formats, part_config, @@ -1865,3 +1868,4 @@ def dgl_partition_to_graphbolt( n_jobs, num_parts, ) + _dump_part_config(part_config, part_meta) From 6120a2684b691b91a13aeb08df3ad88f9b80547e Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Fri, 23 Aug 2024 09:47:54 +0000 Subject: [PATCH 10/39] change code format --- python/dgl/distributed/partition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index e15b723775f8..7b85cdc79351 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1437,7 +1437,6 @@ def _partition_to_graphbolt( store_inner_node=False, store_inner_edge=False, graph_formats=None, - n_jobs=1, ): rel_path_result = gb_convert_single_dgl_partition( part_id, @@ -1456,6 +1455,7 @@ def _partition_to_graphbolt( part = _process_partitions([part], graph_formats, sort_etypes)[0] # save FusedCSCSamplingGraph kwargs["graph_formats"] = graph_formats + kwargs.pop("n_jobs", None) _partition_to_graphbolt(part_config, part_metadata, parts, **kwargs) else: part_graph_file = os.path.join(part_dir, "graph.dgl") From c1dcbc061a733cbf100f89f983bf9c5b3e86cd10 Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Fri, 23 Aug 2024 09:59:34 +0000 Subject: [PATCH 11/39] change variable --- python/dgl/distributed/partition.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 7b85cdc79351..318f14bfe8f2 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1432,6 +1432,7 @@ def 
_partition_to_graphbolt( part_config, part_meta, parts, + part_i, *, store_eids=True, store_inner_node=False, @@ -1439,7 +1440,7 @@ def _partition_to_graphbolt( graph_formats=None, ): rel_path_result = gb_convert_single_dgl_partition( - part_id, + part_i, parts, part_metadata, part_config=part_config, @@ -1448,7 +1449,7 @@ def _partition_to_graphbolt( store_inner_node=store_inner_node, graph_formats=graph_formats, ) - part_meta[f"part-{part_id}"][ + part_meta[f"part-{part_i}"][ "part_graph_graphbolt" ] = rel_path_result From f046ec355b0362a33fd9a2efdbe4767bbdffcbc0 Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Mon, 26 Aug 2024 03:22:46 +0000 Subject: [PATCH 12/39] fix bug --- python/dgl/distributed/partition.py | 7 +- tests/distributed/test_partition.py | 1072 ++++++++++++++++----------- 2 files changed, 639 insertions(+), 440 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 318f14bfe8f2..4341893cadf8 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1425,14 +1425,13 @@ def get_homogeneous(g, balance_ntypes): "edge_feats": os.path.relpath(edge_feat_file, out_path), } sort_etypes = len(g.etypes) > 1 - # save graph if use_graphbolt: def _partition_to_graphbolt( part_config, - part_meta, parts, part_i, + part_metadata, *, store_eids=True, store_inner_node=False, @@ -1449,7 +1448,7 @@ def _partition_to_graphbolt( store_inner_node=store_inner_node, graph_formats=graph_formats, ) - part_meta[f"part-{part_i}"][ + part_metadata[f"part-{part_i}"][ "part_graph_graphbolt" ] = rel_path_result @@ -1457,7 +1456,7 @@ def _partition_to_graphbolt( # save FusedCSCSamplingGraph kwargs["graph_formats"] = graph_formats kwargs.pop("n_jobs", None) - _partition_to_graphbolt(part_config, part_metadata, parts, **kwargs) + _partition_to_graphbolt(part_i=part_id, part_config=part_config, part_metadata=part_metadata, parts=parts, **kwargs) else: part_graph_file = 
os.path.join(part_dir, "graph.dgl") part_metadata["part-{}".format(part_id)][ diff --git a/tests/distributed/test_partition.py b/tests/distributed/test_partition.py index 0f2425cb054d..93eb22a82c20 100644 --- a/tests/distributed/test_partition.py +++ b/tests/distributed/test_partition.py @@ -38,18 +38,18 @@ def _verify_partition_data_types(part_g, use_graphbolt=False): - if not use_graphbolt: - for k, dtype in RESERVED_FIELD_DTYPE.items(): - if k in part_g.ndata: - assert part_g.ndata[k].dtype == dtype - if k in part_g.edata: - assert part_g.edata[k].dtype == dtype - else: - for k, dtype in RESERVED_FIELD_DTYPE.items(): - if k in part_g.node_attributes: - assert part_g.node_attributes[k].dtype == dtype - if k in part_g.edge_attributes: - assert part_g.edge_attributes[k].dtype == dtype + """ + check list: + make sure nodes and edges have correct type. + """ + ndata = part_g.node_attributes if use_graphbolt else part_g.ndata + edata = part_g.edge_attributes if use_graphbolt else part_g.edata + + for k, dtype in RESERVED_FIELD_DTYPE.items(): + if k in ndata: + assert ndata[k].dtype == dtype + if k in edata: + assert edata[k].dtype == dtype def _verify_partition_formats(part_g, formats): @@ -90,11 +90,34 @@ def create_random_hetero(): return dgl.heterograph(edges, num_nodes) -def verify_hetero_graph(g, parts, use_graphbolt=False): - if use_graphbolt: - num_nodes = {ntype: 0 for ntype in g.ntypes} - num_edges = {etype: 0 for etype in g.canonical_etypes} - for part in parts: +def _verify_hetero_graph_elements_number( + g, + parts, + store_inner_node, + store_inner_edge, + use_graphbolt, +): + """ + check list: + make sure edge type are correct. + make sure the number of nodes in each node type are correct. + make sure the argument store_inner_edge and store_inner_node work. 
+ """ + num_nodes = {ntype: 0 for ntype in g.ntypes} + num_edges = {etype: 0 for etype in g.canonical_etypes} + for part in parts: + edata = part.edge_attributes if use_graphbolt else part.edata + if dgl.ETYPE in edata: + assert len(g.canonical_etypes) == len(F.unique(edata[dgl.ETYPE])) + if not use_graphbolt: + for ntype in g.ntypes: + ntype_id = g.get_ntype_id(ntype) + inner_node_mask = _get_inner_node_mask( + part, ntype_id, use_graphbolt + ) + num_inner_nodes = F.sum(F.astype(inner_node_mask, F.int64), 0) + num_nodes[ntype] += num_inner_nodes + if store_inner_edge or not use_graphbolt: for etype in g.canonical_etypes: etype_id = g.get_etype_id(etype) inner_edge_mask = _get_inner_edge_mask( @@ -103,7 +126,19 @@ def verify_hetero_graph(g, parts, use_graphbolt=False): num_inner_edges = F.sum(F.astype(inner_edge_mask, F.int64), 0) num_edges[etype] += num_inner_edges - # Verify the number of edges are correct. + # Verify the number of nodes are correct. + if not use_graphbolt: + for ntype in g.ntypes: + print( + "node {}: {}, {}".format( + ntype, g.num_nodes(ntype), num_nodes[ntype] + ) + ) + assert g.num_nodes(ntype) == num_nodes[ntype] + elif store_inner_node: + assert "inner_node" in parts[0].node_attributes + # Verify the number of edges are correct. + if store_inner_edge or not use_graphbolt: for etype in g.canonical_etypes: print( "edge {}: {}, {}".format( @@ -111,109 +146,112 @@ def verify_hetero_graph(g, parts, use_graphbolt=False): ) ) assert g.num_edges(etype) == num_edges[etype] + elif not store_inner_edge: + assert "inner_edge" not in parts[0].edge_attributes + +def _verify_hetero_graph_attributes( + g, + parts, + store_eids, + store_inner_edge, + use_graphbolt, +): + """ + check list: + make sure edge ids fall into a range. + make sure inner nodes have Ids fall into a range. + make sure all nodes is included. + make sure all edges is included. + make sure store_eids performs its function. 
+ """ + if store_eids or not use_graphbolt: nids = {ntype: [] for ntype in g.ntypes} eids = {etype: [] for etype in g.canonical_etypes} for part in parts: - eid = th.arange(len(part.edge_attributes[dgl.EID])) - etype_arr = F.gather_row(part.type_per_edge, eid) - eid_type = F.gather_row(part.edge_attributes[dgl.EID], eid) + edata = part.edge_attributes if use_graphbolt else part.edata + etype = part.type_per_edge if use_graphbolt else edata[dgl.ETYPE] + eid = th.arange(len(edata[dgl.EID])) + etype_arr = F.gather_row(etype, eid) + eid_arr = F.gather_row(edata[dgl.EID], eid) for etype in g.canonical_etypes: etype_id = g.get_etype_id(etype) eids[etype].append( - F.boolean_mask(eid_type, etype_arr == etype_id) + F.boolean_mask(eid_arr, etype_arr == etype_id) ) # Make sure edge Ids fall into a range. - inner_edge_mask = _get_inner_edge_mask( - part, etype_id, use_graphbolt - ) - inner_eids = np.sort( - F.asnumpy( - F.boolean_mask( - part.edge_attributes[dgl.EID], inner_edge_mask + if store_inner_edge or not use_graphbolt: + inner_edge_mask = _get_inner_edge_mask( + part, etype_id, use_graphbolt=use_graphbolt + ) + inner_eids = np.sort( + F.asnumpy( + F.boolean_mask(edata[dgl.EID], inner_edge_mask) ) ) - ) - assert np.all( - inner_eids == np.arange(inner_eids[0], inner_eids[-1] + 1) - ) - return - - num_nodes = {ntype: 0 for ntype in g.ntypes} - num_edges = {etype: 0 for etype in g.canonical_etypes} - for part in parts: - assert len(g.canonical_etypes) == len(F.unique(part.edata[dgl.ETYPE])) - for ntype in g.ntypes: - ntype_id = g.get_ntype_id(ntype) - inner_node_mask = _get_inner_node_mask(part, ntype_id) - num_inner_nodes = F.sum(F.astype(inner_node_mask, F.int64), 0) - num_nodes[ntype] += num_inner_nodes - for etype in g.canonical_etypes: - etype_id = g.get_etype_id(etype) - inner_edge_mask = _get_inner_edge_mask(part, etype_id) - num_inner_edges = F.sum(F.astype(inner_edge_mask, F.int64), 0) - num_edges[etype] += num_inner_edges - # Verify the number of nodes are 
correct. - for ntype in g.ntypes: - print( - "node {}: {}, {}".format( - ntype, g.num_nodes(ntype), num_nodes[ntype] - ) - ) - assert g.num_nodes(ntype) == num_nodes[ntype] - # Verify the number of edges are correct. - for etype in g.canonical_etypes: - print( - "edge {}: {}, {}".format( - etype, g.num_edges(etype), num_edges[etype] - ) - ) - assert g.num_edges(etype) == num_edges[etype] - - nids = {ntype: [] for ntype in g.ntypes} - eids = {etype: [] for etype in g.canonical_etypes} - for part in parts: - _, _, eid = part.edges(form="all") - etype_arr = F.gather_row(part.edata[dgl.ETYPE], eid) - eid_type = F.gather_row(part.edata[dgl.EID], eid) - for etype in g.canonical_etypes: - etype_id = g.get_etype_id(etype) - eids[etype].append(F.boolean_mask(eid_type, etype_arr == etype_id)) - # Make sure edge Ids fall into a range. - inner_edge_mask = _get_inner_edge_mask(part, etype_id) - inner_eids = np.sort( - F.asnumpy(F.boolean_mask(part.edata[dgl.EID], inner_edge_mask)) - ) - assert np.all( - inner_eids == np.arange(inner_eids[0], inner_eids[-1] + 1) - ) - - for ntype in g.ntypes: - ntype_id = g.get_ntype_id(ntype) - # Make sure inner nodes have Ids fall into a range. - inner_node_mask = _get_inner_node_mask(part, ntype_id) - inner_nids = F.boolean_mask(part.ndata[dgl.NID], inner_node_mask) - assert np.all( - F.asnumpy( - inner_nids - == F.arange( - F.as_scalar(inner_nids[0]), - F.as_scalar(inner_nids[-1]) + 1, + assert np.all( + inner_eids + == np.arange(inner_eids[0], inner_eids[-1] + 1) ) - ) - ) - nids[ntype].append(inner_nids) - for ntype in nids: - nids_type = F.cat(nids[ntype], 0) - uniq_ids = F.unique(nids_type) - # We should get all nodes. 
- assert len(uniq_ids) == g.num_nodes(ntype) - for etype in eids: - eids_type = F.cat(eids[etype], 0) - uniq_ids = F.unique(eids_type) - assert len(uniq_ids) == g.num_edges(etype) - # TODO(zhengda) this doesn't check 'part_id' + if not use_graphbolt: + for ntype in g.ntypes: + ntype_id = g.get_ntype_id(ntype) + # Make sure inner nodes have Ids fall into a range. + inner_node_mask = _get_inner_node_mask(part, ntype_id) + inner_nids = F.boolean_mask( + part.ndata[dgl.NID], inner_node_mask + ) + assert np.all( + F.asnumpy( + inner_nids + == F.arange( + F.as_scalar(inner_nids[0]), + F.as_scalar(inner_nids[-1]) + 1, + ) + ) + ) + nids[ntype].append(inner_nids) + + if not use_graphbolt: + for ntype in nids: + nids_type = F.cat(nids[ntype], 0) + uniq_ids = F.unique(nids_type) + # We should get all nodes. + assert len(uniq_ids) == g.num_nodes(ntype) + + for etype in eids: + eids_type = F.cat(eids[etype], 0) + uniq_ids = F.unique(eids_type) + # We should get all nodes. + assert len(uniq_ids) == g.num_edges(etype) + # TODO(zhengda) this doesn't check 'part_id' + elif not store_eids: + assert dgl.EID not in parts[0].edge_attributes + + +def _verify_hetero_graph( + g, + parts, + use_graphbolt=False, + store_eids=False, + store_inner_node=False, + store_inner_edge=False, +): + _verify_hetero_graph_elements_number( + g, + parts, + store_inner_edge=store_inner_edge, + store_inner_node=store_inner_node, + use_graphbolt=use_graphbolt, + ) + _verify_hetero_graph_attributes( + g, + parts, + store_eids=store_eids, + store_inner_edge=store_inner_edge, + use_graphbolt=use_graphbolt, + ) def verify_graph_feats( @@ -224,23 +262,33 @@ def verify_graph_feats( edge_feats, orig_nids, orig_eids, + store_eids=False, + store_inner_edge=False, + store_inner_node=False, use_graphbolt=False, + is_homo=False, ): - if use_graphbolt: + """ + check list: + make sure the feats of nodes and edges are correct + """ + if (is_homo and store_inner_node) or not use_graphbolt: for ntype in g.ntypes: + ndata = 
part.node_attributes if use_graphbolt else part.ndata ntype_id = g.get_ntype_id(ntype) inner_node_mask = _get_inner_node_mask( part, ntype_id, use_graphbolt ) - inner_nids = F.boolean_mask( - part.node_attributes[dgl.NID], inner_node_mask - ) + inner_nids = F.boolean_mask(ndata[dgl.NID], inner_node_mask) ntype_ids, inner_type_nids = gpb.map_to_per_ntype(inner_nids) partid = gpb.nid2partid(inner_type_nids, ntype) assert np.all(F.asnumpy(ntype_ids) == ntype_id) assert np.all(F.asnumpy(partid) == gpb.partid) - orig_id = orig_nids[ntype][inner_type_nids] + if is_homo: + orig_id = orig_nids[inner_type_nids] + else: + orig_id = orig_nids[ntype][inner_type_nids] local_nids = gpb.nid2localnid(inner_type_nids, gpb.partid, ntype) for name in g.nodes[ntype].data: @@ -250,63 +298,23 @@ def verify_graph_feats( ndata = F.gather_row(node_feats[ntype + "/" + name], local_nids) assert np.all(F.asnumpy(ndata == true_feats)) + if (store_inner_edge and store_eids) or not use_graphbolt: for etype in g.canonical_etypes: + edata = part.edge_attributes if use_graphbolt else part.edata etype_id = g.get_etype_id(etype) inner_edge_mask = _get_inner_edge_mask( part, etype_id, use_graphbolt ) - inner_eids = F.boolean_mask( - part.edge_attributes[dgl.EID], inner_edge_mask - ) + inner_eids = F.boolean_mask(edata[dgl.EID], inner_edge_mask) etype_ids, inner_type_eids = gpb.map_to_per_etype(inner_eids) partid = gpb.eid2partid(inner_type_eids, etype) assert np.all(F.asnumpy(etype_ids) == etype_id) assert np.all(F.asnumpy(partid) == gpb.partid) - orig_id = orig_eids[etype][inner_type_eids] - local_eids = gpb.eid2localeid(inner_type_eids, gpb.partid, etype) - - for name in g.edges[etype].data: - if name in [dgl.EID, "inner_edge"]: - continue - true_feats = F.gather_row(g.edges[etype].data[name], orig_id) - edata = F.gather_row( - edge_feats[_etype_tuple_to_str(etype) + "/" + name], - local_eids, - ) - assert np.all(F.asnumpy(edata == true_feats)) - else: - for ntype in g.ntypes: - ntype_id = 
g.get_ntype_id(ntype) - inner_node_mask = _get_inner_node_mask( - part, ntype_id, use_graphbolt - ) - inner_nids = F.boolean_mask(part.ndata[dgl.NID], inner_node_mask) - ntype_ids, inner_type_nids = gpb.map_to_per_ntype(inner_nids) - partid = gpb.nid2partid(inner_type_nids, ntype) - assert np.all(F.asnumpy(ntype_ids) == ntype_id) - assert np.all(F.asnumpy(partid) == gpb.partid) - - orig_id = orig_nids[ntype][inner_type_nids] - local_nids = gpb.nid2localnid(inner_type_nids, gpb.partid, ntype) - - for name in g.nodes[ntype].data: - if name in [dgl.NID, "inner_node"]: - continue - true_feats = F.gather_row(g.nodes[ntype].data[name], orig_id) - ndata = F.gather_row(node_feats[ntype + "/" + name], local_nids) - assert np.all(F.asnumpy(ndata == true_feats)) - - for etype in g.canonical_etypes: - etype_id = g.get_etype_id(etype) - inner_edge_mask = _get_inner_edge_mask(part, etype_id) - inner_eids = F.boolean_mask(part.edata[dgl.EID], inner_edge_mask) - etype_ids, inner_type_eids = gpb.map_to_per_etype(inner_eids) - partid = gpb.eid2partid(inner_type_eids, etype) - assert np.all(F.asnumpy(etype_ids) == etype_id) - assert np.all(F.asnumpy(partid) == gpb.partid) - - orig_id = orig_eids[etype][inner_type_eids] + if is_homo: + orig_id = orig_eids[inner_type_eids] + else: + orig_id = orig_eids[etype][inner_type_eids] local_eids = gpb.eid2localeid(inner_type_eids, gpb.partid, etype) for name in g.edges[etype].data: @@ -437,7 +445,7 @@ def check_hetero_partition( shuffled_elabels.append( edge_feats[_etype_tuple_to_str(test_etype) + "/labels"] ) - verify_hetero_graph(hg, parts) + _verify_hetero_graph(hg, parts) shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) shuffled_elabels = F.asnumpy(F.cat(shuffled_elabels, 0)) @@ -1189,15 +1197,194 @@ def test_not_sorted_node_edge_map(): assert gpb.local_etype_offset == [0, 500, 1100, 1800, 2600] +def _verify_metadata( + g, + gpb, + part_g, + num_parts, + part_sizes, + part_i, + store_inner_node, + store_inner_edge, + store_eids, +): + 
""" + # Check the metadata + check list: + make sure gpb have correct node and edge number. + make sure gpb have correct number of partitions. + make sure gpb have correct number of nodes and edges in each partition. + make sure local nid and eid have correct dtype. + make sure local nid have correct order + """ + assert gpb._num_nodes() == g.num_nodes() + assert gpb._num_edges() == g.num_edges() + + assert gpb.num_partitions() == num_parts + gpb_meta = gpb.metadata() + assert len(gpb_meta) == num_parts + assert len(gpb.partid2nids(part_i)) == gpb_meta[part_i]["num_nodes"] + assert len(gpb.partid2eids(part_i)) == gpb_meta[part_i]["num_edges"] + part_sizes.append( + (gpb_meta[part_i]["num_nodes"], gpb_meta[part_i]["num_edges"]) + ) + + if store_inner_node and store_inner_edge and store_eids: + nid = F.boolean_mask( + part_g.node_attributes[dgl.NID], + part_g.node_attributes["inner_node"], + ) + local_nid = gpb.nid2localnid(nid, part_i) + assert F.dtype(local_nid) in (F.int64, F.int32) + assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid))) + eid = F.boolean_mask( + part_g.edge_attributes[dgl.EID], + part_g.edge_attributes["inner_edge"], + ) + local_eid = gpb.eid2localeid(eid, part_i) + assert F.dtype(local_eid) in (F.int64, F.int32) + assert np.all( + np.sort(F.asnumpy(local_eid)) == np.arange(0, len(local_eid)) + ) + return local_eid, local_nid + else: + return None, None + + +def _verify_mapping( + g, + part_g, + part_i, + gpb, + orig_nids, + orig_eids, + node_feats, + edge_feats, + local_nid=None, + local_eid=None, + store_inner_node=False, + store_inner_edge=False, + store_eids=False, +): + """ + check list: + make sure nodes and edges's data type are correct. + make sure nodes and edges's ID in correct order. + make sure the number of nodes and edges's ID are correct. + """ + if store_inner_node and store_inner_edge and store_eids: + # Check the node map. 
+ local_nodes = F.boolean_mask( + part_g.node_attributes[dgl.NID], + part_g.node_attributes["inner_node"], + ) + inner_node_index = F.nonzero_1d(part_g.node_attributes["inner_node"]) + mapping_nodes = gpb.partid2nids(part_i) + assert F.dtype(mapping_nodes) in (F.int32, F.int64) + assert np.all( + np.sort(F.asnumpy(local_nodes)) == np.sort(F.asnumpy(mapping_nodes)) + ) + assert np.all( + F.asnumpy(inner_node_index) == np.arange(len(inner_node_index)) + ) + + # Check the edge map. + + local_edges = F.boolean_mask( + part_g.edge_attributes[dgl.EID], + part_g.edge_attributes["inner_edge"], + ) + inner_edge_index = F.nonzero_1d(part_g.edge_attributes["inner_edge"]) + mapping_edges = gpb.partid2eids(part_i) + assert F.dtype(mapping_edges) in (F.int32, F.int64) + assert np.all( + np.sort(F.asnumpy(local_edges)) == np.sort(F.asnumpy(mapping_edges)) + ) + assert np.all( + F.asnumpy(inner_edge_index) == np.arange(len(inner_edge_index)) + ) + + local_nodes = orig_nids[local_nodes] + local_edges = orig_eids[local_edges] + + for name in ["labels", "feats"]: + assert "_N/" + name in node_feats + assert node_feats["_N/" + name].shape[0] == len(local_nodes) + true_feats = F.gather_row(g.ndata[name], local_nodes) + ndata = F.gather_row(node_feats["_N/" + name], local_nid) + assert np.all(F.asnumpy(true_feats) == F.asnumpy(ndata)) + for name in ["feats"]: + efeat_name = _etype_tuple_to_str(DEFAULT_ETYPE) + "/" + name + assert efeat_name in edge_feats + assert edge_feats[efeat_name].shape[0] == len(local_edges) + true_feats = F.gather_row(g.edata[name], local_edges) + edata = F.gather_row(edge_feats[efeat_name], local_eid) + assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata)) + + if store_eids: + # Verify the mapping between the reshuffled IDs and the original IDs. 
+ indices, indptr = part_g.indices, part_g.csc_indptr + adj_matrix = dglsp.from_csc(indptr, indices) + part_src_ids, part_dst_ids = adj_matrix.coo() + part_src_ids = F.gather_row( + part_g.node_attributes[dgl.NID], part_src_ids + ) + part_dst_ids = F.gather_row( + part_g.node_attributes[dgl.NID], part_dst_ids + ) + part_eids = part_g.edge_attributes[dgl.EID] + orig_src_ids = F.gather_row(orig_nids, part_src_ids) + orig_dst_ids = F.gather_row(orig_nids, part_dst_ids) + orig_eids1 = F.gather_row(orig_eids, part_eids) + orig_eids2 = g.edge_ids(orig_src_ids, orig_dst_ids) + assert F.shape(orig_eids1)[0] == F.shape(orig_eids2)[0] + assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) + + local_orig_nids = orig_nids[part_g.node_attributes[dgl.NID]] + local_orig_eids = orig_eids[part_g.edge_attributes[dgl.EID]] + part_g.node_attributes["feats"] = F.gather_row( + g.ndata["feats"], local_orig_nids + ) + part_g.edge_attributes["feats"] = F.gather_row( + g.edata["feats"], local_orig_eids + ) + else: + assert dgl.EID not in part_g.edge_attributes + + return node_feats["_N/labels"], edge_feats["_N:_E:_N/feats"] + + @pytest.mark.parametrize("part_method", ["metis", "random"]) @pytest.mark.parametrize("num_parts", [1, 4]) +@pytest.mark.parametrize("store_eids", [True, False]) +@pytest.mark.parametrize("store_inner_node", [True, False]) +@pytest.mark.parametrize("store_inner_edge", [True, False]) @pytest.mark.parametrize("debug_mode", [True, False]) def test_partition_graph_graphbolt_homo( part_method, num_parts, + store_eids, + store_inner_node, + store_inner_edge, debug_mode, - num_trainers_per_machine=1, ): + """ + check list: + _verify_metadata: + number of edges, nodes, partitions for all + number of edges, nodes in each partitions + order and data type of local nid and eid + + _verify_mapping: + data type, ID's order and ID's number of edges and nodes + + verify_graph_feats: + graph's feats + + _verify_reconstrunt_IDs: + check if feats and IDs can be reconstructed + 
+ """ reset_envs() if debug_mode: os.environ["DGL_DIST_DEBUG"] = "1" @@ -1211,8 +1398,6 @@ def test_partition_graph_graphbolt_homo( g.edata["feats"] = F.tensor( np.random.randn(g.num_edges(), 10), F.float32 ) - g.update_all(fn.copy_u("feats", "msg"), fn.sum("msg", "h")) - g.update_all(fn.copy_e("feats", "msg"), fn.sum("msg", "eh")) orig_nids, orig_eids = partition_graph( g, @@ -1221,195 +1406,271 @@ def test_partition_graph_graphbolt_homo( test_dir, part_method=part_method, use_graphbolt=True, - store_eids=True, - store_inner_node=True, - store_inner_edge=True, + store_eids=store_eids, + store_inner_node=store_inner_node, + store_inner_edge=store_inner_edge, return_mapping=True, ) + if debug_mode: + store_eids = True + store_inner_node = True + store_inner_edge = True part_sizes = [] shuffled_labels = [] shuffled_edata = [] part_config = os.path.join(test_dir, f"{graph_name}.json") - for i in range(num_parts): + for part_i in range(num_parts): part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( - part_config, i, load_feats=True, use_graphbolt=True + part_config, part_i, load_feats=True, use_graphbolt=True ) - if num_trainers_per_machine > 1: - for ntype in g.ntypes: - name = ntype + "/trainer_id" - assert name in node_feats - part_ids = F.floor_div( - node_feats[name], num_trainers_per_machine - ) - assert np.all(F.asnumpy(part_ids) == i) - for etype in g.canonical_etypes: - name = _etype_tuple_to_str(etype) + "/trainer_id" - assert name in edge_feats - part_ids = F.floor_div( - edge_feats[name], num_trainers_per_machine - ) - assert np.all(F.asnumpy(part_ids) == i) - - # Check the metadata - assert gpb._num_nodes() == g.num_nodes() - assert gpb._num_edges() == g.num_edges() - - assert gpb.num_partitions() == num_parts - gpb_meta = gpb.metadata() - assert len(gpb_meta) == num_parts - assert len(gpb.partid2nids(i)) == gpb_meta[i]["num_nodes"] - assert len(gpb.partid2eids(i)) == gpb_meta[i]["num_edges"] - part_sizes.append( - (gpb_meta[i]["num_nodes"], 
gpb_meta[i]["num_edges"]) + local_eid, local_nid = _verify_metadata( + g, + gpb, + part_g, + num_parts, + part_sizes, + part_i, + store_inner_node, + store_inner_edge, + store_eids, ) - nid = F.boolean_mask( - part_g.node_attributes[dgl.NID], - part_g.node_attributes["inner_node"], - ) - local_nid = gpb.nid2localnid(nid, i) - assert F.dtype(local_nid) in (F.int64, F.int32) - assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid))) - eid = F.boolean_mask( - part_g.edge_attributes[dgl.EID], - part_g.edge_attributes["inner_edge"], + node_feat, edge_feat = _verify_mapping( + g, + part_g, + part_i, + gpb, + orig_nids, + orig_eids, + node_feats, + edge_feats, + local_nid=local_nid, + local_eid=local_eid, + store_inner_node=store_inner_node, + store_inner_edge=store_inner_edge, + store_eids=store_eids, ) - local_eid = gpb.eid2localeid(eid, i) - assert F.dtype(local_eid) in (F.int64, F.int32) - assert np.all( - np.sort(F.asnumpy(local_eid)) == np.arange(0, len(local_eid)) + shuffled_labels.append(node_feat) + shuffled_edata.append(edge_feat) + + verify_graph_feats( + g, + gpb, + part_g, + node_feats, + edge_feats, + orig_nids, + orig_eids, + store_eids=store_eids, + store_inner_edge=store_inner_edge, + store_inner_node=store_inner_node, + use_graphbolt=True, + is_homo=True, ) - # Check the node map. 
- local_nodes = F.boolean_mask( - part_g.node_attributes[dgl.NID], - part_g.node_attributes["inner_node"], - ) - llocal_nodes = F.nonzero_1d(part_g.node_attributes["inner_node"]) - local_nodes1 = gpb.partid2nids(i) - assert F.dtype(local_nodes1) in (F.int32, F.int64) - assert np.all( - np.sort(F.asnumpy(local_nodes)) - == np.sort(F.asnumpy(local_nodes1)) - ) - assert np.all( - F.asnumpy(llocal_nodes) == np.arange(len(llocal_nodes)) - ) + _verify_reconstrunt_data( + g, + gpb, + orig_nids, + orig_eids, + part_sizes, + shuffled_labels, + shuffled_edata, + ) + + +def _vertify_original_IDs(g, orig_nids, orig_eids): + """ + check list: + make sure nodes and edges' data types are correct + make sure nodes and edges' number in each type is correct + """ + assert len(orig_nids) == len(g.ntypes) + assert len(orig_eids) == len(g.canonical_etypes) + for ntype in g.ntypes: + assert len(orig_nids[ntype]) == g.num_nodes(ntype) + for etype in g.canonical_etypes: + assert len(orig_eids[etype]) == g.num_edges(etype) - # Check the edge map. - local_edges = F.boolean_mask( - part_g.edge_attributes[dgl.EID], - part_g.edge_attributes["inner_edge"], - ) - llocal_edges = F.nonzero_1d(part_g.edge_attributes["inner_edge"]) - local_edges1 = gpb.partid2eids(i) - assert F.dtype(local_edges1) in (F.int32, F.int64) - assert np.all( - np.sort(F.asnumpy(local_edges)) - == np.sort(F.asnumpy(local_edges1)) - ) - assert np.all( - F.asnumpy(llocal_edges) == np.arange(len(llocal_edges)) - ) - # Verify the mapping between the reshuffled IDs and the original IDs. - indices, indptr = part_g.indices, part_g.csc_indptr - adj_matrix = dglsp.from_csc(indptr, indices) - part_src_ids, part_dst_ids = adj_matrix.coo() - part_src_ids = F.gather_row( - part_g.node_attributes[dgl.NID], part_src_ids +def _verify_reconstrunt_data( + g, gpb, orig_nids, orig_eids, part_sizes, shuffled_labels, shuffled_edata +): + """ + check list: + make sure labels and feats are correct. + make sure nodes and edges' id are correct. 
+ make sure node and edges' part + """ + # Verify that we can reconstruct node/edge data for original IDs. + shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) + shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0)) + orig_labels = np.zeros(shuffled_labels.shape, dtype=shuffled_labels.dtype) + orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype) + orig_labels[F.asnumpy(orig_nids)] = shuffled_labels + orig_edata[F.asnumpy(orig_eids)] = shuffled_edata + assert np.all(orig_labels == F.asnumpy(g.ndata["labels"])) + assert np.all(orig_edata == F.asnumpy(g.edata["feats"])) + + node_map = [] + edge_map = [] + for part_i, (num_nodes, num_edges) in enumerate(part_sizes): + node_map.append(np.ones(num_nodes) * part_i) + edge_map.append(np.ones(num_edges) * part_i) + node_map = np.concatenate(node_map) + edge_map = np.concatenate(edge_map) + nid2pid = gpb.nid2partid(F.arange(0, len(node_map))) + assert F.dtype(nid2pid) in (F.int32, F.int64) + assert np.all(F.asnumpy(nid2pid) == node_map) + eid2pid = gpb.eid2partid(F.arange(0, len(edge_map))) + assert F.dtype(eid2pid) in (F.int32, F.int64) + assert np.all(F.asnumpy(eid2pid) == edge_map) + + +def _verify_graphbolt_mapping_IDs( + g, + part_g, + gpb, + orig_nids, + orig_eids, + node_feats, + edge_feats, + test_ntype, + test_etype, + store_eids, + store_inner_node, + store_inner_edge, +): + """ + check list: + make sure nodes and edges' ids have correct type. + make sure nodes and edges have corrert map ids. + """ + # Verify the mapping between the reshuffled IDs and the original IDs. + # These are partition-local IDs. + indices, indptr = part_g.indices, part_g.csc_indptr + csc_matrix = dglsp.from_csc(indptr, indices) + part_src_ids, part_dst_ids = csc_matrix.coo() + # These are reshuffled global homogeneous IDs. + part_src_ids = F.gather_row(part_g.node_attributes[dgl.NID], part_src_ids) + part_dst_ids = F.gather_row(part_g.node_attributes[dgl.NID], part_dst_ids) + # These are reshuffled per-type IDs. 
+ src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids) + dst_ntype_ids, part_dst_ids = gpb.map_to_per_ntype(part_dst_ids) + # `IdMap` is in int64 by default. + assert src_ntype_ids.dtype == F.int64 + assert dst_ntype_ids.dtype == F.int64 + + with pytest.raises(dgl.utils.internal.InconsistentDtypeException): + gpb.map_to_per_ntype(F.tensor([0], F.int32)) + with pytest.raises(dgl.utils.internal.InconsistentDtypeException): + gpb.map_to_per_etype(F.tensor([0], F.int32)) + + if store_eids: + part_eids = part_g.edge_attributes[dgl.EID] + etype_ids, part_eids = gpb.map_to_per_etype(part_eids) + # `IdMap` is in int64 by default. + assert etype_ids.dtype == F.int64 + + # These are original per-type IDs. + for etype_id, etype in enumerate(g.canonical_etypes): + part_src_ids1 = F.boolean_mask(part_src_ids, etype_ids == etype_id) + src_ntype_ids1 = F.boolean_mask( + src_ntype_ids, etype_ids == etype_id ) - part_dst_ids = F.gather_row( - part_g.node_attributes[dgl.NID], part_dst_ids + part_dst_ids1 = F.boolean_mask(part_dst_ids, etype_ids == etype_id) + dst_ntype_ids1 = F.boolean_mask( + dst_ntype_ids, etype_ids == etype_id ) - part_eids = part_g.edge_attributes[dgl.EID] - orig_src_ids = F.gather_row(orig_nids, part_src_ids) - orig_dst_ids = F.gather_row(orig_nids, part_dst_ids) - orig_eids1 = F.gather_row(orig_eids, part_eids) - orig_eids2 = g.edge_ids(orig_src_ids, orig_dst_ids) - assert F.shape(orig_eids1)[0] == F.shape(orig_eids2)[0] + part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id) + assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0])) + assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0])) + src_ntype = g.ntypes[F.as_scalar(src_ntype_ids1[0])] + dst_ntype = g.ntypes[F.as_scalar(dst_ntype_ids1[0])] + orig_src_ids1 = F.gather_row(orig_nids[src_ntype], part_src_ids1) + orig_dst_ids1 = F.gather_row(orig_nids[dst_ntype], part_dst_ids1) + orig_eids1 = F.gather_row(orig_eids[etype], part_eids1) + orig_eids2 = g.edge_ids(orig_src_ids1, 
orig_dst_ids1, etype=etype) + assert len(orig_eids1) == len(orig_eids2) assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) + else: + assert dgl.EID not in part_g.edge_attributes + verify_graph_feats( + g, + gpb, + part_g, + node_feats, + edge_feats, + orig_nids, + orig_eids, + store_eids, + store_inner_edge=store_inner_edge, + store_inner_node=store_inner_node, + use_graphbolt=True, + ) - local_orig_nids = orig_nids[part_g.node_attributes[dgl.NID]] - local_orig_eids = orig_eids[part_g.edge_attributes[dgl.EID]] - part_g.node_attributes["feats"] = F.gather_row( - g.ndata["feats"], local_orig_nids - ) - part_g.edge_attributes["feats"] = F.gather_row( - g.edata["feats"], local_orig_eids - ) - local_nodes = orig_nids[local_nodes] - local_edges = orig_eids[local_edges] - - # part_g.update_all(fn.copy_u("feats", "msg"), fn.sum("msg", "h")) - # part_g.update_all(fn.copy_e("feats", "msg"), fn.sum("msg", "eh")) - # part_g.node_attributes["h"] = adj_matrix@part_g.node_attributes["h"] - - # assert F.allclose( - # F.gather_row(g.ndata["h"], local_nodes), - # F.gather_row(part_g.node_attributes["h"], llocal_nodes), - # ) - # assert F.allclose( - # F.gather_row(g.ndata["eh"], local_nodes), - # F.gather_row(part_g.node_attributes["eh"], llocal_nodes), - # ) - - for name in ["labels", "feats"]: - assert "_N/" + name in node_feats - assert node_feats["_N/" + name].shape[0] == len(local_nodes) - true_feats = F.gather_row(g.ndata[name], local_nodes) - ndata = F.gather_row(node_feats["_N/" + name], local_nid) - assert np.all(F.asnumpy(true_feats) == F.asnumpy(ndata)) - for name in ["feats"]: - efeat_name = _etype_tuple_to_str(DEFAULT_ETYPE) + "/" + name - assert efeat_name in edge_feats - assert edge_feats[efeat_name].shape[0] == len(local_edges) - true_feats = F.gather_row(g.edata[name], local_edges) - edata = F.gather_row(edge_feats[efeat_name], local_eid) - assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata)) - - # This only works if node/edge IDs are shuffled. 
- shuffled_labels.append(node_feats["_N/labels"]) - shuffled_edata.append(edge_feats["_N:_E:_N/feats"]) - - # Verify that we can reconstruct node/edge data for original IDs. - shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) - shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0)) - orig_labels = np.zeros( - shuffled_labels.shape, dtype=shuffled_labels.dtype - ) - orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype) - orig_labels[F.asnumpy(orig_nids)] = shuffled_labels - orig_edata[F.asnumpy(orig_eids)] = shuffled_edata - assert np.all(orig_labels == F.asnumpy(g.ndata["labels"])) - assert np.all(orig_edata == F.asnumpy(g.edata["feats"])) - - node_map = [] - edge_map = [] - for i, (num_nodes, num_edges) in enumerate(part_sizes): - node_map.append(np.ones(num_nodes) * i) - edge_map.append(np.ones(num_edges) * i) - node_map = np.concatenate(node_map) - edge_map = np.concatenate(edge_map) - nid2pid = gpb.nid2partid(F.arange(0, len(node_map))) - assert F.dtype(nid2pid) in (F.int32, F.int64) - assert np.all(F.asnumpy(nid2pid) == node_map) - eid2pid = gpb.eid2partid(F.arange(0, len(edge_map))) - assert F.dtype(eid2pid) in (F.int32, F.int64) - assert np.all(F.asnumpy(eid2pid) == edge_map) + shuffled_label = node_feats[test_ntype + "/labels"] + shuffled_elabel = edge_feats[_etype_tuple_to_str(test_etype) + "/labels"] + return part_g, shuffled_label, shuffled_elabel + + +def _verify_labels( + g, + shuffled_labels, + shuffled_elabels, + orig_nids, + orig_eids, + test_ntype, + test_etype, +): + """ + check list: + make sure node labels are correct. + make sure edge labels are correct. 
+ """ + shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) + shuffled_elabels = F.asnumpy(F.cat(shuffled_elabels, 0)) + orig_labels = np.zeros(shuffled_labels.shape, dtype=shuffled_labels.dtype) + orig_elabels = np.zeros( + shuffled_elabels.shape, dtype=shuffled_elabels.dtype + ) + orig_labels[F.asnumpy(orig_nids[test_ntype])] = shuffled_labels + orig_elabels[F.asnumpy(orig_eids[test_etype])] = shuffled_elabels + assert np.all(orig_labels == F.asnumpy(g.nodes[test_ntype].data["labels"])) + assert np.all(orig_elabels == F.asnumpy(g.edges[test_etype].data["labels"])) @pytest.mark.parametrize("part_method", ["metis", "random"]) @pytest.mark.parametrize("num_parts", [1, 4]) +@pytest.mark.parametrize("store_eids", [True, False]) +@pytest.mark.parametrize("store_inner_node", [True, False]) +@pytest.mark.parametrize("store_inner_edge", [True, False]) @pytest.mark.parametrize("debug_mode", [True, False]) def test_partition_graph_graphbolt_hetero( part_method, num_parts, + store_eids, + store_inner_node, + store_inner_edge, debug_mode, n_jobs=1, - num_trainers_per_machine=1, ): + """ + check list: + _vertify_original_IDs: + number of edges and nodes' type and number of them in each type + + _verify_graphbolt_mapping_IDs: + mapping node and edge IDs + feats in graph + + _verify_hetero_graph: + number, order of elements in hetero graph + + _verify_labels: + labels of nodes and edges + """ test_ntype = "n1" test_etype = ("n1", "r1", "n2") reset_envs() @@ -1417,6 +1678,7 @@ def test_partition_graph_graphbolt_hetero( os.environ["DGL_DIST_DEBUG"] = "1" with tempfile.TemporaryDirectory() as test_dir: hg = create_random_hetero() + # TODO create graph data graph_name = "test" hg.nodes[test_ntype].data["labels"] = F.arange( 0, hg.num_nodes(test_ntype) @@ -1430,7 +1692,6 @@ def test_partition_graph_graphbolt_hetero( hg.edges[test_etype].data["labels"] = F.arange( 0, hg.num_edges(test_etype) ) - num_hops = 1 orig_nids, orig_eids = partition_graph( hg, graph_name, @@ -1440,132 
+1701,65 @@ def test_partition_graph_graphbolt_hetero( return_mapping=True, num_trainers_per_machine=1, use_graphbolt=True, - store_eids=True, - store_inner_node=True, - store_inner_edge=True, + store_eids=store_eids, + store_inner_node=store_inner_node, + store_inner_edge=store_inner_edge, n_jobs=n_jobs, ) - assert len(orig_nids) == len(hg.ntypes) - assert len(orig_eids) == len(hg.canonical_etypes) - for ntype in hg.ntypes: - assert len(orig_nids[ntype]) == hg.num_nodes(ntype) - for etype in hg.canonical_etypes: - assert len(orig_eids[etype]) == hg.num_edges(etype) + _vertify_original_IDs(hg, orig_nids, orig_eids) + + if debug_mode: + store_eids = True + store_inner_node = True + store_inner_edge = True + parts = [] shuffled_labels = [] shuffled_elabels = [] part_config = os.path.join(test_dir, f"{graph_name}.json") + # test each part for part_id in range(num_parts): part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( part_config, part_id, load_feats=True, use_graphbolt=True ) - if num_trainers_per_machine > 1: - for ntype in hg.ntypes: - name = ntype + "/trainer_id" - assert name in node_feats - part_ids = F.floor_div( - node_feats[name], num_trainers_per_machine - ) - assert np.all(F.asnumpy(part_ids) == part_id) - - for etype in hg.canonical_etypes: - name = _etype_tuple_to_str(etype) + "/trainer_id" - assert name in edge_feats - part_ids = F.floor_div( - edge_feats[name], num_trainers_per_machine - ) - assert np.all(F.asnumpy(part_ids) == part_id) - - # Verify the mapping between the reshuffled IDs and the original IDs. - # These are partition-local IDs. - indices, indptr = part_g.indices, part_g.csc_indptr - csc_matrix = dglsp.from_csc(indptr, indices) - part_src_ids, part_dst_ids = csc_matrix.coo() - # These are reshuffled global homogeneous IDs. 
- part_src_ids = F.gather_row( - part_g.node_attributes[dgl.NID], part_src_ids - ) - part_dst_ids = F.gather_row( - part_g.node_attributes[dgl.NID], part_dst_ids + # TODO verify mapping IDs + ( + part_g, + shuffled_label, + shuffled_elabel, + ) = _verify_graphbolt_mapping_IDs( + hg, + part_g, + gpb, + orig_nids, + orig_eids, + node_feats, + edge_feats, + test_ntype, + test_etype, + store_eids=store_eids, + store_inner_node=store_inner_node, + store_inner_edge=store_inner_edge, ) - part_eids = part_g.edge_attributes[dgl.EID] - # These are reshuffled per-type IDs. - src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids) - dst_ntype_ids, part_dst_ids = gpb.map_to_per_ntype(part_dst_ids) - etype_ids, part_eids = gpb.map_to_per_etype(part_eids) - # `IdMap` is in int64 by default. - assert src_ntype_ids.dtype == F.int64 - assert dst_ntype_ids.dtype == F.int64 - assert etype_ids.dtype == F.int64 - with pytest.raises(dgl.utils.internal.InconsistentDtypeException): - gpb.map_to_per_ntype(F.tensor([0], F.int32)) - with pytest.raises(dgl.utils.internal.InconsistentDtypeException): - gpb.map_to_per_etype(F.tensor([0], F.int32)) - # These are original per-type IDs. 
- for etype_id, etype in enumerate(hg.canonical_etypes): - part_src_ids1 = F.boolean_mask( - part_src_ids, etype_ids == etype_id - ) - src_ntype_ids1 = F.boolean_mask( - src_ntype_ids, etype_ids == etype_id - ) - part_dst_ids1 = F.boolean_mask( - part_dst_ids, etype_ids == etype_id - ) - dst_ntype_ids1 = F.boolean_mask( - dst_ntype_ids, etype_ids == etype_id - ) - part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id) - assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0])) - assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0])) - src_ntype = hg.ntypes[F.as_scalar(src_ntype_ids1[0])] - dst_ntype = hg.ntypes[F.as_scalar(dst_ntype_ids1[0])] - orig_src_ids1 = F.gather_row( - orig_nids[src_ntype], part_src_ids1 - ) - orig_dst_ids1 = F.gather_row( - orig_nids[dst_ntype], part_dst_ids1 - ) - orig_eids1 = F.gather_row(orig_eids[etype], part_eids1) - orig_eids2 = hg.edge_ids( - orig_src_ids1, orig_dst_ids1, etype=etype - ) - assert len(orig_eids1) == len(orig_eids2) - assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) parts.append(part_g) - if NTYPE in part_g.node_attributes: - verify_graph_feats( - hg, - gpb, - part_g, - node_feats, - edge_feats, - orig_nids, - orig_eids, - use_graphbolt=True, - ) - - shuffled_labels.append(node_feats[test_ntype + "/labels"]) - shuffled_elabels.append( - edge_feats[_etype_tuple_to_str(test_etype) + "/labels"] - ) - verify_hetero_graph(hg, parts, True) - - shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) - shuffled_elabels = F.asnumpy(F.cat(shuffled_elabels, 0)) - orig_labels = np.zeros( - shuffled_labels.shape, dtype=shuffled_labels.dtype - ) - orig_elabels = np.zeros( - shuffled_elabels.shape, dtype=shuffled_elabels.dtype - ) - orig_labels[F.asnumpy(orig_nids[test_ntype])] = shuffled_labels - orig_elabels[F.asnumpy(orig_eids[test_etype])] = shuffled_elabels - assert np.all( - orig_labels == F.asnumpy(hg.nodes[test_ntype].data["labels"]) + shuffled_labels.append(shuffled_label) + 
shuffled_elabels.append(shuffled_elabel) + _verify_hetero_graph( + hg, + parts, + True, + store_eids=store_eids, + store_inner_node=store_inner_node, + store_inner_edge=store_inner_edge, ) - assert np.all( - orig_elabels == F.asnumpy(hg.edges[test_etype].data["labels"]) + _verify_labels( + hg, + shuffled_labels, + shuffled_elabels, + orig_nids, + orig_eids, + test_ntype, + test_etype, ) @@ -1793,6 +1987,9 @@ def test_partition_graph_graphbolt_hetero_multi( part_method="random", num_parts=num_parts, n_jobs=4, + store_eids=True, + store_inner_node=True, + store_inner_edge=True, debug_mode=False, ) @@ -1819,3 +2016,6 @@ def test_partition_graph_graphbolt_hetero_find_edges_multi( graph_formats="coo", n_jobs=4, ) + +if __name__ == '__main__': + test_partition_graph_graphbolt_hetero('metis',4,True,False,False,False) \ No newline at end of file From f279e3dfc958e8e93cfaa9c3398f2dd9a2401175 Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Mon, 26 Aug 2024 03:34:07 +0000 Subject: [PATCH 13/39] change format --- python/dgl/distributed/partition.py | 8 +++++++- tests/distributed/test_partition.py | 3 --- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 4341893cadf8..3ba410f5100d 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1456,7 +1456,13 @@ def _partition_to_graphbolt( # save FusedCSCSamplingGraph kwargs["graph_formats"] = graph_formats kwargs.pop("n_jobs", None) - _partition_to_graphbolt(part_i=part_id, part_config=part_config, part_metadata=part_metadata, parts=parts, **kwargs) + _partition_to_graphbolt( + part_i=part_id, + part_config=part_config, + part_metadata=part_metadata, + parts=parts, + **kwargs, + ) else: part_graph_file = os.path.join(part_dir, "graph.dgl") part_metadata["part-{}".format(part_id)][ diff --git a/tests/distributed/test_partition.py b/tests/distributed/test_partition.py index 
93eb22a82c20..ab877c19f6f7 100644 --- a/tests/distributed/test_partition.py +++ b/tests/distributed/test_partition.py @@ -2016,6 +2016,3 @@ def test_partition_graph_graphbolt_hetero_find_edges_multi( graph_formats="coo", n_jobs=4, ) - -if __name__ == '__main__': - test_partition_graph_graphbolt_hetero('metis',4,True,False,False,False) \ No newline at end of file From 081c1ad6c99120df32dbceb098c0c648a81dfc07 Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Mon, 26 Aug 2024 07:54:58 +0000 Subject: [PATCH 14/39] fix pr --- python/dgl/distributed/partition.py | 361 +++++++--------------------- 1 file changed, 86 insertions(+), 275 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index f5d81decea92..081001ca86ac 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -88,7 +88,7 @@ def _dump_part_config(part_config, part_metadata): json.dump(part_metadata, outfile, sort_keys=False, indent=4) -def _process_partitions(g_list, formats=None, sort_etypes=False): +def _save_graphs(filename, g_list, formats=None, sort_etypes=False): """Preprocess partitions before saving: 1. format data types. 2. sort csc/csr by tag. 
@@ -106,13 +106,6 @@ def _process_partitions(g_list, formats=None, sort_etypes=False): g = sort_csr_by_tag(g, tag=g.edata[ETYPE], tag_type="edge") if "csc" in formats: g = sort_csc_by_tag(g, tag=g.edata[ETYPE], tag_type="edge") - return g_list - - -def _save_graphs(filename, g_list, formats=None, sort_etypes=False): - g_list = _process_partitions( - g_list, formats=formats, sort_etypes=sort_etypes - ) save_graphs(filename, g_list, formats=formats) @@ -324,10 +317,9 @@ def load_partition(part_config, part_id, load_feats=True, use_graphbolt=False): "part-{}".format(part_id) in part_metadata ), "part-{} does not exist".format(part_id) part_files = part_metadata["part-{}".format(part_id)] + part_graph_field = "part_graph" if use_graphbolt: part_graph_field = "part_graph_graphbolt" - else: - part_graph_field = "part_graph" assert ( part_graph_field in part_files ), f"the partition does not contain graph structure: {part_graph_field}" @@ -454,105 +446,6 @@ def load_partition_feats( return node_feats, edge_feats -def _load_partition_book_from_metadata(part_metadata, part_id): - assert "num_parts" in part_metadata, "num_parts does not exist." - assert ( - part_metadata["num_parts"] > part_id - ), "part {} is out of range (#parts: {})".format( - part_id, part_metadata["num_parts"] - ) - num_parts = part_metadata["num_parts"] - assert ( - "num_nodes" in part_metadata - ), "cannot get the number of nodes of the global graph." - assert ( - "num_edges" in part_metadata - ), "cannot get the number of edges of the global graph." - assert "node_map" in part_metadata, "cannot get the node map." - assert "edge_map" in part_metadata, "cannot get the edge map." - assert "graph_name" in part_metadata, "cannot get the graph name" - - # If this is a range partitioning, node_map actually stores a list, whose elements - # indicate the boundary of range partitioning. Otherwise, node_map stores a filename - # that contains node map in a NumPy array. 
- node_map = part_metadata["node_map"] - edge_map = part_metadata["edge_map"] - if isinstance(node_map, dict): - for key in node_map: - is_range_part = isinstance(node_map[key], list) - break - elif isinstance(node_map, list): - is_range_part = True - node_map = {DEFAULT_NTYPE: node_map} - else: - is_range_part = False - if isinstance(edge_map, list): - edge_map = {DEFAULT_ETYPE: edge_map} - - ntypes = {DEFAULT_NTYPE: 0} - etypes = {DEFAULT_ETYPE: 0} - if "ntypes" in part_metadata: - ntypes = part_metadata["ntypes"] - if "etypes" in part_metadata: - etypes = part_metadata["etypes"] - - if isinstance(node_map, dict): - for key in node_map: - assert key in ntypes, "The node type {} is invalid".format(key) - if isinstance(edge_map, dict): - for key in edge_map: - assert key in etypes, "The edge type {} is invalid".format(key) - - if not is_range_part: - raise TypeError("Only RangePartitionBook is supported currently.") - - node_map = _get_part_ranges(node_map) - edge_map = _get_part_ranges(edge_map) - - # Format dtype of node/edge map if dtype is specified. - def _format_node_edge_map(part_metadata, map_type, data): - key = f"{map_type}_map_dtype" - if key not in part_metadata: - return data - dtype = part_metadata[key] - assert dtype in ["int32", "int64"], ( - f"The {map_type} map dtype should be either int32 or int64, " - f"but got {dtype}." - ) - for key in data: - data[key] = data[key].astype(dtype) - return data - - node_map = _format_node_edge_map(part_metadata, "node", node_map) - edge_map = _format_node_edge_map(part_metadata, "edge", edge_map) - - # Sort the node/edge maps by the node/edge type ID. 
- node_map = dict(sorted(node_map.items(), key=lambda x: ntypes[x[0]])) - edge_map = dict(sorted(edge_map.items(), key=lambda x: etypes[x[0]])) - - def _assert_is_sorted(id_map): - id_ranges = np.array(list(id_map.values())) - ids = [] - for i in range(num_parts): - ids.append(id_ranges[:, i, :]) - ids = np.array(ids).flatten() - assert np.all( - ids[:-1] <= ids[1:] - ), f"The node/edge map is not sorted: {ids}" - - _assert_is_sorted(node_map) - _assert_is_sorted(edge_map) - - return ( - RangePartitionBook( - part_id, num_parts, node_map, edge_map, ntypes, etypes - ), - part_metadata["graph_name"], - ntypes, - etypes, - ) - - def load_partition_book(part_config, part_id): """Load a graph partition book from the partition config file. @@ -1414,41 +1307,31 @@ def get_homogeneous(g, balance_ntypes): part_dir = os.path.join(out_path, "part" + str(part_id)) node_feat_file = os.path.join(part_dir, "node_feat.dgl") edge_feat_file = os.path.join(part_dir, "edge_feat.dgl") - - os.makedirs(part_dir, mode=0o775, exist_ok=True) - save_tensors(node_feat_file, node_feats) - save_tensors(edge_feat_file, edge_feats) - + part_graph_file = os.path.join(part_dir, "graph.dgl") part_metadata["part-{}".format(part_id)] = { "node_feats": os.path.relpath(node_feat_file, out_path), "edge_feats": os.path.relpath(edge_feat_file, out_path), + "part_graph": os.path.relpath(part_graph_file, out_path), } + os.makedirs(part_dir, mode=0o775, exist_ok=True) + save_tensors(node_feat_file, node_feats) + save_tensors(edge_feat_file, edge_feats) + sort_etypes = len(g.etypes) > 1 - if not use_graphbolt: - part_graph_file = os.path.join(part_dir, "graph.dgl") - part_metadata["part-{}".format(part_id)][ - "part_graph" - ] = os.path.relpath(part_graph_file, out_path) - _save_graphs( - part_graph_file, - [part], - formats=graph_formats, - sort_etypes=sort_etypes, - ) - else: - part = _process_partitions([part], graph_formats, sort_etypes)[0] + _save_graphs( + part_graph_file, + [part], + 
formats=graph_formats, + sort_etypes=sort_etypes, + ) + print( + "Save partitions: {:.3f} seconds, peak memory: {:.3f} GB".format( + time.time() - start, get_peak_mem() + ) + ) part_config = os.path.join(out_path, graph_name + ".json") - if use_graphbolt: - kwargs["graph_formats"] = graph_formats - _dgl_partition_to_graphbolt( - part_config, - parts=parts, - part_meta=part_metadata, - **kwargs, - ) - else: - _dump_part_config(part_config, part_metadata) + _dump_part_config(part_config, part_metadata) num_cuts = sim_g.num_edges() - tot_num_inner_edges if num_parts == 1: @@ -1459,11 +1342,12 @@ def get_homogeneous(g, balance_ntypes): ) ) - print( - "Save partitions: {:.3f} seconds, peak memory: {:.3f} GB".format( - time.time() - start, get_peak_mem() + if use_graphbolt: + kwargs["graph_formats"] = graph_formats + dgl_partition_to_graphbolt( + part_config, + **kwargs, ) - ) if return_mapping: return orig_nids, orig_eids @@ -1511,21 +1395,8 @@ def init_type_per_edge(graph, gpb): return etype_ids -def _load_parts(part_config, part_id, parts): - """load parts from variable or dist.""" - if parts is None: - graph, _, _, _, _, _, _ = load_partition( - part_config, part_id, load_feats=False - ) - else: - graph = parts[part_id] - return graph - - def gb_convert_single_dgl_partition( part_id, - parts, - part_meta, graph_formats, part_config, store_eids, @@ -1558,18 +1429,14 @@ def gb_convert_single_dgl_partition( "Running in debug mode which means all attributes of DGL partitions" " will be saved to the new format." 
) - if part_meta is None: - part_meta = _load_part_config(part_config) - num_parts = part_meta["num_parts"] - graph = _load_parts(part_config, part_id, parts) + part_meta = _load_part_config(part_config) + num_parts = part_meta["num_parts"] - gpb, _, ntypes, etypes = ( - load_partition_book(part_config, part_id) - if part_meta is None - else _load_partition_book_from_metadata(part_meta, part_id) + graph, _, _, gpb, _, _, _ = load_partition( + part_config, part_id, load_feats=False ) - + _, _, ntypes, etypes = load_partition_book(part_config, part_id) is_homo = is_homogeneous(ntypes, etypes) node_type_to_id = ( None if is_homo else {ntype: ntid for ntid, ntype in enumerate(ntypes)} @@ -1675,12 +1542,12 @@ def gb_convert_single_dgl_partition( node_type_to_id=node_type_to_id, edge_type_to_id=edge_type_to_id, ) - orig_feats_path = os.path.join( + orig_graph_path = os.path.join( os.path.dirname(part_config), - part_meta[f"part-{part_id}"]["node_feats"], + part_meta[f"part-{part_id}"]["part_graph"], ) csc_graph_path = os.path.join( - os.path.dirname(orig_feats_path), "fused_csc_sampling_graph.pt" + os.path.dirname(orig_graph_path), "fused_csc_sampling_graph.pt" ) torch.save(csc_graph, csc_graph_path) @@ -1688,106 +1555,6 @@ def gb_convert_single_dgl_partition( # Update graph path. -def _convert_partition_to_graphbolt( - part_meta, - graph_formats, - part_config, - store_eids, - store_inner_node, - store_inner_edge, - n_jobs, - num_parts, - parts=None, -): - # [Rui] DGL partitions are always saved as homogeneous graphs even though - # the original graph is heterogeneous. But heterogeneous information like - # node/edge types are saved as node/edge data alongside with partitions. - # What needs more attention is that due to the existence of HALO nodes in - # each partition, the local node IDs are not sorted according to the node - # types. So we fail to assign ``node_type_offset`` as required by GraphBolt. 
- # But this is not a problem since such information is not used in sampling. - # We can simply pass None to it. - - # Iterate over partitions. - convert_with_format = partial( - gb_convert_single_dgl_partition, - parts=parts, - part_meta=part_meta, - graph_formats=graph_formats, - part_config=part_config, - store_eids=store_eids, - store_inner_node=store_inner_node, - store_inner_edge=store_inner_edge, - ) - # Need to create entirely new interpreters, because we call C++ downstream - # See https://docs.python.org/3.12/library/multiprocessing.html#contexts-and-start-methods - # and https://pybind11.readthedocs.io/en/stable/advanced/misc.html#global-interpreter-lock-gil - rel_path_results = [] - if n_jobs > 1 and num_parts > 1: - mp_ctx = mp.get_context("spawn") - with concurrent.futures.ProcessPoolExecutor( # pylint: disable=unexpected-keyword-arg - max_workers=min(num_parts, n_jobs), - mp_context=mp_ctx, - ) as executor: - futures = [] - for part_id in range(num_parts): - futures.append(executor.submit(convert_with_format, part_id)) - - for part_id in range(num_parts): - rel_path_results.append(futures[part_id].result()) - else: - # If running single-threaded, avoid spawning new interpreter, which is slow - for part_id in range(num_parts): - rel_path_results.append(convert_with_format(part_id)) - - for part_id in range(num_parts): - # Update graph path. - part_meta[f"part-{part_id}"]["part_graph_graphbolt"] = rel_path_results[ - part_id - ] - - # Save dtype info into partition config. - # [TODO][Rui] Always use int64_t for node/edge IDs in GraphBolt. See more - # details in #7175. 
- part_meta["node_map_dtype"] = "int64" - part_meta["edge_map_dtype"] = "int64" - - _dump_part_config(part_config, part_meta) - print(f"Converted partitions to GraphBolt format into {part_config}") - - -def _dgl_partition_to_graphbolt( - part_config, - part_meta, - parts, - *, - store_eids=True, - store_inner_node=False, - store_inner_edge=False, - graph_formats=None, - n_jobs=1, -): - debug_mode = "DGL_DIST_DEBUG" in os.environ - if debug_mode: - dgl_warning( - "Running in debug mode which means all attributes of DGL partitions" - " will be saved to the new format." - ) - new_part_meta = copy.deepcopy(part_meta) - num_parts = part_meta["num_parts"] - _convert_partition_to_graphbolt( - new_part_meta, - graph_formats, - part_config, - store_eids, - store_inner_node, - store_inner_edge, - n_jobs, - num_parts, - parts=parts, - ) - - def dgl_partition_to_graphbolt( part_config, *, @@ -1836,13 +1603,57 @@ def dgl_partition_to_graphbolt( part_meta = _load_part_config(part_config) new_part_meta = copy.deepcopy(part_meta) num_parts = part_meta["num_parts"] - _convert_partition_to_graphbolt( - new_part_meta, - graph_formats, - part_config, - store_eids, - store_inner_node, - store_inner_edge, - n_jobs, - num_parts, + + # [Rui] DGL partitions are always saved as homogeneous graphs even though + # the original graph is heterogeneous. But heterogeneous information like + # node/edge types are saved as node/edge data alongside with partitions. + # What needs more attention is that due to the existence of HALO nodes in + # each partition, the local node IDs are not sorted according to the node + # types. So we fail to assign ``node_type_offset`` as required by GraphBolt. + # But this is not a problem since such information is not used in sampling. + # We can simply pass None to it. + + # Iterate over partitions. 
+ convert_with_format = partial( + gb_convert_single_dgl_partition, + graph_formats=graph_formats, + part_config=part_config, + store_eids=store_eids, + store_inner_node=store_inner_node, + store_inner_edge=store_inner_edge, ) + # Need to create entirely new interpreters, because we call C++ downstream + # See https://docs.python.org/3.12/library/multiprocessing.html#contexts-and-start-methods + # and https://pybind11.readthedocs.io/en/stable/advanced/misc.html#global-interpreter-lock-gil + rel_path_results = [] + if n_jobs > 1 and num_parts > 1: + mp_ctx = mp.get_context("spawn") + with concurrent.futures.ProcessPoolExecutor( # pylint: disable=unexpected-keyword-arg + max_workers=min(num_parts, n_jobs), + mp_context=mp_ctx, + ) as executor: + futures = [] + for part_id in range(num_parts): + futures.append(executor.submit(convert_with_format, part_id)) + + for part_id in range(num_parts): + rel_path_results.append(futures[part_id].result()) + else: + # If running single-threaded, avoid spawning new interpreter, which is slow + for part_id in range(num_parts): + rel_path_results.append(convert_with_format(part_id)) + + for part_id in range(num_parts): + # Update graph path. + new_part_meta[f"part-{part_id}"][ + "part_graph_graphbolt" + ] = rel_path_results[part_id] + + # Save dtype info into partition config. + # [TODO][Rui] Always use int64_t for node/edge IDs in GraphBolt. See more + # details in #7175. 
+ new_part_meta["node_map_dtype"] = "int64" + new_part_meta["edge_map_dtype"] = "int64" + + _dump_part_config(part_config, new_part_meta) + print(f"Converted partitions to GraphBolt format into {part_config}") \ No newline at end of file From d1beec22e25d9ab20f8dceb1f0041af6058c897c Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Mon, 26 Aug 2024 08:32:01 +0000 Subject: [PATCH 15/39] change test_partition --- python/dgl/distributed/partition.py | 2 +- tests/distributed/test_partition.py | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 081001ca86ac..3965adac111d 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1656,4 +1656,4 @@ def dgl_partition_to_graphbolt( new_part_meta["edge_map_dtype"] = "int64" _dump_part_config(part_config, new_part_meta) - print(f"Converted partitions to GraphBolt format into {part_config}") \ No newline at end of file + print(f"Converted partitions to GraphBolt format into {part_config}") diff --git a/tests/distributed/test_partition.py b/tests/distributed/test_partition.py index ab877c19f6f7..e360ee94b158 100644 --- a/tests/distributed/test_partition.py +++ b/tests/distributed/test_partition.py @@ -5,7 +5,6 @@ import dgl import dgl.backend as F -import dgl.sparse as dglsp import numpy as np import pytest import torch as th @@ -1323,9 +1322,12 @@ def _verify_mapping( if store_eids: # Verify the mapping between the reshuffled IDs and the original IDs. 
- indices, indptr = part_g.indices, part_g.csc_indptr - adj_matrix = dglsp.from_csc(indptr, indices) - part_src_ids, part_dst_ids = adj_matrix.coo() + indices, indptr = part_g.indices.numpy(), part_g.csc_indptr.numpy() + csc_matrix = spsp.csc_matrix( + (np.ones(len(part_g.indices), dtype=float), indices, indptr) + ) + coo_matrix = csc_matrix.tocoo() + part_src_ids, part_dst_ids = th.tensor(coo_matrix.row), th.tensor(coo_matrix.col) part_src_ids = F.gather_row( part_g.node_attributes[dgl.NID], part_src_ids ) @@ -1549,9 +1551,12 @@ def _verify_graphbolt_mapping_IDs( """ # Verify the mapping between the reshuffled IDs and the original IDs. # These are partition-local IDs. - indices, indptr = part_g.indices, part_g.csc_indptr - csc_matrix = dglsp.from_csc(indptr, indices) - part_src_ids, part_dst_ids = csc_matrix.coo() + indices, indptr = part_g.indices.numpy(), part_g.csc_indptr.numpy() + csc_matrix = spsp.csc_matrix( + (np.ones(len(part_g.indices), dtype=float), indices, indptr) + ) + coo_matrix = csc_matrix.tocoo() + part_src_ids, part_dst_ids = th.tensor(coo_matrix.row), th.tensor(coo_matrix.col) # These are reshuffled global homogeneous IDs. 
part_src_ids = F.gather_row(part_g.node_attributes[dgl.NID], part_src_ids) part_dst_ids = F.gather_row(part_g.node_attributes[dgl.NID], part_dst_ids) From 5d3dc7837fb77f738d2ba582eeb0a6e76cf8b987 Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Mon, 26 Aug 2024 08:34:47 +0000 Subject: [PATCH 16/39] change format --- tests/distributed/test_partition.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/distributed/test_partition.py b/tests/distributed/test_partition.py index e360ee94b158..b4d18c66b5b5 100644 --- a/tests/distributed/test_partition.py +++ b/tests/distributed/test_partition.py @@ -1327,7 +1327,9 @@ def _verify_mapping( (np.ones(len(part_g.indices), dtype=float), indices, indptr) ) coo_matrix = csc_matrix.tocoo() - part_src_ids, part_dst_ids = th.tensor(coo_matrix.row), th.tensor(coo_matrix.col) + part_src_ids, part_dst_ids = th.tensor(coo_matrix.row), th.tensor( + coo_matrix.col + ) part_src_ids = F.gather_row( part_g.node_attributes[dgl.NID], part_src_ids ) @@ -1556,7 +1558,9 @@ def _verify_graphbolt_mapping_IDs( (np.ones(len(part_g.indices), dtype=float), indices, indptr) ) coo_matrix = csc_matrix.tocoo() - part_src_ids, part_dst_ids = th.tensor(coo_matrix.row), th.tensor(coo_matrix.col) + part_src_ids, part_dst_ids = th.tensor(coo_matrix.row), th.tensor( + coo_matrix.col + ) # These are reshuffled global homogeneous IDs. 
part_src_ids = F.gather_row(part_g.node_attributes[dgl.NID], part_src_ids) part_dst_ids = F.gather_row(part_g.node_attributes[dgl.NID], part_dst_ids) From 1c9638076a54e9dd0486c2561bc85a226c45e3ef Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Mon, 26 Aug 2024 08:49:04 +0000 Subject: [PATCH 17/39] change issues --- python/dgl/distributed/partition.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 3ba410f5100d..2d3029faa835 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -130,6 +130,7 @@ def _get_inner_node_mask(graph, ntype_id, use_graphbolt=False): def _get_inner_edge_mask(graph, etype_id, use_graphbolt=False): edata = graph.edge_attributes if use_graphbolt else graph.edata + assert "inner_edge" in edata, "'inner_edge' is not edges\' data" etype = ( graph.type_per_edge if use_graphbolt From b83f2bf7adf2fe074eef8647930db957e107e815 Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Mon, 26 Aug 2024 08:53:01 +0000 Subject: [PATCH 18/39] new --- python/dgl/distributed/partition.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 3965adac111d..9f16e3cbc10e 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -123,6 +123,7 @@ def _get_inner_node_mask(graph, ntype_id, use_graphbolt=False): def _get_inner_edge_mask(graph, etype_id, use_graphbolt=False): edata = graph.edge_attributes if use_graphbolt else graph.edata + assert "inner_edge" in edata, "'inner_edge' is not edges' data" etype = ( graph.type_per_edge if use_graphbolt From 6d4c2e5a8dbd0a406fc55d03c54a85151d0aa3ba Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Tue, 27 Aug 2024 10:37:05 +0000 Subject: [PATCH 19/39] change format --- python/dgl/distributed/partition.py | 393 +++++++++++++++++++++------- 
tests/distributed/test_partition.py | 248 +++++++++--------- 2 files changed, 429 insertions(+), 212 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 9f16e3cbc10e..cdbfbd240c1d 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -88,7 +88,7 @@ def _dump_part_config(part_config, part_metadata): json.dump(part_metadata, outfile, sort_keys=False, indent=4) -def _save_graphs(filename, g_list, formats=None, sort_etypes=False): +def _process_partitions(g_list, formats=None, sort_etypes=False): """Preprocess partitions before saving: 1. format data types. 2. sort csc/csr by tag. @@ -106,12 +106,19 @@ def _save_graphs(filename, g_list, formats=None, sort_etypes=False): g = sort_csr_by_tag(g, tag=g.edata[ETYPE], tag_type="edge") if "csc" in formats: g = sort_csc_by_tag(g, tag=g.edata[ETYPE], tag_type="edge") + return g_list + + +def _save_dgl_graphs(filename, g_list, formats=None, sort_etypes=False): + g_list = _process_partitions( + g_list, formats=formats, sort_etypes=sort_etypes + ) save_graphs(filename, g_list, formats=formats) def _get_inner_node_mask(graph, ntype_id, use_graphbolt=False): ndata = graph.node_attributes if use_graphbolt else graph.ndata - assert "inner_node" in ndata, '"inner_node" is not nodes\' data' + assert "inner_node" in ndata, '"inner_node" is not in nodes\' data' if NTYPE in ndata: dtype = F.dtype(ndata["inner_node"]) return ( @@ -123,7 +130,6 @@ def _get_inner_node_mask(graph, ntype_id, use_graphbolt=False): def _get_inner_edge_mask(graph, etype_id, use_graphbolt=False): edata = graph.edge_attributes if use_graphbolt else graph.edata - assert "inner_edge" in edata, "'inner_edge' is not edges' data" etype = ( graph.type_per_edge if use_graphbolt @@ -318,9 +324,10 @@ def load_partition(part_config, part_id, load_feats=True, use_graphbolt=False): "part-{}".format(part_id) in part_metadata ), "part-{} does not exist".format(part_id) part_files = 
part_metadata["part-{}".format(part_id)] - part_graph_field = "part_graph" if use_graphbolt: part_graph_field = "part_graph_graphbolt" + else: + part_graph_field = "part_graph" assert ( part_graph_field in part_files ), f"the partition does not contain graph structure: {part_graph_field}" @@ -447,6 +454,105 @@ def load_partition_feats( return node_feats, edge_feats +def _load_partition_book_from_metadata(part_metadata, part_id): + assert "num_parts" in part_metadata, "num_parts does not exist." + assert ( + part_metadata["num_parts"] > part_id + ), "part {} is out of range (#parts: {})".format( + part_id, part_metadata["num_parts"] + ) + num_parts = part_metadata["num_parts"] + assert ( + "num_nodes" in part_metadata + ), "cannot get the number of nodes of the global graph." + assert ( + "num_edges" in part_metadata + ), "cannot get the number of edges of the global graph." + assert "node_map" in part_metadata, "cannot get the node map." + assert "edge_map" in part_metadata, "cannot get the edge map." + assert "graph_name" in part_metadata, "cannot get the graph name" + + # If this is a range partitioning, node_map actually stores a list, whose elements + # indicate the boundary of range partitioning. Otherwise, node_map stores a filename + # that contains node map in a NumPy array. 
+ node_map = part_metadata["node_map"] + edge_map = part_metadata["edge_map"] + if isinstance(node_map, dict): + for key in node_map: + is_range_part = isinstance(node_map[key], list) + break + elif isinstance(node_map, list): + is_range_part = True + node_map = {DEFAULT_NTYPE: node_map} + else: + is_range_part = False + if isinstance(edge_map, list): + edge_map = {DEFAULT_ETYPE: edge_map} + + ntypes = {DEFAULT_NTYPE: 0} + etypes = {DEFAULT_ETYPE: 0} + if "ntypes" in part_metadata: + ntypes = part_metadata["ntypes"] + if "etypes" in part_metadata: + etypes = part_metadata["etypes"] + + if isinstance(node_map, dict): + for key in node_map: + assert key in ntypes, "The node type {} is invalid".format(key) + if isinstance(edge_map, dict): + for key in edge_map: + assert key in etypes, "The edge type {} is invalid".format(key) + + if not is_range_part: + raise TypeError("Only RangePartitionBook is supported currently.") + + node_map = _get_part_ranges(node_map) + edge_map = _get_part_ranges(edge_map) + + # Format dtype of node/edge map if dtype is specified. + def _format_node_edge_map(part_metadata, map_type, data): + key = f"{map_type}_map_dtype" + if key not in part_metadata: + return data + dtype = part_metadata[key] + assert dtype in ["int32", "int64"], ( + f"The {map_type} map dtype should be either int32 or int64, " + f"but got {dtype}." + ) + for key in data: + data[key] = data[key].astype(dtype) + return data + + node_map = _format_node_edge_map(part_metadata, "node", node_map) + edge_map = _format_node_edge_map(part_metadata, "edge", edge_map) + + # Sort the node/edge maps by the node/edge type ID. 
+ node_map = dict(sorted(node_map.items(), key=lambda x: ntypes[x[0]])) + edge_map = dict(sorted(edge_map.items(), key=lambda x: etypes[x[0]])) + + def _assert_is_sorted(id_map): + id_ranges = np.array(list(id_map.values())) + ids = [] + for i in range(num_parts): + ids.append(id_ranges[:, i, :]) + ids = np.array(ids).flatten() + assert np.all( + ids[:-1] <= ids[1:] + ), f"The node/edge map is not sorted: {ids}" + + _assert_is_sorted(node_map) + _assert_is_sorted(edge_map) + + return ( + RangePartitionBook( + part_id, num_parts, node_map, edge_map, ntypes, etypes + ), + part_metadata["graph_name"], + ntypes, + etypes, + ) + + def load_partition_book(part_config, part_id): """Load a graph partition book from the partition config file. @@ -1186,6 +1292,7 @@ def get_homogeneous(g, balance_ntypes): "ntypes": ntypes, "etypes": etypes, } + part_config = os.path.join(out_path, graph_name + ".json") for part_id in range(num_parts): part = parts[part_id] @@ -1308,30 +1415,67 @@ def get_homogeneous(g, balance_ntypes): part_dir = os.path.join(out_path, "part" + str(part_id)) node_feat_file = os.path.join(part_dir, "node_feat.dgl") edge_feat_file = os.path.join(part_dir, "edge_feat.dgl") - part_graph_file = os.path.join(part_dir, "graph.dgl") - part_metadata["part-{}".format(part_id)] = { - "node_feats": os.path.relpath(node_feat_file, out_path), - "edge_feats": os.path.relpath(edge_feat_file, out_path), - "part_graph": os.path.relpath(part_graph_file, out_path), - } + os.makedirs(part_dir, mode=0o775, exist_ok=True) save_tensors(node_feat_file, node_feats) save_tensors(edge_feat_file, edge_feats) + part_metadata["part-{}".format(part_id)] = { + "node_feats": os.path.relpath(node_feat_file, out_path), + "edge_feats": os.path.relpath(edge_feat_file, out_path), + } sort_etypes = len(g.etypes) > 1 - _save_graphs( - part_graph_file, - [part], - formats=graph_formats, - sort_etypes=sort_etypes, - ) - print( - "Save partitions: {:.3f} seconds, peak memory: {:.3f} GB".format( - 
time.time() - start, get_peak_mem() - ) - ) + if use_graphbolt: + + def _partition_to_graphbolt( + part_config, + parts, + part_i, + part_metadata, + *, + store_eids=True, + store_inner_node=False, + store_inner_edge=False, + graph_formats=None, + ): + rel_path_result = gb_convert_single_dgl_partition( + part_i, + parts, + part_metadata, + part_config=part_config, + store_eids=store_eids, + store_inner_edge=store_inner_edge, + store_inner_node=store_inner_node, + graph_formats=graph_formats, + ) + part_metadata[f"part-{part_i}"][ + "part_graph_graphbolt" + ] = rel_path_result + + part = _process_partitions([part], graph_formats, sort_etypes)[0] + # save FusedCSCSamplingGraph + kwargs["graph_formats"] = graph_formats + kwargs.pop("n_jobs", None) + _partition_to_graphbolt( + part_i=part_id, + part_config=part_config, + part_metadata=part_metadata, + parts=parts, + **kwargs, + ) + else: + part_graph_file = os.path.join(part_dir, "graph.dgl") + part_metadata["part-{}".format(part_id)][ + "part_graph" + ] = os.path.relpath(part_graph_file, out_path) + # save DGLGraph + _save_dgl_graphs( + part_graph_file, + [part], + formats=graph_formats, + sort_etypes=sort_etypes, + ) - part_config = os.path.join(out_path, graph_name + ".json") _dump_part_config(part_config, part_metadata) num_cuts = sim_g.num_edges() - tot_num_inner_edges @@ -1343,12 +1487,11 @@ def get_homogeneous(g, balance_ntypes): ) ) - if use_graphbolt: - kwargs["graph_formats"] = graph_formats - dgl_partition_to_graphbolt( - part_config, - **kwargs, + print( + "Save partitions: {:.3f} seconds, peak memory: {:.3f} GB".format( + time.time() - start, get_peak_mem() ) + ) if return_mapping: return orig_nids, orig_eids @@ -1396,8 +1539,21 @@ def init_type_per_edge(graph, gpb): return etype_ids +def _load_parts(part_config, part_id, parts): + """load parts from variable or dist.""" + if parts is None: + graph, _, _, _, _, _, _ = load_partition( + part_config, part_id, load_feats=False + ) + else: + graph = 
parts[part_id] + return graph + + def gb_convert_single_dgl_partition( part_id, + parts, + part_meta, graph_formats, part_config, store_eids, @@ -1430,14 +1586,18 @@ def gb_convert_single_dgl_partition( "Running in debug mode which means all attributes of DGL partitions" " will be saved to the new format." ) - - part_meta = _load_part_config(part_config) + if part_meta is None: + part_meta = _load_part_config(part_config) num_parts = part_meta["num_parts"] - graph, _, _, gpb, _, _, _ = load_partition( - part_config, part_id, load_feats=False + graph = _load_parts(part_config, part_id, parts) + + gpb, _, ntypes, etypes = ( + load_partition_book(part_config, part_id) + if part_meta is None + else _load_partition_book_from_metadata(part_meta, part_id) ) - _, _, ntypes, etypes = load_partition_book(part_config, part_id) + is_homo = is_homogeneous(ntypes, etypes) node_type_to_id = ( None if is_homo else {ntype: ntid for ntid, ntype in enumerate(ntypes)} @@ -1543,12 +1703,12 @@ def gb_convert_single_dgl_partition( node_type_to_id=node_type_to_id, edge_type_to_id=edge_type_to_id, ) - orig_graph_path = os.path.join( + orig_feats_path = os.path.join( os.path.dirname(part_config), - part_meta[f"part-{part_id}"]["part_graph"], + part_meta[f"part-{part_id}"]["node_feats"], ) csc_graph_path = os.path.join( - os.path.dirname(orig_graph_path), "fused_csc_sampling_graph.pt" + os.path.dirname(orig_feats_path), "fused_csc_sampling_graph.pt" ) torch.save(csc_graph, csc_graph_path) @@ -1556,6 +1716,106 @@ def gb_convert_single_dgl_partition( # Update graph path. +def _convert_partition_to_graphbolt( + part_meta, + graph_formats, + part_config, + store_eids, + store_inner_node, + store_inner_edge, + n_jobs, + num_parts, + parts=None, +): + # [Rui] DGL partitions are always saved as homogeneous graphs even though + # the original graph is heterogeneous. But heterogeneous information like + # node/edge types are saved as node/edge data alongside with partitions. 
+ # What needs more attention is that due to the existence of HALO nodes in + # each partition, the local node IDs are not sorted according to the node + # types. So we fail to assign ``node_type_offset`` as required by GraphBolt. + # But this is not a problem since such information is not used in sampling. + # We can simply pass None to it. + + # Iterate over partitions. + convert_with_format = partial( + gb_convert_single_dgl_partition, + parts=parts, + part_meta=part_meta, + graph_formats=graph_formats, + part_config=part_config, + store_eids=store_eids, + store_inner_node=store_inner_node, + store_inner_edge=store_inner_edge, + ) + # Need to create entirely new interpreters, because we call C++ downstream + # See https://docs.python.org/3.12/library/multiprocessing.html#contexts-and-start-methods + # and https://pybind11.readthedocs.io/en/stable/advanced/misc.html#global-interpreter-lock-gil + rel_path_results = [] + if n_jobs > 1 and num_parts > 1: + mp_ctx = mp.get_context("spawn") + with concurrent.futures.ProcessPoolExecutor( # pylint: disable=unexpected-keyword-arg + max_workers=min(num_parts, n_jobs), + mp_context=mp_ctx, + ) as executor: + futures = [] + for part_id in range(num_parts): + futures.append(executor.submit(convert_with_format, part_id)) + + for part_id in range(num_parts): + rel_path_results.append(futures[part_id].result()) + else: + # If running single-threaded, avoid spawning new interpreter, which is slow + for part_id in range(num_parts): + rel_path_results.append(convert_with_format(part_id)) + + for part_id in range(num_parts): + # Update graph path. + part_meta[f"part-{part_id}"]["part_graph_graphbolt"] = rel_path_results[ + part_id + ] + + # Save dtype info into partition config. + # [TODO][Rui] Always use int64_t for node/edge IDs in GraphBolt. See more + # details in #7175. 
+ part_meta["node_map_dtype"] = "int64" + part_meta["edge_map_dtype"] = "int64" + + return part_meta + + +def _dgl_partition_to_graphbolt( + part_config, + part_meta, + parts, + *, + store_eids=True, + store_inner_node=False, + store_inner_edge=False, + graph_formats=None, + n_jobs=1, +): + debug_mode = "DGL_DIST_DEBUG" in os.environ + if debug_mode: + dgl_warning( + "Running in debug mode which means all attributes of DGL partitions" + " will be saved to the new format." + ) + new_part_meta = copy.deepcopy(part_meta) + num_parts = part_meta["num_parts"] + part_meta = _convert_partition_to_graphbolt( + new_part_meta, + graph_formats, + part_config, + store_eids, + store_inner_node, + store_inner_edge, + n_jobs, + num_parts, + parts=parts, + ) + return part_meta + + def dgl_partition_to_graphbolt( part_config, *, @@ -1604,57 +1864,14 @@ def dgl_partition_to_graphbolt( part_meta = _load_part_config(part_config) new_part_meta = copy.deepcopy(part_meta) num_parts = part_meta["num_parts"] - - # [Rui] DGL partitions are always saved as homogeneous graphs even though - # the original graph is heterogeneous. But heterogeneous information like - # node/edge types are saved as node/edge data alongside with partitions. - # What needs more attention is that due to the existence of HALO nodes in - # each partition, the local node IDs are not sorted according to the node - # types. So we fail to assign ``node_type_offset`` as required by GraphBolt. - # But this is not a problem since such information is not used in sampling. - # We can simply pass None to it. - - # Iterate over partitions. 
- convert_with_format = partial( - gb_convert_single_dgl_partition, - graph_formats=graph_formats, - part_config=part_config, - store_eids=store_eids, - store_inner_node=store_inner_node, - store_inner_edge=store_inner_edge, + part_meta = _convert_partition_to_graphbolt( + new_part_meta, + graph_formats, + part_config, + store_eids, + store_inner_node, + store_inner_edge, + n_jobs, + num_parts, ) - # Need to create entirely new interpreters, because we call C++ downstream - # See https://docs.python.org/3.12/library/multiprocessing.html#contexts-and-start-methods - # and https://pybind11.readthedocs.io/en/stable/advanced/misc.html#global-interpreter-lock-gil - rel_path_results = [] - if n_jobs > 1 and num_parts > 1: - mp_ctx = mp.get_context("spawn") - with concurrent.futures.ProcessPoolExecutor( # pylint: disable=unexpected-keyword-arg - max_workers=min(num_parts, n_jobs), - mp_context=mp_ctx, - ) as executor: - futures = [] - for part_id in range(num_parts): - futures.append(executor.submit(convert_with_format, part_id)) - - for part_id in range(num_parts): - rel_path_results.append(futures[part_id].result()) - else: - # If running single-threaded, avoid spawning new interpreter, which is slow - for part_id in range(num_parts): - rel_path_results.append(convert_with_format(part_id)) - - for part_id in range(num_parts): - # Update graph path. - new_part_meta[f"part-{part_id}"][ - "part_graph_graphbolt" - ] = rel_path_results[part_id] - - # Save dtype info into partition config. - # [TODO][Rui] Always use int64_t for node/edge IDs in GraphBolt. See more - # details in #7175. 
- new_part_meta["node_map_dtype"] = "int64" - new_part_meta["edge_map_dtype"] = "int64" - - _dump_part_config(part_config, new_part_meta) - print(f"Converted partitions to GraphBolt format into {part_config}") + _dump_part_config(part_config, part_meta) diff --git a/tests/distributed/test_partition.py b/tests/distributed/test_partition.py index b4d18c66b5b5..daa46deac80d 100644 --- a/tests/distributed/test_partition.py +++ b/tests/distributed/test_partition.py @@ -9,7 +9,6 @@ import pytest import torch as th from dgl import function as fn -from dgl.base import NTYPE from dgl.distributed import ( dgl_partition_to_graphbolt, load_partition, @@ -89,18 +88,36 @@ def create_random_hetero(): return dgl.heterograph(edges, num_nodes) +def _verify_augument_for_graphbolt( + parts, store_inner_node, store_inner_edge, store_eids, debug_mode +): + if not debug_mode: + for part in parts: + if store_inner_edge: + assert "inner_edge" in part.edge_attributes + else: + assert "inner_edge" not in part.edge_attributes + if store_inner_node: + assert "inner_node" in part.node_attributes + else: + assert "inner_node" not in part.node_attributes + if store_eids: + assert dgl.EID in part.edge_attributes + else: + assert dgl.EID not in part.edge_attributes + + def _verify_hetero_graph_elements_number( g, parts, - store_inner_node, store_inner_edge, use_graphbolt, + debug_mode, ): """ check list: make sure edge type are correct. make sure the number of nodes in each node type are correct. - make sure the argument store_inner_edge and store_inner_node work. 
""" num_nodes = {ntype: 0 for ntype in g.ntypes} num_edges = {etype: 0 for etype in g.canonical_etypes} @@ -108,7 +125,7 @@ def _verify_hetero_graph_elements_number( edata = part.edge_attributes if use_graphbolt else part.edata if dgl.ETYPE in edata: assert len(g.canonical_etypes) == len(F.unique(edata[dgl.ETYPE])) - if not use_graphbolt: + if debug_mode or not use_graphbolt: for ntype in g.ntypes: ntype_id = g.get_ntype_id(ntype) inner_node_mask = _get_inner_node_mask( @@ -126,7 +143,7 @@ def _verify_hetero_graph_elements_number( num_edges[etype] += num_inner_edges # Verify the number of nodes are correct. - if not use_graphbolt: + if debug_mode or not use_graphbolt: for ntype in g.ntypes: print( "node {}: {}, {}".format( @@ -134,8 +151,6 @@ def _verify_hetero_graph_elements_number( ) ) assert g.num_nodes(ntype) == num_nodes[ntype] - elif store_inner_node: - assert "inner_node" in parts[0].node_attributes # Verify the number of edges are correct. if store_inner_edge or not use_graphbolt: for etype in g.canonical_etypes: @@ -145,8 +160,6 @@ def _verify_hetero_graph_elements_number( ) ) assert g.num_edges(etype) == num_edges[etype] - elif not store_inner_edge: - assert "inner_edge" not in parts[0].edge_attributes def _verify_hetero_graph_attributes( @@ -162,7 +175,6 @@ def _verify_hetero_graph_attributes( make sure inner nodes have Ids fall into a range. make sure all nodes is included. make sure all edges is included. - make sure store_eids performs its function. """ if store_eids or not use_graphbolt: nids = {ntype: [] for ntype in g.ntypes} @@ -224,9 +236,6 @@ def _verify_hetero_graph_attributes( uniq_ids = F.unique(eids_type) # We should get all nodes. 
assert len(uniq_ids) == g.num_edges(etype) - # TODO(zhengda) this doesn't check 'part_id' - elif not store_eids: - assert dgl.EID not in parts[0].edge_attributes def _verify_hetero_graph( @@ -234,15 +243,15 @@ def _verify_hetero_graph( parts, use_graphbolt=False, store_eids=False, - store_inner_node=False, store_inner_edge=False, + debug_mode=False, ): _verify_hetero_graph_elements_number( g, parts, store_inner_edge=store_inner_edge, - store_inner_node=store_inner_node, use_graphbolt=use_graphbolt, + debug_mode=debug_mode, ) _verify_hetero_graph_attributes( g, @@ -445,7 +454,6 @@ def check_hetero_partition( edge_feats[_etype_tuple_to_str(test_etype) + "/labels"] ) _verify_hetero_graph(hg, parts) - shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) shuffled_elabels = F.asnumpy(F.cat(shuffled_elabels, 0)) orig_labels = np.zeros(shuffled_labels.shape, dtype=shuffled_labels.dtype) @@ -912,8 +920,6 @@ def test_dgl_partition_to_graphbolt_homo( orig_g.ndata["inner_node"], new_g.node_attributes["inner_node"], ) - else: - assert "inner_node" not in new_g.node_attributes if store_eids or debug_mode: assert orig_g.edata[dgl.EID].dtype == th.int64 assert new_g.edge_attributes[dgl.EID].dtype == th.int64 @@ -921,8 +927,6 @@ def test_dgl_partition_to_graphbolt_homo( orig_g.edata[dgl.EID][orig_eids], new_g.edge_attributes[dgl.EID], ) - else: - assert dgl.EID not in new_g.edge_attributes if store_inner_edge or debug_mode: assert orig_g.edata["inner_edge"].dtype == th.uint8 assert new_g.edge_attributes["inner_edge"].dtype == th.uint8 @@ -930,8 +934,6 @@ def test_dgl_partition_to_graphbolt_homo( orig_g.edata["inner_edge"][orig_eids], new_g.edge_attributes["inner_edge"], ) - else: - assert "inner_edge" not in new_g.edge_attributes assert new_g.type_per_edge is None assert new_g.node_type_to_id is None assert new_g.edge_type_to_id is None @@ -1038,16 +1040,12 @@ def test_dgl_partition_to_graphbolt_hetero( orig_g.ndata["inner_node"], new_g.node_attributes["inner_node"], ) - else: - 
assert "inner_node" not in new_g.node_attributes if debug_mode: assert orig_g.ndata[dgl.NTYPE].dtype == th.int32 assert new_g.node_attributes[dgl.NTYPE].dtype == th.int8 assert th.equal( orig_g.ndata[dgl.NTYPE], new_g.node_attributes[dgl.NTYPE] ) - else: - assert dgl.NTYPE not in new_g.node_attributes if store_eids or debug_mode: assert orig_g.edata[dgl.EID].dtype == th.int64 assert new_g.edge_attributes[dgl.EID].dtype == th.int64 @@ -1055,8 +1053,6 @@ def test_dgl_partition_to_graphbolt_hetero( orig_g.edata[dgl.EID][orig_eids], new_g.edge_attributes[dgl.EID], ) - else: - assert dgl.EID not in new_g.edge_attributes if store_inner_edge or debug_mode: assert orig_g.edata["inner_edge"].dtype == th.uint8 assert new_g.edge_attributes["inner_edge"].dtype == th.uint8 @@ -1064,8 +1060,6 @@ def test_dgl_partition_to_graphbolt_hetero( orig_g.edata["inner_edge"], new_g.edge_attributes["inner_edge"], ) - else: - assert "inner_edge" not in new_g.edge_attributes if debug_mode: assert orig_g.edata[dgl.ETYPE].dtype == th.int32 assert new_g.edge_attributes[dgl.ETYPE].dtype == th.int8 @@ -1073,8 +1067,6 @@ def test_dgl_partition_to_graphbolt_hetero( orig_g.edata[dgl.ETYPE][orig_eids], new_g.edge_attributes[dgl.ETYPE], ) - else: - assert dgl.ETYPE not in new_g.edge_attributes assert th.equal( orig_g.edata[dgl.ETYPE][orig_eids], new_g.type_per_edge ) @@ -1196,7 +1188,7 @@ def test_not_sorted_node_edge_map(): assert gpb.local_etype_offset == [0, 500, 1100, 1800, 2600] -def _verify_metadata( +def _verify_metadata_homo_graphbolt( g, gpb, part_g, @@ -1250,7 +1242,47 @@ def _verify_metadata( return None, None -def _verify_mapping( +def _get_part_IDs(part_g): + # These are partition-local IDs. + num_columns = part_g.csc_indptr.diff() + part_src_ids = part_g.indices + part_dst_ids = th.arange(part_g.total_num_nodes).repeat_interleave( + num_columns + ) + # These are reshuffled global homogeneous IDs. 
+ part_src_ids = F.gather_row(part_g.node_attributes[dgl.NID], part_src_ids) + part_dst_ids = F.gather_row(part_g.node_attributes[dgl.NID], part_dst_ids) + return part_src_ids, part_dst_ids + + +def _verify_orig_IDs( + g, + orig_nids, + orig_eids, + part_eids, + part_src_ids, + part_dst_ids, + src_ntype=None, + dst_ntype=None, + etype=None, +): + if src_ntype is not None and dst_ntype is not None: + orig_src_nid = orig_nids[src_ntype] + orig_dst_nid = orig_nids[dst_ntype] + else: + orig_src_nid = orig_nids + orig_dst_nid = orig_nids + orig_src_ids = F.gather_row(orig_src_nid, part_src_ids) + orig_dst_ids = F.gather_row(orig_dst_nid, part_dst_ids) + if etype is not None: + orig_eids = orig_eids[etype] + orig_eids = F.gather_row(orig_eids, part_eids) + orig_eids = g.edge_ids(orig_src_ids, orig_dst_ids, etype=etype) + assert len(orig_eids) == len(orig_eids) + assert np.all(F.asnumpy(orig_eids) == F.asnumpy(orig_eids)) + + +def _verify_homo_graphbolt_mapping_ID( g, part_g, part_i, @@ -1322,27 +1354,11 @@ def _verify_mapping( if store_eids: # Verify the mapping between the reshuffled IDs and the original IDs. 
- indices, indptr = part_g.indices.numpy(), part_g.csc_indptr.numpy() - csc_matrix = spsp.csc_matrix( - (np.ones(len(part_g.indices), dtype=float), indices, indptr) - ) - coo_matrix = csc_matrix.tocoo() - part_src_ids, part_dst_ids = th.tensor(coo_matrix.row), th.tensor( - coo_matrix.col - ) - part_src_ids = F.gather_row( - part_g.node_attributes[dgl.NID], part_src_ids - ) - part_dst_ids = F.gather_row( - part_g.node_attributes[dgl.NID], part_dst_ids - ) + part_src_ids, part_dst_ids = _get_part_IDs(part_g) part_eids = part_g.edge_attributes[dgl.EID] - orig_src_ids = F.gather_row(orig_nids, part_src_ids) - orig_dst_ids = F.gather_row(orig_nids, part_dst_ids) - orig_eids1 = F.gather_row(orig_eids, part_eids) - orig_eids2 = g.edge_ids(orig_src_ids, orig_dst_ids) - assert F.shape(orig_eids1)[0] == F.shape(orig_eids2)[0] - assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) + _verify_orig_IDs( + g, orig_nids, orig_eids, part_eids, part_src_ids, part_dst_ids + ) local_orig_nids = orig_nids[part_g.node_attributes[dgl.NID]] local_orig_eids = orig_eids[part_g.edge_attributes[dgl.EID]] @@ -1352,8 +1368,6 @@ def _verify_mapping( part_g.edge_attributes["feats"] = F.gather_row( g.edata["feats"], local_orig_eids ) - else: - assert dgl.EID not in part_g.edge_attributes return node_feats["_N/labels"], edge_feats["_N:_E:_N/feats"] @@ -1416,19 +1430,17 @@ def test_partition_graph_graphbolt_homo( return_mapping=True, ) if debug_mode: - store_eids = True - store_inner_node = True - store_inner_edge = True + store_eids = store_inner_node = store_inner_edge = True part_sizes = [] shuffled_labels = [] shuffled_edata = [] part_config = os.path.join(test_dir, f"{graph_name}.json") + parts = [] for part_i in range(num_parts): part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( part_config, part_i, load_feats=True, use_graphbolt=True ) - - local_eid, local_nid = _verify_metadata( + local_eid, local_nid = _verify_metadata_homo_graphbolt( g, gpb, part_g, @@ -1440,7 +1452,7 
@@ def test_partition_graph_graphbolt_homo( store_eids, ) - node_feat, edge_feat = _verify_mapping( + node_feat, edge_feat = _verify_homo_graphbolt_mapping_ID( g, part_g, part_i, @@ -1457,6 +1469,7 @@ def test_partition_graph_graphbolt_homo( ) shuffled_labels.append(node_feat) shuffled_edata.append(edge_feat) + parts.append(part_g) verify_graph_feats( g, @@ -1472,8 +1485,10 @@ def test_partition_graph_graphbolt_homo( use_graphbolt=True, is_homo=True, ) - - _verify_reconstrunt_data( + _verify_augument_for_graphbolt( + parts, store_inner_node, store_inner_edge, store_eids, debug_mode + ) + _verify_homo_graphbolt_shuffled_data( g, gpb, orig_nids, @@ -1484,7 +1499,7 @@ def test_partition_graph_graphbolt_homo( ) -def _vertify_original_IDs(g, orig_nids, orig_eids): +def _verify_original_IDs_type(g, orig_nids, orig_eids): """ check list: make sure nodes and edges' data types are correct @@ -1494,11 +1509,13 @@ def _vertify_original_IDs(g, orig_nids, orig_eids): assert len(orig_eids) == len(g.canonical_etypes) for ntype in g.ntypes: assert len(orig_nids[ntype]) == g.num_nodes(ntype) + assert F.dtype(orig_nids[ntype]) in (F.int64, F.int32) for etype in g.canonical_etypes: assert len(orig_eids[etype]) == g.num_edges(etype) + assert F.dtype(orig_eids[etype]) in (F.int64, F.int32) -def _verify_reconstrunt_data( +def _verify_homo_graphbolt_shuffled_data( g, gpb, orig_nids, orig_eids, part_sizes, shuffled_labels, shuffled_edata ): """ @@ -1532,38 +1549,21 @@ def _verify_reconstrunt_data( assert np.all(F.asnumpy(eid2pid) == edge_map) -def _verify_graphbolt_mapping_IDs( +def _verify_hetero_graphbolt_mapping_type( g, part_g, gpb, orig_nids, orig_eids, - node_feats, - edge_feats, - test_ntype, - test_etype, - store_eids, - store_inner_node, - store_inner_edge, + store_eids=False, ): """ check list: - make sure nodes and edges' ids have correct type. + make sure nodes and edges have correct type. make sure nodes and edges have corrert map ids. 
""" # Verify the mapping between the reshuffled IDs and the original IDs. - # These are partition-local IDs. - indices, indptr = part_g.indices.numpy(), part_g.csc_indptr.numpy() - csc_matrix = spsp.csc_matrix( - (np.ones(len(part_g.indices), dtype=float), indices, indptr) - ) - coo_matrix = csc_matrix.tocoo() - part_src_ids, part_dst_ids = th.tensor(coo_matrix.row), th.tensor( - coo_matrix.col - ) - # These are reshuffled global homogeneous IDs. - part_src_ids = F.gather_row(part_g.node_attributes[dgl.NID], part_src_ids) - part_dst_ids = F.gather_row(part_g.node_attributes[dgl.NID], part_dst_ids) + part_src_ids, part_dst_ids = _get_part_IDs(part_g) # These are reshuffled per-type IDs. src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids) dst_ntype_ids, part_dst_ids = gpb.map_to_per_ntype(part_dst_ids) @@ -1597,31 +1597,24 @@ def _verify_graphbolt_mapping_IDs( assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0])) src_ntype = g.ntypes[F.as_scalar(src_ntype_ids1[0])] dst_ntype = g.ntypes[F.as_scalar(dst_ntype_ids1[0])] + + _verify_orig_IDs( + g, + orig_nids, + orig_eids, + part_eids1, + part_src_ids1, + part_dst_ids1, + src_ntype, + dst_ntype, + etype, + ) orig_src_ids1 = F.gather_row(orig_nids[src_ntype], part_src_ids1) orig_dst_ids1 = F.gather_row(orig_nids[dst_ntype], part_dst_ids1) orig_eids1 = F.gather_row(orig_eids[etype], part_eids1) orig_eids2 = g.edge_ids(orig_src_ids1, orig_dst_ids1, etype=etype) assert len(orig_eids1) == len(orig_eids2) assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) - else: - assert dgl.EID not in part_g.edge_attributes - verify_graph_feats( - g, - gpb, - part_g, - node_feats, - edge_feats, - orig_nids, - orig_eids, - store_eids, - store_inner_edge=store_inner_edge, - store_inner_node=store_inner_node, - use_graphbolt=True, - ) - - shuffled_label = node_feats[test_ntype + "/labels"] - shuffled_elabel = edge_feats[_etype_tuple_to_str(test_etype) + "/labels"] - return part_g, shuffled_label, shuffled_elabel 
def _verify_labels( @@ -1667,7 +1660,7 @@ def test_partition_graph_graphbolt_hetero( ): """ check list: - _vertify_original_IDs: + _verify_original_IDs_type: number of edges and nodes' type and number of them in each type _verify_graphbolt_mapping_IDs: @@ -1687,7 +1680,6 @@ def test_partition_graph_graphbolt_hetero( os.environ["DGL_DIST_DEBUG"] = "1" with tempfile.TemporaryDirectory() as test_dir: hg = create_random_hetero() - # TODO create graph data graph_name = "test" hg.nodes[test_ntype].data["labels"] = F.arange( 0, hg.num_nodes(test_ntype) @@ -1715,12 +1707,10 @@ def test_partition_graph_graphbolt_hetero( store_inner_edge=store_inner_edge, n_jobs=n_jobs, ) - _vertify_original_IDs(hg, orig_nids, orig_eids) + _verify_original_IDs_type(hg, orig_nids, orig_eids) if debug_mode: - store_eids = True - store_inner_node = True - store_inner_edge = True + store_eids = store_inner_node = store_inner_edge = True parts = [] shuffled_labels = [] @@ -1731,25 +1721,32 @@ def test_partition_graph_graphbolt_hetero( part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( part_config, part_id, load_feats=True, use_graphbolt=True ) - # TODO verify mapping IDs - ( - part_g, - shuffled_label, - shuffled_elabel, - ) = _verify_graphbolt_mapping_IDs( + _verify_hetero_graphbolt_mapping_type( hg, part_g, gpb, orig_nids, orig_eids, + store_eids=store_eids, + ) + verify_graph_feats( + hg, + gpb, + part_g, node_feats, edge_feats, - test_ntype, - test_etype, - store_eids=store_eids, - store_inner_node=store_inner_node, + orig_nids, + orig_eids, + store_eids, store_inner_edge=store_inner_edge, + store_inner_node=store_inner_node, + use_graphbolt=True, ) + + shuffled_label = node_feats[test_ntype + "/labels"] + shuffled_elabel = edge_feats[ + _etype_tuple_to_str(test_etype) + "/labels" + ] parts.append(part_g) shuffled_labels.append(shuffled_label) shuffled_elabels.append(shuffled_elabel) @@ -1758,8 +1755,11 @@ def test_partition_graph_graphbolt_hetero( parts, True, 
store_eids=store_eids, - store_inner_node=store_inner_node, store_inner_edge=store_inner_edge, + debug_mode=debug_mode, + ) + _verify_augument_for_graphbolt( + parts, store_inner_node, store_inner_edge, store_eids, debug_mode ) _verify_labels( hg, @@ -2024,4 +2024,4 @@ def test_partition_graph_graphbolt_hetero_find_edges_multi( num_parts=num_parts, graph_formats="coo", n_jobs=4, - ) + ) \ No newline at end of file From 4330ce36a038e4f2cbc2d2cc24ac27ebf76d1676 Mon Sep 17 00:00:00 2001 From: Ubuntu <2649624957@qq.com> Date: Wed, 28 Aug 2024 06:03:09 +0000 Subject: [PATCH 20/39] change partition --- python/dgl/distributed/partition.py | 393 +++++++--------------------- tests/distributed/test_partition.py | 2 +- 2 files changed, 89 insertions(+), 306 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index cdbfbd240c1d..b2ac6761e3c7 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -88,7 +88,7 @@ def _dump_part_config(part_config, part_metadata): json.dump(part_metadata, outfile, sort_keys=False, indent=4) -def _process_partitions(g_list, formats=None, sort_etypes=False): +def _save_graphs(filename, g_list, formats=None, sort_etypes=False): """Preprocess partitions before saving: 1. format data types. 2. sort csc/csr by tag. 
@@ -106,19 +106,12 @@ def _process_partitions(g_list, formats=None, sort_etypes=False): g = sort_csr_by_tag(g, tag=g.edata[ETYPE], tag_type="edge") if "csc" in formats: g = sort_csc_by_tag(g, tag=g.edata[ETYPE], tag_type="edge") - return g_list - - -def _save_dgl_graphs(filename, g_list, formats=None, sort_etypes=False): - g_list = _process_partitions( - g_list, formats=formats, sort_etypes=sort_etypes - ) save_graphs(filename, g_list, formats=formats) def _get_inner_node_mask(graph, ntype_id, use_graphbolt=False): ndata = graph.node_attributes if use_graphbolt else graph.ndata - assert "inner_node" in ndata, '"inner_node" is not in nodes\' data' + assert "inner_node" in ndata, "'inner_node' is not in nodes' data" if NTYPE in ndata: dtype = F.dtype(ndata["inner_node"]) return ( @@ -130,6 +123,7 @@ def _get_inner_node_mask(graph, ntype_id, use_graphbolt=False): def _get_inner_edge_mask(graph, etype_id, use_graphbolt=False): edata = graph.edge_attributes if use_graphbolt else graph.edata + assert "inner_edge" in edata, "'inner_edge' is not in edges' data" etype = ( graph.type_per_edge if use_graphbolt @@ -324,10 +318,9 @@ def load_partition(part_config, part_id, load_feats=True, use_graphbolt=False): "part-{}".format(part_id) in part_metadata ), "part-{} does not exist".format(part_id) part_files = part_metadata["part-{}".format(part_id)] + part_graph_field = "part_graph" if use_graphbolt: part_graph_field = "part_graph_graphbolt" - else: - part_graph_field = "part_graph" assert ( part_graph_field in part_files ), f"the partition does not contain graph structure: {part_graph_field}" @@ -454,105 +447,6 @@ def load_partition_feats( return node_feats, edge_feats -def _load_partition_book_from_metadata(part_metadata, part_id): - assert "num_parts" in part_metadata, "num_parts does not exist." 
- assert ( - part_metadata["num_parts"] > part_id - ), "part {} is out of range (#parts: {})".format( - part_id, part_metadata["num_parts"] - ) - num_parts = part_metadata["num_parts"] - assert ( - "num_nodes" in part_metadata - ), "cannot get the number of nodes of the global graph." - assert ( - "num_edges" in part_metadata - ), "cannot get the number of edges of the global graph." - assert "node_map" in part_metadata, "cannot get the node map." - assert "edge_map" in part_metadata, "cannot get the edge map." - assert "graph_name" in part_metadata, "cannot get the graph name" - - # If this is a range partitioning, node_map actually stores a list, whose elements - # indicate the boundary of range partitioning. Otherwise, node_map stores a filename - # that contains node map in a NumPy array. - node_map = part_metadata["node_map"] - edge_map = part_metadata["edge_map"] - if isinstance(node_map, dict): - for key in node_map: - is_range_part = isinstance(node_map[key], list) - break - elif isinstance(node_map, list): - is_range_part = True - node_map = {DEFAULT_NTYPE: node_map} - else: - is_range_part = False - if isinstance(edge_map, list): - edge_map = {DEFAULT_ETYPE: edge_map} - - ntypes = {DEFAULT_NTYPE: 0} - etypes = {DEFAULT_ETYPE: 0} - if "ntypes" in part_metadata: - ntypes = part_metadata["ntypes"] - if "etypes" in part_metadata: - etypes = part_metadata["etypes"] - - if isinstance(node_map, dict): - for key in node_map: - assert key in ntypes, "The node type {} is invalid".format(key) - if isinstance(edge_map, dict): - for key in edge_map: - assert key in etypes, "The edge type {} is invalid".format(key) - - if not is_range_part: - raise TypeError("Only RangePartitionBook is supported currently.") - - node_map = _get_part_ranges(node_map) - edge_map = _get_part_ranges(edge_map) - - # Format dtype of node/edge map if dtype is specified. 
- def _format_node_edge_map(part_metadata, map_type, data): - key = f"{map_type}_map_dtype" - if key not in part_metadata: - return data - dtype = part_metadata[key] - assert dtype in ["int32", "int64"], ( - f"The {map_type} map dtype should be either int32 or int64, " - f"but got {dtype}." - ) - for key in data: - data[key] = data[key].astype(dtype) - return data - - node_map = _format_node_edge_map(part_metadata, "node", node_map) - edge_map = _format_node_edge_map(part_metadata, "edge", edge_map) - - # Sort the node/edge maps by the node/edge type ID. - node_map = dict(sorted(node_map.items(), key=lambda x: ntypes[x[0]])) - edge_map = dict(sorted(edge_map.items(), key=lambda x: etypes[x[0]])) - - def _assert_is_sorted(id_map): - id_ranges = np.array(list(id_map.values())) - ids = [] - for i in range(num_parts): - ids.append(id_ranges[:, i, :]) - ids = np.array(ids).flatten() - assert np.all( - ids[:-1] <= ids[1:] - ), f"The node/edge map is not sorted: {ids}" - - _assert_is_sorted(node_map) - _assert_is_sorted(edge_map) - - return ( - RangePartitionBook( - part_id, num_parts, node_map, edge_map, ntypes, etypes - ), - part_metadata["graph_name"], - ntypes, - etypes, - ) - - def load_partition_book(part_config, part_id): """Load a graph partition book from the partition config file. 
@@ -1292,7 +1186,6 @@ def get_homogeneous(g, balance_ntypes): "ntypes": ntypes, "etypes": etypes, } - part_config = os.path.join(out_path, graph_name + ".json") for part_id in range(num_parts): part = parts[part_id] @@ -1415,67 +1308,30 @@ def get_homogeneous(g, balance_ntypes): part_dir = os.path.join(out_path, "part" + str(part_id)) node_feat_file = os.path.join(part_dir, "node_feat.dgl") edge_feat_file = os.path.join(part_dir, "edge_feat.dgl") - - os.makedirs(part_dir, mode=0o775, exist_ok=True) - save_tensors(node_feat_file, node_feats) - save_tensors(edge_feat_file, edge_feats) - + part_graph_file = os.path.join(part_dir, "graph.dgl") part_metadata["part-{}".format(part_id)] = { "node_feats": os.path.relpath(node_feat_file, out_path), "edge_feats": os.path.relpath(edge_feat_file, out_path), + "part_graph": os.path.relpath(part_graph_file, out_path), } + os.makedirs(part_dir, mode=0o775, exist_ok=True) + save_tensors(node_feat_file, node_feats) + save_tensors(edge_feat_file, edge_feats) + sort_etypes = len(g.etypes) > 1 - if use_graphbolt: - - def _partition_to_graphbolt( - part_config, - parts, - part_i, - part_metadata, - *, - store_eids=True, - store_inner_node=False, - store_inner_edge=False, - graph_formats=None, - ): - rel_path_result = gb_convert_single_dgl_partition( - part_i, - parts, - part_metadata, - part_config=part_config, - store_eids=store_eids, - store_inner_edge=store_inner_edge, - store_inner_node=store_inner_node, - graph_formats=graph_formats, - ) - part_metadata[f"part-{part_i}"][ - "part_graph_graphbolt" - ] = rel_path_result - - part = _process_partitions([part], graph_formats, sort_etypes)[0] - # save FusedCSCSamplingGraph - kwargs["graph_formats"] = graph_formats - kwargs.pop("n_jobs", None) - _partition_to_graphbolt( - part_i=part_id, - part_config=part_config, - part_metadata=part_metadata, - parts=parts, - **kwargs, - ) - else: - part_graph_file = os.path.join(part_dir, "graph.dgl") - part_metadata["part-{}".format(part_id)][ - 
"part_graph" - ] = os.path.relpath(part_graph_file, out_path) - # save DGLGraph - _save_dgl_graphs( - part_graph_file, - [part], - formats=graph_formats, - sort_etypes=sort_etypes, - ) + _save_graphs( + part_graph_file, + [part], + formats=graph_formats, + sort_etypes=sort_etypes, + ) + print( + "Save partitions: {:.3f} seconds, peak memory: {:.3f} GB".format( + time.time() - start, get_peak_mem() + ) + ) + part_config = os.path.join(out_path, graph_name + ".json") _dump_part_config(part_config, part_metadata) num_cuts = sim_g.num_edges() - tot_num_inner_edges @@ -1487,11 +1343,12 @@ def _partition_to_graphbolt( ) ) - print( - "Save partitions: {:.3f} seconds, peak memory: {:.3f} GB".format( - time.time() - start, get_peak_mem() + if use_graphbolt: + kwargs["graph_formats"] = graph_formats + dgl_partition_to_graphbolt( + part_config, + **kwargs, ) - ) if return_mapping: return orig_nids, orig_eids @@ -1539,21 +1396,8 @@ def init_type_per_edge(graph, gpb): return etype_ids -def _load_parts(part_config, part_id, parts): - """load parts from variable or dist.""" - if parts is None: - graph, _, _, _, _, _, _ = load_partition( - part_config, part_id, load_feats=False - ) - else: - graph = parts[part_id] - return graph - - def gb_convert_single_dgl_partition( part_id, - parts, - part_meta, graph_formats, part_config, store_eids, @@ -1586,18 +1430,14 @@ def gb_convert_single_dgl_partition( "Running in debug mode which means all attributes of DGL partitions" " will be saved to the new format." 
) - if part_meta is None: - part_meta = _load_part_config(part_config) - num_parts = part_meta["num_parts"] - graph = _load_parts(part_config, part_id, parts) + part_meta = _load_part_config(part_config) + num_parts = part_meta["num_parts"] - gpb, _, ntypes, etypes = ( - load_partition_book(part_config, part_id) - if part_meta is None - else _load_partition_book_from_metadata(part_meta, part_id) + graph, _, _, gpb, _, _, _ = load_partition( + part_config, part_id, load_feats=False ) - + _, _, ntypes, etypes = load_partition_book(part_config, part_id) is_homo = is_homogeneous(ntypes, etypes) node_type_to_id = ( None if is_homo else {ntype: ntid for ntid, ntype in enumerate(ntypes)} @@ -1703,12 +1543,12 @@ def gb_convert_single_dgl_partition( node_type_to_id=node_type_to_id, edge_type_to_id=edge_type_to_id, ) - orig_feats_path = os.path.join( + orig_graph_path = os.path.join( os.path.dirname(part_config), - part_meta[f"part-{part_id}"]["node_feats"], + part_meta[f"part-{part_id}"]["part_graph"], ) csc_graph_path = os.path.join( - os.path.dirname(orig_feats_path), "fused_csc_sampling_graph.pt" + os.path.dirname(orig_graph_path), "fused_csc_sampling_graph.pt" ) torch.save(csc_graph, csc_graph_path) @@ -1716,106 +1556,6 @@ def gb_convert_single_dgl_partition( # Update graph path. -def _convert_partition_to_graphbolt( - part_meta, - graph_formats, - part_config, - store_eids, - store_inner_node, - store_inner_edge, - n_jobs, - num_parts, - parts=None, -): - # [Rui] DGL partitions are always saved as homogeneous graphs even though - # the original graph is heterogeneous. But heterogeneous information like - # node/edge types are saved as node/edge data alongside with partitions. - # What needs more attention is that due to the existence of HALO nodes in - # each partition, the local node IDs are not sorted according to the node - # types. So we fail to assign ``node_type_offset`` as required by GraphBolt. 
- # But this is not a problem since such information is not used in sampling. - # We can simply pass None to it. - - # Iterate over partitions. - convert_with_format = partial( - gb_convert_single_dgl_partition, - parts=parts, - part_meta=part_meta, - graph_formats=graph_formats, - part_config=part_config, - store_eids=store_eids, - store_inner_node=store_inner_node, - store_inner_edge=store_inner_edge, - ) - # Need to create entirely new interpreters, because we call C++ downstream - # See https://docs.python.org/3.12/library/multiprocessing.html#contexts-and-start-methods - # and https://pybind11.readthedocs.io/en/stable/advanced/misc.html#global-interpreter-lock-gil - rel_path_results = [] - if n_jobs > 1 and num_parts > 1: - mp_ctx = mp.get_context("spawn") - with concurrent.futures.ProcessPoolExecutor( # pylint: disable=unexpected-keyword-arg - max_workers=min(num_parts, n_jobs), - mp_context=mp_ctx, - ) as executor: - futures = [] - for part_id in range(num_parts): - futures.append(executor.submit(convert_with_format, part_id)) - - for part_id in range(num_parts): - rel_path_results.append(futures[part_id].result()) - else: - # If running single-threaded, avoid spawning new interpreter, which is slow - for part_id in range(num_parts): - rel_path_results.append(convert_with_format(part_id)) - - for part_id in range(num_parts): - # Update graph path. - part_meta[f"part-{part_id}"]["part_graph_graphbolt"] = rel_path_results[ - part_id - ] - - # Save dtype info into partition config. - # [TODO][Rui] Always use int64_t for node/edge IDs in GraphBolt. See more - # details in #7175. 
- part_meta["node_map_dtype"] = "int64" - part_meta["edge_map_dtype"] = "int64" - - return part_meta - - -def _dgl_partition_to_graphbolt( - part_config, - part_meta, - parts, - *, - store_eids=True, - store_inner_node=False, - store_inner_edge=False, - graph_formats=None, - n_jobs=1, -): - debug_mode = "DGL_DIST_DEBUG" in os.environ - if debug_mode: - dgl_warning( - "Running in debug mode which means all attributes of DGL partitions" - " will be saved to the new format." - ) - new_part_meta = copy.deepcopy(part_meta) - num_parts = part_meta["num_parts"] - part_meta = _convert_partition_to_graphbolt( - new_part_meta, - graph_formats, - part_config, - store_eids, - store_inner_node, - store_inner_edge, - n_jobs, - num_parts, - parts=parts, - ) - return part_meta - - def dgl_partition_to_graphbolt( part_config, *, @@ -1864,14 +1604,57 @@ def dgl_partition_to_graphbolt( part_meta = _load_part_config(part_config) new_part_meta = copy.deepcopy(part_meta) num_parts = part_meta["num_parts"] - part_meta = _convert_partition_to_graphbolt( - new_part_meta, - graph_formats, - part_config, - store_eids, - store_inner_node, - store_inner_edge, - n_jobs, - num_parts, + + # [Rui] DGL partitions are always saved as homogeneous graphs even though + # the original graph is heterogeneous. But heterogeneous information like + # node/edge types are saved as node/edge data alongside with partitions. + # What needs more attention is that due to the existence of HALO nodes in + # each partition, the local node IDs are not sorted according to the node + # types. So we fail to assign ``node_type_offset`` as required by GraphBolt. + # But this is not a problem since such information is not used in sampling. + # We can simply pass None to it. + + # Iterate over partitions. 
+ convert_with_format = partial( + gb_convert_single_dgl_partition, + graph_formats=graph_formats, + part_config=part_config, + store_eids=store_eids, + store_inner_node=store_inner_node, + store_inner_edge=store_inner_edge, ) - _dump_part_config(part_config, part_meta) + # Need to create entirely new interpreters, because we call C++ downstream + # See https://docs.python.org/3.12/library/multiprocessing.html#contexts-and-start-methods + # and https://pybind11.readthedocs.io/en/stable/advanced/misc.html#global-interpreter-lock-gil + rel_path_results = [] + if n_jobs > 1 and num_parts > 1: + mp_ctx = mp.get_context("spawn") + with concurrent.futures.ProcessPoolExecutor( # pylint: disable=unexpected-keyword-arg + max_workers=min(num_parts, n_jobs), + mp_context=mp_ctx, + ) as executor: + futures = [] + for part_id in range(num_parts): + futures.append(executor.submit(convert_with_format, part_id)) + + for part_id in range(num_parts): + rel_path_results.append(futures[part_id].result()) + else: + # If running single-threaded, avoid spawning new interpreter, which is slow + for part_id in range(num_parts): + rel_path_results.append(convert_with_format(part_id)) + + for part_id in range(num_parts): + # Update graph path. + new_part_meta[f"part-{part_id}"][ + "part_graph_graphbolt" + ] = rel_path_results[part_id] + + # Save dtype info into partition config. + # [TODO][Rui] Always use int64_t for node/edge IDs in GraphBolt. See more + # details in #7175. 
+ new_part_meta["node_map_dtype"] = "int64" + new_part_meta["edge_map_dtype"] = "int64" + + _dump_part_config(part_config, new_part_meta) + print(f"Converted partitions to GraphBolt format into {part_config}") diff --git a/tests/distributed/test_partition.py b/tests/distributed/test_partition.py index daa46deac80d..8e9c725804fa 100644 --- a/tests/distributed/test_partition.py +++ b/tests/distributed/test_partition.py @@ -2024,4 +2024,4 @@ def test_partition_graph_graphbolt_hetero_find_edges_multi( num_parts=num_parts, graph_formats="coo", n_jobs=4, - ) \ No newline at end of file + ) From 070d3707f556af572f7c2eb568da769c07d398f0 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Sun, 1 Sep 2024 18:16:08 +0000 Subject: [PATCH 21/39] [DistGB]change test_partition.py, renew the code. --- tests/distributed/test_partition.py | 776 +++++++++++++++------------- 1 file changed, 416 insertions(+), 360 deletions(-) diff --git a/tests/distributed/test_partition.py b/tests/distributed/test_partition.py index 8e9c725804fa..d7bf640522fa 100644 --- a/tests/distributed/test_partition.py +++ b/tests/distributed/test_partition.py @@ -88,9 +88,13 @@ def create_random_hetero(): return dgl.heterograph(edges, num_nodes) -def _verify_augument_for_graphbolt( +def _verify_argument_for_graphbolt( parts, store_inner_node, store_inner_edge, store_eids, debug_mode ): + """ + check list: + make sure arguments work. + """ if not debug_mode: for part in parts: if store_inner_edge: @@ -1188,60 +1192,6 @@ def test_not_sorted_node_edge_map(): assert gpb.local_etype_offset == [0, 500, 1100, 1800, 2600] -def _verify_metadata_homo_graphbolt( - g, - gpb, - part_g, - num_parts, - part_sizes, - part_i, - store_inner_node, - store_inner_edge, - store_eids, -): - """ - # Check the metadata - check list: - make sure gpb have correct node and edge number. - make sure gpb have correct number of partitions. - make sure gpb have correct number of nodes and edges in each partition. 
- make sure local nid and eid have correct dtype. - make sure local nid have correct order - """ - assert gpb._num_nodes() == g.num_nodes() - assert gpb._num_edges() == g.num_edges() - - assert gpb.num_partitions() == num_parts - gpb_meta = gpb.metadata() - assert len(gpb_meta) == num_parts - assert len(gpb.partid2nids(part_i)) == gpb_meta[part_i]["num_nodes"] - assert len(gpb.partid2eids(part_i)) == gpb_meta[part_i]["num_edges"] - part_sizes.append( - (gpb_meta[part_i]["num_nodes"], gpb_meta[part_i]["num_edges"]) - ) - - if store_inner_node and store_inner_edge and store_eids: - nid = F.boolean_mask( - part_g.node_attributes[dgl.NID], - part_g.node_attributes["inner_node"], - ) - local_nid = gpb.nid2localnid(nid, part_i) - assert F.dtype(local_nid) in (F.int64, F.int32) - assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid))) - eid = F.boolean_mask( - part_g.edge_attributes[dgl.EID], - part_g.edge_attributes["inner_edge"], - ) - local_eid = gpb.eid2localeid(eid, part_i) - assert F.dtype(local_eid) in (F.int64, F.int32) - assert np.all( - np.sort(F.asnumpy(local_eid)) == np.arange(0, len(local_eid)) - ) - return local_eid, local_nid - else: - return None, None - - def _get_part_IDs(part_g): # These are partition-local IDs. 
 num_columns = part_g.csc_indptr.diff()
@@ -1255,7 +1205,7 @@ def _get_part_IDs(part_g):
     return part_src_ids, part_dst_ids
 
 
-def _verify_orig_IDs(
+def _verify_orig_edge_IDs(
     g,
     orig_nids,
     orig_eids,
@@ -1266,6 +1216,10 @@ def _verify_orig_IDs(
     dst_ntype=None,
     etype=None,
 ):
+    """
+    check list:
+    make sure original edge IDs are correct after reshuffling.
+    """
     if src_ntype is not None and dst_ntype is not None:
         orig_src_nid = orig_nids[src_ntype]
         orig_dst_nid = orig_nids[dst_ntype]
@@ -1276,90 +1230,190 @@ def _verify_orig_IDs(
     orig_dst_ids = F.gather_row(orig_dst_nid, part_dst_ids)
     if etype is not None:
         orig_eids = orig_eids[etype]
-    orig_eids = F.gather_row(orig_eids, part_eids)
-    orig_eids = g.edge_ids(orig_src_ids, orig_dst_ids, etype=etype)
-    assert len(orig_eids) == len(orig_eids)
-    assert np.all(F.asnumpy(orig_eids) == F.asnumpy(orig_eids))
+    orig_eids1 = F.gather_row(orig_eids, part_eids)
+    orig_eids2 = g.edge_ids(orig_src_ids, orig_dst_ids, etype=etype)
+    assert len(orig_eids1) == len(orig_eids2)
+    assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2))
 
 
-def _verify_homo_graphbolt_mapping_ID(
-    g,
+def _verify_graphbolt_metadata(gpb, g, num_parts, part_id, part_sizes):
+    """
+    check list:
+    make sure the number of nodes and edges is correct.
+    make sure the number of parts is correct.
+    make sure the number of nodes and edges in each part is correct.
+    """
+    assert gpb._num_nodes() == g.num_nodes()
+    assert gpb._num_edges() == g.num_edges()
+
+    assert gpb.num_partitions() == num_parts
+    gpb_meta = gpb.metadata()
+    assert len(gpb_meta) == num_parts
+    assert len(gpb.partid2nids(part_id)) == gpb_meta[part_id]["num_nodes"]
+    assert len(gpb.partid2eids(part_id)) == gpb_meta[part_id]["num_edges"]
+    part_sizes.append(
+        (gpb_meta[part_id]["num_nodes"], gpb_meta[part_id]["num_edges"])
+    )
+
+
+def _verify_graphbolt_local_id(part_g, part_id, gpb):
+    """
+    check list:
+    make sure the data type of local IDs is correct.
+    make sure local IDs are in the right order. 
+ """ + nid = F.boolean_mask( + part_g.node_attributes[dgl.NID], + part_g.node_attributes["inner_node"], + ) + local_nid = gpb.nid2localnid(nid, part_id) + assert F.dtype(local_nid) in (F.int64, F.int32) + assert np.all(F.asnumpy(local_nid) == np.arange(0, len(local_nid))) + eid = F.boolean_mask( + part_g.edge_attributes[dgl.EID], + part_g.edge_attributes["inner_edge"], + ) + local_eid = gpb.eid2localeid(eid, part_id) + assert F.dtype(local_eid) in (F.int64, F.int32) + assert np.all(np.sort(F.asnumpy(local_eid)) == np.arange(0, len(local_eid))) + return local_nid, local_eid + + +def _verify_graphbolt_map( part_g, - part_i, + part_id, gpb, - orig_nids, - orig_eids, - node_feats, - edge_feats, - local_nid=None, - local_eid=None, - store_inner_node=False, - store_inner_edge=False, - store_eids=False, ): """ check list: - make sure nodes and edges's data type are correct. - make sure nodes and edges's ID in correct order. - make sure the number of nodes and edges's ID are correct. + make sure the map node and its data type is correct. """ - if store_inner_node and store_inner_edge and store_eids: - # Check the node map. - local_nodes = F.boolean_mask( - part_g.node_attributes[dgl.NID], - part_g.node_attributes["inner_node"], - ) - inner_node_index = F.nonzero_1d(part_g.node_attributes["inner_node"]) - mapping_nodes = gpb.partid2nids(part_i) - assert F.dtype(mapping_nodes) in (F.int32, F.int64) - assert np.all( - np.sort(F.asnumpy(local_nodes)) == np.sort(F.asnumpy(mapping_nodes)) - ) - assert np.all( - F.asnumpy(inner_node_index) == np.arange(len(inner_node_index)) - ) + # Check the node map. 
+ local_nodes = F.boolean_mask( + part_g.node_attributes[dgl.NID], + part_g.node_attributes["inner_node"], + ) + inner_node_index = F.nonzero_1d(part_g.node_attributes["inner_node"]) + mapping_nodes = gpb.partid2nids(part_id) + assert F.dtype(mapping_nodes) in (F.int32, F.int64) + assert np.all( + np.sort(F.asnumpy(local_nodes)) == np.sort(F.asnumpy(mapping_nodes)) + ) + assert np.all( + F.asnumpy(inner_node_index) == np.arange(len(inner_node_index)) + ) - # Check the edge map. + # Check the edge map. - local_edges = F.boolean_mask( - part_g.edge_attributes[dgl.EID], - part_g.edge_attributes["inner_edge"], - ) - inner_edge_index = F.nonzero_1d(part_g.edge_attributes["inner_edge"]) - mapping_edges = gpb.partid2eids(part_i) - assert F.dtype(mapping_edges) in (F.int32, F.int64) - assert np.all( - np.sort(F.asnumpy(local_edges)) == np.sort(F.asnumpy(mapping_edges)) - ) - assert np.all( - F.asnumpy(inner_edge_index) == np.arange(len(inner_edge_index)) - ) + local_edges = F.boolean_mask( + part_g.edge_attributes[dgl.EID], + part_g.edge_attributes["inner_edge"], + ) + inner_edge_index = F.nonzero_1d(part_g.edge_attributes["inner_edge"]) + mapping_edges = gpb.partid2eids(part_id) + assert F.dtype(mapping_edges) in (F.int32, F.int64) + assert np.all( + np.sort(F.asnumpy(local_edges)) == np.sort(F.asnumpy(mapping_edges)) + ) + assert np.all( + F.asnumpy(inner_edge_index) == np.arange(len(inner_edge_index)) + ) + return local_nodes, local_edges - local_nodes = orig_nids[local_nodes] - local_edges = orig_eids[local_edges] - for name in ["labels", "feats"]: - assert "_N/" + name in node_feats - assert node_feats["_N/" + name].shape[0] == len(local_nodes) - true_feats = F.gather_row(g.ndata[name], local_nodes) - ndata = F.gather_row(node_feats["_N/" + name], local_nid) - assert np.all(F.asnumpy(true_feats) == F.asnumpy(ndata)) - for name in ["feats"]: - efeat_name = _etype_tuple_to_str(DEFAULT_ETYPE) + "/" + name - assert efeat_name in edge_feats - assert 
edge_feats[efeat_name].shape[0] == len(local_edges) - true_feats = F.gather_row(g.edata[name], local_edges) - edata = F.gather_row(edge_feats[efeat_name], local_eid) - assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata)) +def _verify_graphbolt_local_and_map_id( + part_g, + part_id, + gpb, + store_inner_node, + store_inner_edge, + store_eids, + g=None, + orig_nids=None, + orig_eids=None, + node_feats=None, + edge_feats=None, + is_homo=False, +): + """ + check list: + make sure local id are correct. + make sure mapping id are correct. + make sure homo graph have correct feats. + """ + local_nid = local_eid = None + if store_inner_node and store_inner_edge and store_eids: + local_nid, local_eid = _verify_graphbolt_local_id(part_g, part_id, gpb) + local_nodes, local_edges = _verify_graphbolt_map(part_g, part_id, gpb) + if is_homo: + _verify_homo_graphbolt_feat( + g, + local_nodes, + local_edges, + orig_nids, + orig_eids, + node_feats, + edge_feats, + local_nid, + local_eid, + ) - if store_eids: - # Verify the mapping between the reshuffled IDs and the original IDs. - part_src_ids, part_dst_ids = _get_part_IDs(part_g) - part_eids = part_g.edge_attributes[dgl.EID] - _verify_orig_IDs( + +def _verify_homo_graphbolt_feat( + g, + local_nodes, + local_edges, + orig_nids, + orig_eids, + node_feats, + edge_feats, + local_nid, + local_eid, +): + """ + check list: + make sure feats of nodes and edges and its number are correct. 
+ """ + local_nodes = orig_nids[local_nodes] + local_edges = orig_eids[local_edges] + + for name in ["labels", "feats"]: + assert "_N/" + name in node_feats + assert node_feats["_N/" + name].shape[0] == len(local_nodes) + true_feats = F.gather_row(g.ndata[name], local_nodes) + ndata = F.gather_row(node_feats["_N/" + name], local_nid) + assert np.all(F.asnumpy(true_feats) == F.asnumpy(ndata)) + for name in ["feats"]: + efeat_name = _etype_tuple_to_str(DEFAULT_ETYPE) + "/" + name + assert efeat_name in edge_feats + assert edge_feats[efeat_name].shape[0] == len(local_edges) + true_feats = F.gather_row(g.edata[name], local_edges) + edata = F.gather_row(edge_feats[efeat_name], local_eid) + assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata)) + + +def _verify_graphbolt_orig_IDs( + part_g, + gpb, + g, + is_homo=False, + part_src_ids=None, + part_dst_ids=None, + src_ntype_ids=None, + dst_ntype_ids=None, + orig_nids=None, + orig_eids=None, +): + """ + check list: + make sure orig edge id are correct. + make sure hetero ntype id are correct. + """ + part_eids = part_g.edge_attributes[dgl.EID] + if is_homo: + _verify_orig_edge_IDs( g, orig_nids, orig_eids, part_eids, part_src_ids, part_dst_ids ) - local_orig_nids = orig_nids[part_g.node_attributes[dgl.NID]] local_orig_eids = orig_eids[part_g.edge_attributes[dgl.EID]] part_g.node_attributes["feats"] = F.gather_row( @@ -1368,8 +1422,38 @@ def _verify_homo_graphbolt_mapping_ID( part_g.edge_attributes["feats"] = F.gather_row( g.edata["feats"], local_orig_eids ) + else: + etype_ids, part_eids = gpb.map_to_per_etype(part_eids) + # `IdMap` is in int64 by default. + assert etype_ids.dtype == F.int64 - return node_feats["_N/labels"], edge_feats["_N:_E:_N/feats"] + # These are original per-type IDs. 
+ for etype_id, etype in enumerate(g.canonical_etypes): + part_src_ids1 = F.boolean_mask(part_src_ids, etype_ids == etype_id) + src_ntype_ids1 = F.boolean_mask( + src_ntype_ids, etype_ids == etype_id + ) + part_dst_ids1 = F.boolean_mask(part_dst_ids, etype_ids == etype_id) + dst_ntype_ids1 = F.boolean_mask( + dst_ntype_ids, etype_ids == etype_id + ) + part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id) + assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0])) + assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0])) + src_ntype = g.ntypes[F.as_scalar(src_ntype_ids1[0])] + dst_ntype = g.ntypes[F.as_scalar(dst_ntype_ids1[0])] + + _verify_orig_edge_IDs( + g, + orig_nids, + orig_eids, + part_eids1, + part_src_ids1, + part_dst_ids1, + src_ntype, + dst_ntype, + etype, + ) @pytest.mark.parametrize("part_method", ["metis", "random"]) @@ -1386,23 +1470,6 @@ def test_partition_graph_graphbolt_homo( store_inner_edge, debug_mode, ): - """ - check list: - _verify_metadata: - number of edges, nodes, partitions for all - number of edges, nodes in each partitions - order and data type of local nid and eid - - _verify_mapping: - data type, ID's order and ID's number of edges and nodes - - verify_graph_feats: - graph's feats - - _verify_reconstrunt_IDs: - check if feats and IDs can be reconstructed - - """ reset_envs() if debug_mode: os.environ["DGL_DIST_DEBUG"] = "1" @@ -1429,110 +1496,51 @@ def test_partition_graph_graphbolt_homo( store_inner_edge=store_inner_edge, return_mapping=True, ) + if debug_mode: store_eids = store_inner_node = store_inner_edge = True - part_sizes = [] - shuffled_labels = [] - shuffled_edata = [] - part_config = os.path.join(test_dir, f"{graph_name}.json") - parts = [] - for part_i in range(num_parts): - part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( - part_config, part_i, load_feats=True, use_graphbolt=True - ) - local_eid, local_nid = _verify_metadata_homo_graphbolt( - g, - gpb, - part_g, - num_parts, - part_sizes, - 
part_i, - store_inner_node, - store_inner_edge, - store_eids, - ) - - node_feat, edge_feat = _verify_homo_graphbolt_mapping_ID( - g, - part_g, - part_i, - gpb, - orig_nids, - orig_eids, - node_feats, - edge_feats, - local_nid=local_nid, - local_eid=local_eid, - store_inner_node=store_inner_node, - store_inner_edge=store_inner_edge, - store_eids=store_eids, - ) - shuffled_labels.append(node_feat) - shuffled_edata.append(edge_feat) - parts.append(part_g) - verify_graph_feats( - g, - gpb, - part_g, - node_feats, - edge_feats, - orig_nids, - orig_eids, - store_eids=store_eids, - store_inner_edge=store_inner_edge, - store_inner_node=store_inner_node, - use_graphbolt=True, - is_homo=True, - ) - _verify_augument_for_graphbolt( - parts, store_inner_node, store_inner_edge, store_eids, debug_mode - ) - _verify_homo_graphbolt_shuffled_data( + _verify_graphbolt_part( g, - gpb, + test_dir, orig_nids, orig_eids, - part_sizes, - shuffled_labels, - shuffled_edata, + graph_name, + num_parts, + store_inner_node, + store_inner_edge, + store_eids, + is_homo=True, + debug_mode=debug_mode, ) -def _verify_original_IDs_type(g, orig_nids, orig_eids): - """ - check list: - make sure nodes and edges' data types are correct - make sure nodes and edges' number in each type is correct - """ - assert len(orig_nids) == len(g.ntypes) - assert len(orig_eids) == len(g.canonical_etypes) - for ntype in g.ntypes: - assert len(orig_nids[ntype]) == g.num_nodes(ntype) - assert F.dtype(orig_nids[ntype]) in (F.int64, F.int32) - for etype in g.canonical_etypes: - assert len(orig_eids[etype]) == g.num_edges(etype) - assert F.dtype(orig_eids[etype]) in (F.int64, F.int32) - - -def _verify_homo_graphbolt_shuffled_data( - g, gpb, orig_nids, orig_eids, part_sizes, shuffled_labels, shuffled_edata +def _verify_graphbolt_shuffled_data( + g, + gpb, + orig_nids, + orig_eids, + part_sizes, + shuffled_labels, + shuffled_elabels, + test_ntype=None, + test_etype=None, ): """ check list: make sure labels and feats are 
correct. make sure nodes and edges' id are correct. - make sure node and edges' part """ - # Verify that we can reconstruct node/edge data for original IDs. - shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) - shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0)) - orig_labels = np.zeros(shuffled_labels.shape, dtype=shuffled_labels.dtype) - orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype) - orig_labels[F.asnumpy(orig_nids)] = shuffled_labels - orig_edata[F.asnumpy(orig_eids)] = shuffled_edata - assert np.all(orig_labels == F.asnumpy(g.ndata["labels"])) - assert np.all(orig_edata == F.asnumpy(g.edata["feats"])) + # Verify shuffled node/edge data for original IDs. + _verify_shuffled_labels( + g, + shuffled_labels, + shuffled_elabels, + orig_nids, + orig_eids, + test_ntype, + test_etype, + ) node_map = [] edge_map = [] @@ -1549,20 +1557,49 @@ def _verify_homo_graphbolt_shuffled_data( assert np.all(F.asnumpy(eid2pid) == edge_map) -def _verify_hetero_graphbolt_mapping_type( +def _verify_shuffled_labels( g, - part_g, - gpb, + shuffled_labels, + shuffled_edata, orig_nids, orig_eids, - store_eids=False, + test_ntype=None, + test_etype=None, ): """ check list: - make sure nodes and edges have correct type. - make sure nodes and edges have corrert map ids. + make sure node labels are correct. + make sure edge labels are correct. 
+ """ + shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) + shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0)) + orig_labels = np.zeros(shuffled_labels.shape, dtype=shuffled_labels.dtype) + orig_elabels = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype) + + orig_nid = orig_nids if test_ntype is None else orig_nids[test_ntype] + orig_eid = orig_eids if test_etype is None else orig_eids[test_etype] + nlabel = ( + g.ndata["labels"] + if test_ntype is None + else g.nodes[test_ntype].data["labels"] + ) + edata = ( + g.edata["feats"] + if test_etype is None + else g.edges[test_etype].data["labels"] + ) + + orig_labels[F.asnumpy(orig_nid)] = shuffled_labels + orig_elabels[F.asnumpy(orig_eid)] = shuffled_edata + assert np.all(orig_labels == F.asnumpy(nlabel)) + assert np.all(orig_elabels == F.asnumpy(edata)) + + +def _verify_graphbolt_node_type_ID(part_g, gpb): + """ + check list: + make sure ntype id have correct data type """ - # Verify the mapping between the reshuffled IDs and the original IDs. part_src_ids, part_dst_ids = _get_part_IDs(part_g) # These are reshuffled per-type IDs. src_ntype_ids, part_src_ids = gpb.map_to_per_ntype(part_src_ids) @@ -1575,72 +1612,132 @@ def _verify_hetero_graphbolt_mapping_type( gpb.map_to_per_ntype(F.tensor([0], F.int32)) with pytest.raises(dgl.utils.internal.InconsistentDtypeException): gpb.map_to_per_etype(F.tensor([0], F.int32)) - - if store_eids: - part_eids = part_g.edge_attributes[dgl.EID] - etype_ids, part_eids = gpb.map_to_per_etype(part_eids) - # `IdMap` is in int64 by default. - assert etype_ids.dtype == F.int64 - - # These are original per-type IDs. 
- for etype_id, etype in enumerate(g.canonical_etypes): - part_src_ids1 = F.boolean_mask(part_src_ids, etype_ids == etype_id) - src_ntype_ids1 = F.boolean_mask( - src_ntype_ids, etype_ids == etype_id - ) - part_dst_ids1 = F.boolean_mask(part_dst_ids, etype_ids == etype_id) - dst_ntype_ids1 = F.boolean_mask( - dst_ntype_ids, etype_ids == etype_id - ) - part_eids1 = F.boolean_mask(part_eids, etype_ids == etype_id) - assert np.all(F.asnumpy(src_ntype_ids1 == src_ntype_ids1[0])) - assert np.all(F.asnumpy(dst_ntype_ids1 == dst_ntype_ids1[0])) - src_ntype = g.ntypes[F.as_scalar(src_ntype_ids1[0])] - dst_ntype = g.ntypes[F.as_scalar(dst_ntype_ids1[0])] - - _verify_orig_IDs( - g, - orig_nids, - orig_eids, - part_eids1, - part_src_ids1, - part_dst_ids1, - src_ntype, - dst_ntype, - etype, - ) - orig_src_ids1 = F.gather_row(orig_nids[src_ntype], part_src_ids1) - orig_dst_ids1 = F.gather_row(orig_nids[dst_ntype], part_dst_ids1) - orig_eids1 = F.gather_row(orig_eids[etype], part_eids1) - orig_eids2 = g.edge_ids(orig_src_ids1, orig_dst_ids1, etype=etype) - assert len(orig_eids1) == len(orig_eids2) - assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) + return ( + part_src_ids, + part_dst_ids, + src_ntype_ids, + part_src_ids, + dst_ntype_ids, + ) -def _verify_labels( +def _verify_graphbolt_part( g, - shuffled_labels, - shuffled_elabels, + test_dir, orig_nids, orig_eids, - test_ntype, - test_etype, + graph_name, + num_parts, + store_inner_node, + store_inner_edge, + store_eids, + test_ntype=None, + test_etype=None, + is_homo=False, + debug_mode=False, ): """ check list: - make sure node labels are correct. - make sure edge labels are correct. 
+ _verify_graphbolt_metadata: + data type, ID's order and ID's number of edges and nodes + + _verify_graphbolt_local_and_map_id: + local id, mapping id + _verify_graphbolt_node_type_ID: + node type id + _verify_graphbolt_orig_IDs: + orig edge, hetero ntype id + verify_graph_feats: + nodes and edges' feats + _verify_argument_for_graphbolt: + arguments + _verify_graphbolt_shuffled_data: + id after reshuffle """ - shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) - shuffled_elabels = F.asnumpy(F.cat(shuffled_elabels, 0)) - orig_labels = np.zeros(shuffled_labels.shape, dtype=shuffled_labels.dtype) - orig_elabels = np.zeros( - shuffled_elabels.shape, dtype=shuffled_elabels.dtype + parts = [] + shuffled_labels = [] + shuffled_edata = [] + part_sizes = [] + part_config = os.path.join(test_dir, f"{graph_name}.json") + # test each part + for part_id in range(num_parts): + part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( + part_config, part_id, load_feats=True, use_graphbolt=True + ) + # verify_metadata + _verify_graphbolt_metadata(gpb, g, num_parts, part_id, part_sizes) + _verify_graphbolt_local_and_map_id( + part_g, + part_id, + gpb, + store_inner_node, + store_inner_edge, + store_eids, + is_homo=False, + ) + + # Verify the mapping between the reshuffled IDs and the original IDs. 
+ ( + part_src_ids, + part_dst_ids, + src_ntype_ids, + part_src_ids, + dst_ntype_ids, + ) = _verify_graphbolt_node_type_ID(part_g, gpb) + + if store_eids: + _verify_graphbolt_orig_IDs( + part_g, + gpb, + g, + part_src_ids=part_src_ids, + part_dst_ids=part_dst_ids, + src_ntype_ids=src_ntype_ids, + dst_ntype_ids=dst_ntype_ids, + orig_nids=orig_nids, + orig_eids=orig_eids, + is_homo=is_homo, + ) + if test_ntype != None: + shuffled_labels.append(node_feats[test_ntype + "/labels"]) + shuffled_edata.append( + edge_feats[_etype_tuple_to_str(test_etype) + "/labels"] + ) + else: + shuffled_labels.append(node_feats["_N/labels"]) + shuffled_edata.append(edge_feats["_N:_E:_N/feats"]) + parts.append(part_g) + + verify_graph_feats( + g, + gpb, + part_g, + node_feats, + edge_feats, + orig_nids, + orig_eids, + store_eids, + store_inner_edge=store_inner_edge, + store_inner_node=store_inner_node, + use_graphbolt=True, + is_homo=is_homo, + ) + + _verify_argument_for_graphbolt( + parts, store_inner_node, store_inner_edge, store_eids, debug_mode ) - orig_labels[F.asnumpy(orig_nids[test_ntype])] = shuffled_labels - orig_elabels[F.asnumpy(orig_eids[test_etype])] = shuffled_elabels - assert np.all(orig_labels == F.asnumpy(g.nodes[test_ntype].data["labels"])) - assert np.all(orig_elabels == F.asnumpy(g.edges[test_etype].data["labels"])) + _verify_graphbolt_shuffled_data( + g, + gpb, + orig_nids, + orig_eids, + part_sizes, + shuffled_labels, + shuffled_edata, + test_ntype, + test_etype, + ) + return parts @pytest.mark.parametrize("part_method", ["metis", "random"]) @@ -1658,21 +1755,6 @@ def test_partition_graph_graphbolt_hetero( debug_mode, n_jobs=1, ): - """ - check list: - _verify_original_IDs_type: - number of edges and nodes' type and number of them in each type - - _verify_graphbolt_mapping_IDs: - mapping node and edge IDs - feats in graph - - _verify_hetero_graph: - number, order of elements in hetero graph - - _verify_labels: - labels of nodes and edges - """ test_ntype = "n1" 
test_etype = ("n1", "r1", "n2") reset_envs() @@ -1707,49 +1789,35 @@ def test_partition_graph_graphbolt_hetero( store_inner_edge=store_inner_edge, n_jobs=n_jobs, ) - _verify_original_IDs_type(hg, orig_nids, orig_eids) + # _verify_original_IDs_type(hg, orig_nids, orig_eids) + + assert len(orig_nids) == len(hg.ntypes) + assert len(orig_eids) == len(hg.canonical_etypes) + for ntype in hg.ntypes: + assert len(orig_nids[ntype]) == hg.num_nodes(ntype) + assert F.dtype(orig_nids[ntype]) in (F.int64, F.int32) + for etype in hg.canonical_etypes: + assert len(orig_eids[etype]) == hg.num_edges(etype) + assert F.dtype(orig_eids[etype]) in (F.int64, F.int32) if debug_mode: store_eids = store_inner_node = store_inner_edge = True - parts = [] - shuffled_labels = [] - shuffled_elabels = [] - part_config = os.path.join(test_dir, f"{graph_name}.json") - # test each part - for part_id in range(num_parts): - part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( - part_config, part_id, load_feats=True, use_graphbolt=True - ) - _verify_hetero_graphbolt_mapping_type( - hg, - part_g, - gpb, - orig_nids, - orig_eids, - store_eids=store_eids, - ) - verify_graph_feats( - hg, - gpb, - part_g, - node_feats, - edge_feats, - orig_nids, - orig_eids, - store_eids, - store_inner_edge=store_inner_edge, - store_inner_node=store_inner_node, - use_graphbolt=True, - ) + parts = _verify_graphbolt_part( + hg, + test_dir, + orig_nids, + orig_eids, + graph_name, + num_parts, + store_inner_node, + store_inner_edge, + store_eids, + test_ntype, + test_etype, + is_homo=False, + ) - shuffled_label = node_feats[test_ntype + "/labels"] - shuffled_elabel = edge_feats[ - _etype_tuple_to_str(test_etype) + "/labels" - ] - parts.append(part_g) - shuffled_labels.append(shuffled_label) - shuffled_elabels.append(shuffled_elabel) _verify_hetero_graph( hg, parts, @@ -1758,18 +1826,6 @@ def test_partition_graph_graphbolt_hetero( store_inner_edge=store_inner_edge, debug_mode=debug_mode, ) - 
_verify_augument_for_graphbolt( - parts, store_inner_node, store_inner_edge, store_eids, debug_mode - ) - _verify_labels( - hg, - shuffled_labels, - shuffled_elabels, - orig_nids, - orig_eids, - test_ntype, - test_etype, - ) @pytest.mark.parametrize("part_method", ["metis", "random"]) From ba77ab4a29f5e6bb6edd4d39629f11e4b0927614 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 2 Sep 2024 03:06:58 +0000 Subject: [PATCH 22/39] [distGB] change test_partition.py --- tests/distributed/test_partition.py | 338 +++++++++++++++------------- 1 file changed, 176 insertions(+), 162 deletions(-) diff --git a/tests/distributed/test_partition.py b/tests/distributed/test_partition.py index d7bf640522fa..42d8670cb0cb 100644 --- a/tests/distributed/test_partition.py +++ b/tests/distributed/test_partition.py @@ -97,21 +97,12 @@ def _verify_argument_for_graphbolt( """ if not debug_mode: for part in parts: - if store_inner_edge: - assert "inner_edge" in part.edge_attributes - else: - assert "inner_edge" not in part.edge_attributes - if store_inner_node: - assert "inner_node" in part.node_attributes - else: - assert "inner_node" not in part.node_attributes - if store_eids: - assert dgl.EID in part.edge_attributes - else: - assert dgl.EID not in part.edge_attributes - - -def _verify_hetero_graph_elements_number( + assert store_inner_edge == ("inner_edge" in part.edge_attributes) + assert store_inner_node == ("inner_node" in part.node_attributes) + assert store_eids == (dgl.EID in part.edge_attributes) + + +def _verify_hetero_graph_node_edge_num( g, parts, store_inner_edge, @@ -122,6 +113,7 @@ def _verify_hetero_graph_elements_number( check list: make sure edge type are correct. make sure the number of nodes in each node type are correct. + make sure the number of nodes in each node type are correct. 
""" num_nodes = {ntype: 0 for ntype in g.ntypes} num_edges = {etype: 0 for etype in g.canonical_etypes} @@ -166,10 +158,45 @@ def _verify_hetero_graph_elements_number( assert g.num_edges(etype) == num_edges[etype] +def _verify_node_id_range(g, part, nids): + """ + check list: + make sure inner nodes have Ids fall into a range. + """ + for ntype in g.ntypes: + ntype_id = g.get_ntype_id(ntype) + # Make sure inner nodes have Ids fall into a range. + inner_node_mask = _get_inner_node_mask(part, ntype_id) + inner_nids = F.boolean_mask(part.ndata[dgl.NID], inner_node_mask) + assert np.all( + F.asnumpy( + inner_nids + == F.arange( + F.as_scalar(inner_nids[0]), + F.as_scalar(inner_nids[-1]) + 1, + ) + ) + ) + nids[ntype].append(inner_nids) + + +def _verify_node_edge_included(g, nids, eids): + for ntype in nids: + nids_type = F.cat(nids[ntype], 0) + uniq_ids = F.unique(nids_type) + # We should get all nodes. + assert len(uniq_ids) == g.num_nodes(ntype) + + for etype in eids: + eids_type = F.cat(eids[etype], 0) + uniq_ids = F.unique(eids_type) + # We should get all nodes. + assert len(uniq_ids) == g.num_edges(etype) + + def _verify_hetero_graph_attributes( g, parts, - store_eids, store_inner_edge, use_graphbolt, ): @@ -180,66 +207,34 @@ def _verify_hetero_graph_attributes( make sure all nodes is included. make sure all edges is included. 
""" - if store_eids or not use_graphbolt: - nids = {ntype: [] for ntype in g.ntypes} - eids = {etype: [] for etype in g.canonical_etypes} - for part in parts: - edata = part.edge_attributes if use_graphbolt else part.edata - etype = part.type_per_edge if use_graphbolt else edata[dgl.ETYPE] - eid = th.arange(len(edata[dgl.EID])) - etype_arr = F.gather_row(etype, eid) - eid_arr = F.gather_row(edata[dgl.EID], eid) - for etype in g.canonical_etypes: - etype_id = g.get_etype_id(etype) - eids[etype].append( - F.boolean_mask(eid_arr, etype_arr == etype_id) + nids = {ntype: [] for ntype in g.ntypes} + eids = {etype: [] for etype in g.canonical_etypes} + for part in parts: + edata = part.edge_attributes if use_graphbolt else part.edata + etype = part.type_per_edge if use_graphbolt else edata[dgl.ETYPE] + eid = th.arange(len(edata[dgl.EID])) + etype_arr = F.gather_row(etype, eid) + eid_arr = F.gather_row(edata[dgl.EID], eid) + for etype in g.canonical_etypes: + etype_id = g.get_etype_id(etype) + eids[etype].append(F.boolean_mask(eid_arr, etype_arr == etype_id)) + # Make sure edge Ids fall into a range. + if store_inner_edge or not use_graphbolt: + inner_edge_mask = _get_inner_edge_mask( + part, etype_id, use_graphbolt=use_graphbolt + ) + inner_eids = np.sort( + F.asnumpy(F.boolean_mask(edata[dgl.EID], inner_edge_mask)) + ) + assert np.all( + inner_eids == np.arange(inner_eids[0], inner_eids[-1] + 1) ) - # Make sure edge Ids fall into a range. - if store_inner_edge or not use_graphbolt: - inner_edge_mask = _get_inner_edge_mask( - part, etype_id, use_graphbolt=use_graphbolt - ) - inner_eids = np.sort( - F.asnumpy( - F.boolean_mask(edata[dgl.EID], inner_edge_mask) - ) - ) - assert np.all( - inner_eids - == np.arange(inner_eids[0], inner_eids[-1] + 1) - ) - - if not use_graphbolt: - for ntype in g.ntypes: - ntype_id = g.get_ntype_id(ntype) - # Make sure inner nodes have Ids fall into a range. 
- inner_node_mask = _get_inner_node_mask(part, ntype_id) - inner_nids = F.boolean_mask( - part.ndata[dgl.NID], inner_node_mask - ) - assert np.all( - F.asnumpy( - inner_nids - == F.arange( - F.as_scalar(inner_nids[0]), - F.as_scalar(inner_nids[-1]) + 1, - ) - ) - ) - nids[ntype].append(inner_nids) if not use_graphbolt: - for ntype in nids: - nids_type = F.cat(nids[ntype], 0) - uniq_ids = F.unique(nids_type) - # We should get all nodes. - assert len(uniq_ids) == g.num_nodes(ntype) + _verify_node_id_range(g, part, nids) - for etype in eids: - eids_type = F.cat(eids[etype], 0) - uniq_ids = F.unique(eids_type) - # We should get all nodes. - assert len(uniq_ids) == g.num_edges(etype) + if not use_graphbolt: + _verify_node_edge_included(g, nids, eids) def _verify_hetero_graph( @@ -250,20 +245,77 @@ def _verify_hetero_graph( store_inner_edge=False, debug_mode=False, ): - _verify_hetero_graph_elements_number( + _verify_hetero_graph_node_edge_num( g, parts, store_inner_edge=store_inner_edge, use_graphbolt=use_graphbolt, debug_mode=debug_mode, ) - _verify_hetero_graph_attributes( - g, - parts, - store_eids=store_eids, - store_inner_edge=store_inner_edge, - use_graphbolt=use_graphbolt, - ) + if store_eids or not use_graphbolt: + _verify_hetero_graph_attributes( + g, + parts, + store_inner_edge=store_inner_edge, + use_graphbolt=use_graphbolt, + ) + + +def _verify_node_feats( + g, part, gpb, orig_nids, node_feats, use_graphbolt, is_homo +): + for ntype in g.ntypes: + ndata = part.node_attributes if use_graphbolt else part.ndata + ntype_id = g.get_ntype_id(ntype) + inner_node_mask = _get_inner_node_mask(part, ntype_id, use_graphbolt) + inner_nids = F.boolean_mask(ndata[dgl.NID], inner_node_mask) + ntype_ids, inner_type_nids = gpb.map_to_per_ntype(inner_nids) + partid = gpb.nid2partid(inner_type_nids, ntype) + assert np.all(F.asnumpy(ntype_ids) == ntype_id) + assert np.all(F.asnumpy(partid) == gpb.partid) + + if is_homo: + orig_id = orig_nids[inner_type_nids] + else: + orig_id = 
orig_nids[ntype][inner_type_nids] + local_nids = gpb.nid2localnid(inner_type_nids, gpb.partid, ntype) + + for name in g.nodes[ntype].data: + if name in [dgl.NID, "inner_node"]: + continue + true_feats = F.gather_row(g.nodes[ntype].data[name], orig_id) + ndata = F.gather_row(node_feats[ntype + "/" + name], local_nids) + assert np.all(F.asnumpy(ndata == true_feats)) + + +def _verify_edge_feats( + g, part, gpb, orig_eids, edge_feats, use_graphbolt, is_homo +): + for etype in g.canonical_etypes: + edata = part.edge_attributes if use_graphbolt else part.edata + etype_id = g.get_etype_id(etype) + inner_edge_mask = _get_inner_edge_mask(part, etype_id, use_graphbolt) + inner_eids = F.boolean_mask(edata[dgl.EID], inner_edge_mask) + etype_ids, inner_type_eids = gpb.map_to_per_etype(inner_eids) + partid = gpb.eid2partid(inner_type_eids, etype) + assert np.all(F.asnumpy(etype_ids) == etype_id) + assert np.all(F.asnumpy(partid) == gpb.partid) + + if is_homo: + orig_id = orig_eids[inner_type_eids] + else: + orig_id = orig_eids[etype][inner_type_eids] + local_eids = gpb.eid2localeid(inner_type_eids, gpb.partid, etype) + + for name in g.edges[etype].data: + if name in [dgl.EID, "inner_edge"]: + continue + true_feats = F.gather_row(g.edges[etype].data[name], orig_id) + edata = F.gather_row( + edge_feats[_etype_tuple_to_str(etype) + "/" + name], + local_eids, + ) + assert np.all(F.asnumpy(edata == true_feats)) def verify_graph_feats( @@ -285,59 +337,14 @@ def verify_graph_feats( make sure the feats of nodes and edges are correct """ if (is_homo and store_inner_node) or not use_graphbolt: - for ntype in g.ntypes: - ndata = part.node_attributes if use_graphbolt else part.ndata - ntype_id = g.get_ntype_id(ntype) - inner_node_mask = _get_inner_node_mask( - part, ntype_id, use_graphbolt - ) - inner_nids = F.boolean_mask(ndata[dgl.NID], inner_node_mask) - ntype_ids, inner_type_nids = gpb.map_to_per_ntype(inner_nids) - partid = gpb.nid2partid(inner_type_nids, ntype) - assert 
np.all(F.asnumpy(ntype_ids) == ntype_id) - assert np.all(F.asnumpy(partid) == gpb.partid) - - if is_homo: - orig_id = orig_nids[inner_type_nids] - else: - orig_id = orig_nids[ntype][inner_type_nids] - local_nids = gpb.nid2localnid(inner_type_nids, gpb.partid, ntype) - - for name in g.nodes[ntype].data: - if name in [dgl.NID, "inner_node"]: - continue - true_feats = F.gather_row(g.nodes[ntype].data[name], orig_id) - ndata = F.gather_row(node_feats[ntype + "/" + name], local_nids) - assert np.all(F.asnumpy(ndata == true_feats)) + _verify_node_feats( + g, part, gpb, orig_nids, node_feats, use_graphbolt, is_homo + ) if (store_inner_edge and store_eids) or not use_graphbolt: - for etype in g.canonical_etypes: - edata = part.edge_attributes if use_graphbolt else part.edata - etype_id = g.get_etype_id(etype) - inner_edge_mask = _get_inner_edge_mask( - part, etype_id, use_graphbolt - ) - inner_eids = F.boolean_mask(edata[dgl.EID], inner_edge_mask) - etype_ids, inner_type_eids = gpb.map_to_per_etype(inner_eids) - partid = gpb.eid2partid(inner_type_eids, etype) - assert np.all(F.asnumpy(etype_ids) == etype_id) - assert np.all(F.asnumpy(partid) == gpb.partid) - - if is_homo: - orig_id = orig_eids[inner_type_eids] - else: - orig_id = orig_eids[etype][inner_type_eids] - local_eids = gpb.eid2localeid(inner_type_eids, gpb.partid, etype) - - for name in g.edges[etype].data: - if name in [dgl.EID, "inner_edge"]: - continue - true_feats = F.gather_row(g.edges[etype].data[name], orig_id) - edata = F.gather_row( - edge_feats[_etype_tuple_to_str(etype) + "/" + name], - local_eids, - ) - assert np.all(F.asnumpy(edata == true_feats)) + _verify_edge_feats( + g, part, gpb, orig_eids, edge_feats, use_graphbolt, is_homo + ) def check_hetero_partition( @@ -1205,7 +1212,7 @@ def _get_part_IDs(part_g): return part_src_ids, part_dst_ids -def _verify_orig_edge_IDs( +def _verify_orig_edge_IDs_gb( g, orig_nids, orig_eids, @@ -1236,7 +1243,7 @@ def _verify_orig_edge_IDs( assert 
np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) -def _verify_graphbolt_metadata(gpb, g, num_parts, part_id, part_sizes): +def _verify_metadata_gb(gpb, g, num_parts, part_id, part_sizes): """ check list: make sure the number of nodes and edges is correct. @@ -1256,7 +1263,7 @@ def _verify_graphbolt_metadata(gpb, g, num_parts, part_id, part_sizes): ) -def _verify_graphbolt_local_id(part_g, part_id, gpb): +def _verify_local_id_gb(part_g, part_id, gpb): """ check list: make sure the type of local id is correct. @@ -1279,7 +1286,7 @@ def _verify_graphbolt_local_id(part_g, part_id, gpb): return local_nid, local_eid -def _verify_graphbolt_map( +def _verify_map_gb( part_g, part_id, gpb, @@ -1321,7 +1328,7 @@ def _verify_graphbolt_map( return local_nodes, local_edges -def _verify_graphbolt_local_and_map_id( +def _verify_local_and_map_id_gb( part_g, part_id, gpb, @@ -1343,10 +1350,10 @@ def _verify_graphbolt_local_and_map_id( """ local_nid = local_eid = None if store_inner_node and store_inner_edge and store_eids: - local_nid, local_eid = _verify_graphbolt_local_id(part_g, part_id, gpb) - local_nodes, local_edges = _verify_graphbolt_map(part_g, part_id, gpb) + local_nid, local_eid = _verify_local_id_gb(part_g, part_id, gpb) + local_nodes, local_edges = _verify_map_gb(part_g, part_id, gpb) if is_homo: - _verify_homo_graphbolt_feat( + _verify_feat_homo_gb( g, local_nodes, local_edges, @@ -1359,7 +1366,7 @@ def _verify_graphbolt_local_and_map_id( ) -def _verify_homo_graphbolt_feat( +def _verify_feat_homo_gb( g, local_nodes, local_edges, @@ -1392,7 +1399,7 @@ def _verify_homo_graphbolt_feat( assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata)) -def _verify_graphbolt_orig_IDs( +def _verify_orig_IDs_gb( part_g, gpb, g, @@ -1411,7 +1418,7 @@ def _verify_graphbolt_orig_IDs( """ part_eids = part_g.edge_attributes[dgl.EID] if is_homo: - _verify_orig_edge_IDs( + _verify_orig_edge_IDs_gb( g, orig_nids, orig_eids, part_eids, part_src_ids, part_dst_ids ) local_orig_nids = 
orig_nids[part_g.node_attributes[dgl.NID]] @@ -1443,7 +1450,7 @@ def _verify_graphbolt_orig_IDs( src_ntype = g.ntypes[F.as_scalar(src_ntype_ids1[0])] dst_ntype = g.ntypes[F.as_scalar(dst_ntype_ids1[0])] - _verify_orig_edge_IDs( + _verify_orig_edge_IDs_gb( g, orig_nids, orig_eids, @@ -1515,7 +1522,7 @@ def test_partition_graph_graphbolt_homo( ) -def _verify_graphbolt_shuffled_data( +def _verify_shuffled_data_gb( g, gpb, orig_nids, @@ -1532,7 +1539,7 @@ def _verify_graphbolt_shuffled_data( make sure nodes and edges' id are correct. """ # Verify shuffled node/edge data for original IDs. - _verify_shuffled_labels( + _verify_shuffled_labels_gb( g, shuffled_labels, shuffled_elabels, @@ -1557,7 +1564,7 @@ def _verify_graphbolt_shuffled_data( assert np.all(F.asnumpy(eid2pid) == edge_map) -def _verify_shuffled_labels( +def _verify_shuffled_labels_gb( g, shuffled_labels, shuffled_edata, @@ -1595,7 +1602,7 @@ def _verify_shuffled_labels( assert np.all(orig_elabels == F.asnumpy(edata)) -def _verify_graphbolt_node_type_ID(part_g, gpb): +def _verify_node_type_ID_gb(part_g, gpb): """ check list: make sure ntype id have correct data type @@ -1665,8 +1672,8 @@ def _verify_graphbolt_part( part_config, part_id, load_feats=True, use_graphbolt=True ) # verify_metadata - _verify_graphbolt_metadata(gpb, g, num_parts, part_id, part_sizes) - _verify_graphbolt_local_and_map_id( + _verify_metadata_gb(gpb, g, num_parts, part_id, part_sizes) + _verify_local_and_map_id_gb( part_g, part_id, gpb, @@ -1683,10 +1690,10 @@ def _verify_graphbolt_part( src_ntype_ids, part_src_ids, dst_ntype_ids, - ) = _verify_graphbolt_node_type_ID(part_g, gpb) + ) = _verify_node_type_ID_gb(part_g, gpb) if store_eids: - _verify_graphbolt_orig_IDs( + _verify_orig_IDs_gb( part_g, gpb, g, @@ -1726,7 +1733,7 @@ def _verify_graphbolt_part( _verify_argument_for_graphbolt( parts, store_inner_node, store_inner_edge, store_eids, debug_mode ) - _verify_graphbolt_shuffled_data( + _verify_shuffled_data_gb( g, gpb, orig_nids, @@ 
-1740,6 +1747,22 @@ def _verify_graphbolt_part( return parts +def _verify_original_IDs_type_hetero(hg, orig_nids, orig_eids): + """ + check list: + make sure type of nodes and edges' ids are correct. + make sure nodes and edges' number in each type is correct. + """ + assert len(orig_nids) == len(hg.ntypes) + assert len(orig_eids) == len(hg.canonical_etypes) + for ntype in hg.ntypes: + assert len(orig_nids[ntype]) == hg.num_nodes(ntype) + assert F.dtype(orig_nids[ntype]) in (F.int64, F.int32) + for etype in hg.canonical_etypes: + assert len(orig_eids[etype]) == hg.num_edges(etype) + assert F.dtype(orig_eids[etype]) in (F.int64, F.int32) + + @pytest.mark.parametrize("part_method", ["metis", "random"]) @pytest.mark.parametrize("num_parts", [1, 4]) @pytest.mark.parametrize("store_eids", [True, False]) @@ -1789,17 +1812,8 @@ def test_partition_graph_graphbolt_hetero( store_inner_edge=store_inner_edge, n_jobs=n_jobs, ) - # _verify_original_IDs_type(hg, orig_nids, orig_eids) - - assert len(orig_nids) == len(hg.ntypes) - assert len(orig_eids) == len(hg.canonical_etypes) - for ntype in hg.ntypes: - assert len(orig_nids[ntype]) == hg.num_nodes(ntype) - assert F.dtype(orig_nids[ntype]) in (F.int64, F.int32) - for etype in hg.canonical_etypes: - assert len(orig_eids[etype]) == hg.num_edges(etype) - assert F.dtype(orig_eids[etype]) in (F.int64, F.int32) + _verify_original_IDs_type_hetero(hg, orig_nids, orig_eids) if debug_mode: store_eids = store_inner_node = store_inner_edge = True From f29cb5f55cd2622b4f1c9973a46555a0778adf52 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 2 Sep 2024 03:48:57 +0000 Subject: [PATCH 23/39] [distGB] change test_partition.py --- tests/distributed/test_partition.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/distributed/test_partition.py b/tests/distributed/test_partition.py index 42d8670cb0cb..44724802278a 100644 --- a/tests/distributed/test_partition.py +++ b/tests/distributed/test_partition.py @@ 
-89,17 +89,16 @@ def create_random_hetero(): def _verify_argument_for_graphbolt( - parts, store_inner_node, store_inner_edge, store_eids, debug_mode + parts, store_inner_node, store_inner_edge, store_eids ): """ check list: make sure arguments work. """ - if not debug_mode: - for part in parts: - assert store_inner_edge == ("inner_edge" in part.edge_attributes) - assert store_inner_node == ("inner_node" in part.node_attributes) - assert store_eids == (dgl.EID in part.edge_attributes) + for part in parts: + assert store_inner_edge == ("inner_edge" in part.edge_attributes) + assert store_inner_node == ("inner_node" in part.node_attributes) + assert store_eids == (dgl.EID in part.edge_attributes) def _verify_hetero_graph_node_edge_num( @@ -1731,7 +1730,7 @@ def _verify_graphbolt_part( ) _verify_argument_for_graphbolt( - parts, store_inner_node, store_inner_edge, store_eids, debug_mode + parts, store_inner_node, store_inner_edge, store_eids ) _verify_shuffled_data_gb( g, From da09a92ef6a28333413f9ddcf5d8a1fbb129fd5b Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 2 Sep 2024 03:59:05 +0000 Subject: [PATCH 24/39] [distGB] change test_partition.py --- tests/distributed/test_partition.py | 58 ++++++++++++++++------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/tests/distributed/test_partition.py b/tests/distributed/test_partition.py index 44724802278a..4d9d77032ae5 100644 --- a/tests/distributed/test_partition.py +++ b/tests/distributed/test_partition.py @@ -157,7 +157,31 @@ def _verify_hetero_graph_node_edge_num( assert g.num_edges(etype) == num_edges[etype] -def _verify_node_id_range(g, part, nids): +def _verify_edge_id_range_hetero_gb( + g, part, eids, use_graphbolt, store_inner_edge +): + edata = part.edge_attributes if use_graphbolt else part.edata + etype = part.type_per_edge if use_graphbolt else edata[dgl.ETYPE] + eid = th.arange(len(edata[dgl.EID])) + etype_arr = F.gather_row(etype, eid) + eid_arr = F.gather_row(edata[dgl.EID], eid) + for 
etype in g.canonical_etypes: + etype_id = g.get_etype_id(etype) + eids[etype].append(F.boolean_mask(eid_arr, etype_arr == etype_id)) + # Make sure edge Ids fall into a range. + if store_inner_edge or not use_graphbolt: + inner_edge_mask = _get_inner_edge_mask( + part, etype_id, use_graphbolt=use_graphbolt + ) + inner_eids = np.sort( + F.asnumpy(F.boolean_mask(edata[dgl.EID], inner_edge_mask)) + ) + assert np.all( + inner_eids == np.arange(inner_eids[0], inner_eids[-1] + 1) + ) + + +def _verify_node_id_range_hetero_gb(g, part, nids): """ check list: make sure inner nodes have Ids fall into a range. @@ -179,7 +203,7 @@ def _verify_node_id_range(g, part, nids): nids[ntype].append(inner_nids) -def _verify_node_edge_included(g, nids, eids): +def _verify_node_edge_included_hetero_gb(g, nids, eids): for ntype in nids: nids_type = F.cat(nids[ntype], 0) uniq_ids = F.unique(nids_type) @@ -193,7 +217,7 @@ def _verify_node_edge_included(g, nids, eids): assert len(uniq_ids) == g.num_edges(etype) -def _verify_hetero_graph_attributes( +def _verify_graph_attributes_hetero_gb( g, parts, store_inner_edge, @@ -209,31 +233,15 @@ def _verify_hetero_graph_attributes( nids = {ntype: [] for ntype in g.ntypes} eids = {etype: [] for etype in g.canonical_etypes} for part in parts: - edata = part.edge_attributes if use_graphbolt else part.edata - etype = part.type_per_edge if use_graphbolt else edata[dgl.ETYPE] - eid = th.arange(len(edata[dgl.EID])) - etype_arr = F.gather_row(etype, eid) - eid_arr = F.gather_row(edata[dgl.EID], eid) - for etype in g.canonical_etypes: - etype_id = g.get_etype_id(etype) - eids[etype].append(F.boolean_mask(eid_arr, etype_arr == etype_id)) - # Make sure edge Ids fall into a range. 
- if store_inner_edge or not use_graphbolt: - inner_edge_mask = _get_inner_edge_mask( - part, etype_id, use_graphbolt=use_graphbolt - ) - inner_eids = np.sort( - F.asnumpy(F.boolean_mask(edata[dgl.EID], inner_edge_mask)) - ) - assert np.all( - inner_eids == np.arange(inner_eids[0], inner_eids[-1] + 1) - ) + _verify_edge_id_range_hetero_gb( + g, part, eids, use_graphbolt, store_inner_edge + ) if not use_graphbolt: - _verify_node_id_range(g, part, nids) + _verify_node_id_range_hetero_gb(g, part, nids) if not use_graphbolt: - _verify_node_edge_included(g, nids, eids) + _verify_node_edge_included_hetero_gb(g, nids, eids) def _verify_hetero_graph( @@ -252,7 +260,7 @@ def _verify_hetero_graph( debug_mode=debug_mode, ) if store_eids or not use_graphbolt: - _verify_hetero_graph_attributes( + _verify_graph_attributes_hetero_gb( g, parts, store_inner_edge=store_inner_edge, From be69e1e9b1f183e023a2ab724099eed03350e859 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Thu, 5 Sep 2024 11:01:04 +0000 Subject: [PATCH 25/39] [distGB] modify partition.py --- python/dgl/distributed/partition.py | 36 +- tests/distributed/test_partition.py | 580 +++++++++++++++------------- 2 files changed, 326 insertions(+), 290 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 2d3029faa835..f0910cd89d03 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -116,24 +116,38 @@ def _save_dgl_graphs(filename, g_list, formats=None, sort_etypes=False): save_graphs(filename, g_list, formats=formats) -def _get_inner_node_mask(graph, ntype_id, use_graphbolt=False): - ndata = graph.node_attributes if use_graphbolt else graph.ndata - assert "inner_node" in ndata, '"inner_node" is not nodes\' data' - if NTYPE in ndata: - dtype = F.dtype(ndata["inner_node"]) - return ( - ndata["inner_node"] * F.astype(ndata[NTYPE] == ntype_id, dtype) == 1 +def _get_inner_node_mask(graph, ntype_id, gpb=None): + ndata = ( + 
graph.node_attributes + if isinstance(graph, gb.FusedCSCSamplingGraph) + else graph.ndata + ) + assert "inner_node" in ndata, "'inner_node' is not in nodes' data" + if NTYPE in ndata or gpb is not None: + ntype = ( + gpb.map_to_per_ntype(ndata[NID])[0] + if gpb is not None + else ndata[NTYPE] ) + dtype = F.dtype(ndata["inner_node"]) + return ndata["inner_node"] * F.astype(ntype == ntype_id, dtype) == 1 else: return ndata["inner_node"] == 1 -def _get_inner_edge_mask(graph, etype_id, use_graphbolt=False): - edata = graph.edge_attributes if use_graphbolt else graph.edata - assert "inner_edge" in edata, "'inner_edge' is not edges\' data" +def _get_inner_edge_mask( + graph, + etype_id, +): + edata = ( + graph.edge_attributes + if isinstance(graph, gb.FusedCSCSamplingGraph) + else graph.edata + ) + assert "inner_edge" in edata, "'inner_edge' is not in edges' data" etype = ( graph.type_per_edge - if use_graphbolt + if isinstance(graph, gb.FusedCSCSamplingGraph) else (graph.edata[ETYPE] if ETYPE in graph.edata else None) ) if etype is not None: diff --git a/tests/distributed/test_partition.py b/tests/distributed/test_partition.py index e683c0a01f48..32e2bdc4fea9 100644 --- a/tests/distributed/test_partition.py +++ b/tests/distributed/test_partition.py @@ -5,6 +5,7 @@ import dgl import dgl.backend as F +import dgl.graphbolt as gb import numpy as np import pytest import torch as th @@ -35,13 +36,21 @@ from utils import reset_envs -def _verify_partition_data_types(part_g, use_graphbolt=False): +def _verify_partition_data_types(part_g): """ check list: make sure nodes and edges have correct type. 
""" - ndata = part_g.node_attributes if use_graphbolt else part_g.ndata - edata = part_g.edge_attributes if use_graphbolt else part_g.edata + ndata = ( + part_g.node_attributes + if isinstance(part_g, gb.FusedCSCSamplingGraph) + else part_g.ndata + ) + edata = ( + part_g.edge_attributes + if isinstance(part_g, gb.FusedCSCSamplingGraph) + else part_g.edata + ) for k, dtype in RESERVED_FIELD_DTYPE.items(): if k in ndata: @@ -88,7 +97,7 @@ def create_random_hetero(): return dgl.heterograph(edges, num_nodes) -def _verify_argument_for_graphbolt( +def _verify_graphbolt_attributes( parts, store_inner_node, store_inner_edge, store_eids ): """ @@ -105,7 +114,6 @@ def _verify_hetero_graph_node_edge_num( g, parts, store_inner_edge, - use_graphbolt, debug_mode, ): """ @@ -117,28 +125,28 @@ def _verify_hetero_graph_node_edge_num( num_nodes = {ntype: 0 for ntype in g.ntypes} num_edges = {etype: 0 for etype in g.canonical_etypes} for part in parts: - edata = part.edge_attributes if use_graphbolt else part.edata + edata = ( + part.edge_attributes + if isinstance(part, gb.FusedCSCSamplingGraph) + else part.edata + ) if dgl.ETYPE in edata: assert len(g.canonical_etypes) == len(F.unique(edata[dgl.ETYPE])) - if debug_mode or not use_graphbolt: + if debug_mode or isinstance(part, dgl.DGLGraph): for ntype in g.ntypes: ntype_id = g.get_ntype_id(ntype) - inner_node_mask = _get_inner_node_mask( - part, ntype_id, use_graphbolt - ) + inner_node_mask = _get_inner_node_mask(part, ntype_id) num_inner_nodes = F.sum(F.astype(inner_node_mask, F.int64), 0) num_nodes[ntype] += num_inner_nodes - if store_inner_edge or not use_graphbolt: + if store_inner_edge or isinstance(part, dgl.DGLGraph): for etype in g.canonical_etypes: etype_id = g.get_etype_id(etype) - inner_edge_mask = _get_inner_edge_mask( - part, etype_id, use_graphbolt - ) + inner_edge_mask = _get_inner_edge_mask(part, etype_id) num_inner_edges = F.sum(F.astype(inner_edge_mask, F.int64), 0) num_edges[etype] += num_inner_edges # Verify the 
number of nodes are correct. - if debug_mode or not use_graphbolt: + if debug_mode or isinstance(part, dgl.DGLGraph): for ntype in g.ntypes: print( "node {}: {}, {}".format( @@ -147,7 +155,7 @@ def _verify_hetero_graph_node_edge_num( ) assert g.num_nodes(ntype) == num_nodes[ntype] # Verify the number of edges are correct. - if store_inner_edge or not use_graphbolt: + if store_inner_edge or isinstance(part, dgl.DGLGraph): for etype in g.canonical_etypes: print( "edge {}: {}, {}".format( @@ -155,16 +163,28 @@ def _verify_hetero_graph_node_edge_num( ) ) assert g.num_edges(etype) == num_edges[etype] - elif not store_inner_edge: - assert "inner_edge" not in parts[0].edge_attributes - -def _verify_edge_id_range_hetero_gb( - g, part, eids, use_graphbolt, store_inner_edge +def _verify_edge_id_range_hetero( + g, + part, + eids, ): - edata = part.edge_attributes if use_graphbolt else part.edata - etype = part.type_per_edge if use_graphbolt else edata[dgl.ETYPE] + """ + check list: + make sure inner_eids fall into a range. + make sure all edges are included. + """ + edata = ( + part.edge_attributes + if isinstance(part, gb.FusedCSCSamplingGraph) + else part.edata + ) + etype = ( + part.type_per_edge + if isinstance(part, gb.FusedCSCSamplingGraph) + else edata[dgl.ETYPE] + ) eid = th.arange(len(edata[dgl.EID])) etype_arr = F.gather_row(etype, eid) eid_arr = F.gather_row(edata[dgl.EID], eid) @@ -172,19 +192,17 @@ def _verify_edge_id_range_hetero_gb( etype_id = g.get_etype_id(etype) eids[etype].append(F.boolean_mask(eid_arr, etype_arr == etype_id)) # Make sure edge Ids fall into a range. 
- if store_inner_edge or not use_graphbolt: - inner_edge_mask = _get_inner_edge_mask( - part, etype_id, use_graphbolt=use_graphbolt - ) - inner_eids = np.sort( - F.asnumpy(F.boolean_mask(edata[dgl.EID], inner_edge_mask)) - ) - assert np.all( - inner_eids == np.arange(inner_eids[0], inner_eids[-1] + 1) - ) + inner_edge_mask = _get_inner_edge_mask(part, etype_id) + inner_eids = np.sort( + F.asnumpy(F.boolean_mask(edata[dgl.EID], inner_edge_mask)) + ) + assert np.all( + inner_eids == np.arange(inner_eids[0], inner_eids[-1] + 1) + ) + return eids -def _verify_node_id_range_hetero_gb(g, part, nids): +def _verify_node_id_range_hetero(g, part, nids): """ check list: make sure inner nodes have Ids fall into a range. @@ -193,7 +211,9 @@ def _verify_node_id_range_hetero_gb(g, part, nids): ntype_id = g.get_ntype_id(ntype) # Make sure inner nodes have Ids fall into a range. inner_node_mask = _get_inner_node_mask(part, ntype_id) - inner_nids = F.boolean_mask(part.ndata[dgl.NID], inner_node_mask) + inner_nids = F.boolean_mask( + part.node_attributes[dgl.NID], inner_node_mask + ) assert np.all( F.asnumpy( inner_nids @@ -204,27 +224,14 @@ def _verify_node_id_range_hetero_gb(g, part, nids): ) ) nids[ntype].append(inner_nids) + return nids -def _verify_node_edge_included_hetero_gb(g, nids, eids): - for ntype in nids: - nids_type = F.cat(nids[ntype], 0) - uniq_ids = F.unique(nids_type) - # We should get all nodes. - assert len(uniq_ids) == g.num_nodes(ntype) - - for etype in eids: - eids_type = F.cat(eids[etype], 0) - uniq_ids = F.unique(eids_type) - # We should get all nodes. 
- assert len(uniq_ids) == g.num_edges(etype) - - -def _verify_graph_attributes_hetero_gb( +def _verify_graph_attributes_hetero( g, parts, store_inner_edge, - use_graphbolt, + store_inner_node, ): """ check list: @@ -235,54 +242,70 @@ def _verify_graph_attributes_hetero_gb( """ nids = {ntype: [] for ntype in g.ntypes} eids = {etype: [] for etype in g.canonical_etypes} - for part in parts: - _verify_edge_id_range_hetero_gb( - g, part, eids, use_graphbolt, store_inner_edge - ) - - if not use_graphbolt: - _verify_node_id_range_hetero_gb(g, part, nids) - - if not use_graphbolt: - _verify_node_edge_included_hetero_gb(g, nids, eids) + # check edge id. + if store_inner_edge or isinstance(parts[0], dgl.DGLGraph): + for part in parts: + # collect eids + eids = _verify_edge_id_range_hetero(g, part, eids) + for etype in eids: + eids_type = F.cat(eids[etype], 0) + uniq_ids = F.unique(eids_type) + # We should get all nodes. + assert len(uniq_ids) == g.num_edges(etype) + + # check node id. + if store_inner_node or isinstance(parts[0], dgl.DGLGraph): + for part in parts: + nids = _verify_node_id_range_hetero(g, part, nids) + for ntype in nids: + nids_type = F.cat(nids[ntype], 0) + uniq_ids = F.unique(nids_type) + # We should get all nodes. 
+ assert len(uniq_ids) == g.num_nodes(ntype) def _verify_hetero_graph( g, parts, - use_graphbolt=False, store_eids=False, store_inner_edge=False, + store_inner_node=False, debug_mode=False, ): _verify_hetero_graph_node_edge_num( g, parts, store_inner_edge=store_inner_edge, - use_graphbolt=use_graphbolt, debug_mode=debug_mode, ) - if store_eids or not use_graphbolt: - _verify_graph_attributes_hetero_gb( + if store_eids: + _verify_graph_attributes_hetero( g, parts, store_inner_edge=store_inner_edge, - use_graphbolt=use_graphbolt, + store_inner_node=store_inner_node, ) -def _verify_node_feats( - g, part, gpb, orig_nids, node_feats, use_graphbolt, is_homo -): +def _verify_node_feats(g, part, gpb, orig_nids, node_feats, is_homo=False): for ntype in g.ntypes: - ndata = part.node_attributes if use_graphbolt else part.ndata + ndata = ( + part.node_attributes + if isinstance(part, gb.FusedCSCSamplingGraph) + else part.ndata + ) ntype_id = g.get_ntype_id(ntype) - inner_node_mask = _get_inner_node_mask(part, ntype_id, use_graphbolt) + inner_node_mask = _get_inner_node_mask( + part, + ntype_id, + (gpb if isinstance(part, gb.FusedCSCSamplingGraph) else None), + ) inner_nids = F.boolean_mask(ndata[dgl.NID], inner_node_mask) ntype_ids, inner_type_nids = gpb.map_to_per_ntype(inner_nids) partid = gpb.nid2partid(inner_type_nids, ntype) - assert np.all(F.asnumpy(ntype_ids) == ntype_id) - assert np.all(F.asnumpy(partid) == gpb.partid) + if is_homo: + assert np.all(F.asnumpy(ntype_ids) == ntype_id) + assert np.all(F.asnumpy(partid) == gpb.partid) if is_homo: orig_id = orig_nids[inner_type_nids] @@ -298,13 +321,15 @@ def _verify_node_feats( assert np.all(F.asnumpy(ndata == true_feats)) -def _verify_edge_feats( - g, part, gpb, orig_eids, edge_feats, use_graphbolt, is_homo -): +def _verify_edge_feats(g, part, gpb, orig_eids, edge_feats, is_homo=False): for etype in g.canonical_etypes: - edata = part.edge_attributes if use_graphbolt else part.edata + edata = ( + part.edge_attributes + if 
isinstance(part, gb.FusedCSCSamplingGraph) + else part.edata + ) etype_id = g.get_etype_id(etype) - inner_edge_mask = _get_inner_edge_mask(part, etype_id, use_graphbolt) + inner_edge_mask = _get_inner_edge_mask(part, etype_id) inner_eids = F.boolean_mask(edata[dgl.EID], inner_edge_mask) etype_ids, inner_type_eids = gpb.map_to_per_etype(inner_eids) partid = gpb.eid2partid(inner_type_eids, etype) @@ -328,7 +353,7 @@ def _verify_edge_feats( assert np.all(F.asnumpy(edata == true_feats)) -def verify_graph_feats( +def verify_graph_feats_hetero_dgl( g, gpb, part, @@ -336,25 +361,70 @@ def verify_graph_feats( edge_feats, orig_nids, orig_eids, - store_eids=False, - store_inner_edge=False, +): + """ + check list: + make sure the feats of nodes and edges are correct + """ + _verify_node_feats(g, part, gpb, orig_nids, node_feats) + + _verify_edge_feats(g, part, gpb, orig_eids, edge_feats) + + +def verify_graph_feats_gb( + g, + gpbs, + parts, + tot_node_feats, + tot_edge_feats, + orig_nids, + orig_eids, + shuffled_labels, + shuffled_edata, + test_ntype, + test_etype, store_inner_node=False, - use_graphbolt=False, + store_inner_edge=False, + store_eids=False, is_homo=False, ): """ check list: make sure the feats of nodes and edges are correct """ - if (is_homo and store_inner_node) or not use_graphbolt: - _verify_node_feats( - g, part, gpb, orig_nids, node_feats, use_graphbolt, is_homo - ) + for part_id in range(len(parts)): + part = parts[part_id] + gpb = gpbs[part_id] + node_feats = tot_node_feats[part_id] + edge_feats = tot_edge_feats[part_id] + if store_inner_node: + _verify_node_feats( + g, + part, + gpb, + orig_nids, + node_feats, + is_homo=is_homo, + ) + if store_inner_edge and store_eids: + _verify_edge_feats( + g, + part, + gpb, + orig_eids, + edge_feats, + is_homo=is_homo, + ) - if (store_inner_edge and store_eids) or not use_graphbolt: - _verify_edge_feats( - g, part, gpb, orig_eids, edge_feats, use_graphbolt, is_homo - ) + _verify_shuffled_labels_gb( + g, + 
shuffled_labels, + shuffled_edata, + orig_nids, + orig_eids, + test_ntype, + test_etype, + ) def check_hetero_partition( @@ -466,7 +536,7 @@ def check_hetero_partition( assert len(orig_eids1) == len(orig_eids2) assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2)) parts.append(part_g) - verify_graph_feats( + verify_graph_feats_hetero_dgl( hg, gpb, part_g, node_feats, edge_feats, orig_nids, orig_eids ) @@ -1345,68 +1415,15 @@ def _verify_local_and_map_id_gb( store_inner_node, store_inner_edge, store_eids, - g=None, - orig_nids=None, - orig_eids=None, - node_feats=None, - edge_feats=None, - is_homo=False, ): """ check list: make sure local id are correct. make sure mapping id are correct. - make sure homo graph have correct feats. """ - local_nid = local_eid = None if store_inner_node and store_inner_edge and store_eids: - local_nid, local_eid = _verify_local_id_gb(part_g, part_id, gpb) - local_nodes, local_edges = _verify_map_gb(part_g, part_id, gpb) - if is_homo: - _verify_feat_homo_gb( - g, - local_nodes, - local_edges, - orig_nids, - orig_eids, - node_feats, - edge_feats, - local_nid, - local_eid, - ) - - -def _verify_feat_homo_gb( - g, - local_nodes, - local_edges, - orig_nids, - orig_eids, - node_feats, - edge_feats, - local_nid, - local_eid, -): - """ - check list: - make sure feats of nodes and edges and its number are correct. 
- """ - local_nodes = orig_nids[local_nodes] - local_edges = orig_eids[local_edges] - - for name in ["labels", "feats"]: - assert "_N/" + name in node_feats - assert node_feats["_N/" + name].shape[0] == len(local_nodes) - true_feats = F.gather_row(g.ndata[name], local_nodes) - ndata = F.gather_row(node_feats["_N/" + name], local_nid) - assert np.all(F.asnumpy(true_feats) == F.asnumpy(ndata)) - for name in ["feats"]: - efeat_name = _etype_tuple_to_str(DEFAULT_ETYPE) + "/" + name - assert efeat_name in edge_feats - assert edge_feats[efeat_name].shape[0] == len(local_edges) - true_feats = F.gather_row(g.edata[name], local_edges) - edata = F.gather_row(edge_feats[efeat_name], local_eid) - assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata)) + _verify_local_id_gb(part_g, part_id, gpb) + _verify_map_gb(part_g, part_id, gpb) def _verify_orig_IDs_gb( @@ -1487,23 +1504,6 @@ def test_partition_graph_graphbolt_homo( store_inner_edge, debug_mode, ): - """ - check list: - _verify_metadata: - number of edges, nodes, partitions for all - number of edges, nodes in each partitions - order and data type of local nid and eid - - _verify_mapping: - data type, ID's order and ID's number of edges and nodes - - verify_graph_feats: - graph's feats - - _verify_reconstrunt_IDs: - check if feats and IDs can be reconstructed - - """ reset_envs() if debug_mode: os.environ["DGL_DIST_DEBUG"] = "1" @@ -1545,37 +1545,15 @@ def test_partition_graph_graphbolt_homo( store_inner_edge, store_eids, is_homo=True, - debug_mode=debug_mode, ) -def _verify_shuffled_data_gb( - g, - gpb, - orig_nids, - orig_eids, - part_sizes, - shuffled_labels, - shuffled_elabels, - test_ntype=None, - test_etype=None, -): +def _verify_constructed_id_gb(part_sizes, gpb): """ + verify the part id of each node by constructed nids. check list: - make sure labels and feats are correct. - make sure nodes and edges' id are correct. 
+ make sure each node' part id and its type are corect """ - # Verify shuffled node/edge data for original IDs. - _verify_shuffled_labels_gb( - g, - shuffled_labels, - shuffled_elabels, - orig_nids, - orig_eids, - test_ntype, - test_etype, - ) - node_map = [] edge_map = [] for part_i, (num_nodes, num_edges) in enumerate(part_sizes): @@ -1602,13 +1580,13 @@ def _verify_shuffled_labels_gb( ): """ check list: - make sure node labels are correct. - make sure edge labels are correct. + make sure node data are correct. + make sure edge data are correct. """ shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0)) shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0)) orig_labels = np.zeros(shuffled_labels.shape, dtype=shuffled_labels.dtype) - orig_elabels = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype) + orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype) orig_nid = orig_nids if test_ntype is None else orig_nids[test_ntype] orig_eid = orig_eids if test_etype is None else orig_eids[test_etype] @@ -1624,9 +1602,9 @@ def _verify_shuffled_labels_gb( ) orig_labels[F.asnumpy(orig_nid)] = shuffled_labels - orig_elabels[F.asnumpy(orig_eid)] = shuffled_edata + orig_edata[F.asnumpy(orig_eid)] = shuffled_edata assert np.all(orig_labels == F.asnumpy(nlabel)) - assert np.all(orig_elabels == F.asnumpy(edata)) + assert np.all(orig_edata == F.asnumpy(edata)) def _verify_node_type_ID_gb(part_g, gpb): @@ -1655,6 +1633,82 @@ def _verify_node_type_ID_gb(part_g, gpb): ) +def _verify_IDs_gb( + g, + part_g, + part_id, + gpb, + part_sizes, + orig_nids, + orig_eids, + store_inner_node, + store_inner_edge, + store_eids, + is_homo, +): + # verify local id and mapping id + _verify_local_and_map_id_gb( + part_g, + part_id, + gpb, + store_inner_node, + store_inner_edge, + store_eids, + ) + + # Verify the mapping between the reshuffled IDs and the original IDs. 
+ ( + part_src_ids, + part_dst_ids, + src_ntype_ids, + part_src_ids, + dst_ntype_ids, + ) = _verify_node_type_ID_gb(part_g, gpb) + + if store_eids: + _verify_orig_IDs_gb( + part_g, + gpb, + g, + part_src_ids=part_src_ids, + part_dst_ids=part_dst_ids, + src_ntype_ids=src_ntype_ids, + dst_ntype_ids=dst_ntype_ids, + orig_nids=orig_nids, + orig_eids=orig_eids, + is_homo=is_homo, + ) + _verify_constructed_id_gb(part_sizes, gpb) + + +def _collect_data_gb( + parts, + part_g, + gpbs, + gpb, + tot_node_feats, + node_feats, + tot_edge_feats, + edge_feats, + shuffled_labels, + shuffled_edata, + test_ntype, + test_etype, +): + if test_ntype != None: + shuffled_labels.append(node_feats[test_ntype + "/labels"]) + shuffled_edata.append( + edge_feats[_etype_tuple_to_str(test_etype) + "/labels"] + ) + else: + shuffled_labels.append(node_feats["_N/labels"]) + shuffled_edata.append(edge_feats["_N:_E:_N/feats"]) + parts.append(part_g) + gpbs.append(gpb) + tot_node_feats.append(node_feats) + tot_edge_feats.append(edge_feats) + + def _verify_graphbolt_part( g, test_dir, @@ -1668,109 +1722,94 @@ def _verify_graphbolt_part( test_ntype=None, test_etype=None, is_homo=False, - debug_mode=False, ): """ check list: - _verify_graphbolt_metadata: + _verify_metadata_gb: data type, ID's order and ID's number of edges and nodes - - _verify_graphbolt_local_and_map_id: - local id, mapping id - _verify_graphbolt_node_type_ID: - node type id - _verify_graphbolt_orig_IDs: - orig edge, hetero ntype id - verify_graph_feats: + _verify_IDs_gb: + local id, mapping id,node type id, orig edge, hetero ntype id + verify_graph_feats_gb: nodes and edges' feats - _verify_argument_for_graphbolt: + _verify_graphbolt_attributes: arguments - _verify_graphbolt_shuffled_data: - id after reshuffle """ parts = [] + tot_node_feats = [] + tot_edge_feats = [] shuffled_labels = [] shuffled_edata = [] part_sizes = [] + gpbs = [] part_config = os.path.join(test_dir, f"{graph_name}.json") # test each part for part_id in 
range(num_parts): part_g, node_feats, edge_feats, gpb, _, _, _ = load_partition( part_config, part_id, load_feats=True, use_graphbolt=True ) - # verify_metadata - _verify_metadata_gb(gpb, g, num_parts, part_id, part_sizes) - _verify_local_and_map_id_gb( + # verify metadata + _verify_metadata_gb( + gpb, + g, + num_parts, + part_id, + part_sizes, + ) + + # verify eid and nid + _verify_IDs_gb( + g, part_g, part_id, gpb, + part_sizes, + orig_nids, + orig_eids, store_inner_node, store_inner_edge, store_eids, - is_homo=False, + is_homo, ) - # Verify the mapping between the reshuffled IDs and the original IDs. - ( - part_src_ids, - part_dst_ids, - src_ntype_ids, - part_src_ids, - dst_ntype_ids, - ) = _verify_node_type_ID_gb(part_g, gpb) - - if store_eids: - _verify_orig_IDs_gb( - part_g, - gpb, - g, - part_src_ids=part_src_ids, - part_dst_ids=part_dst_ids, - src_ntype_ids=src_ntype_ids, - dst_ntype_ids=dst_ntype_ids, - orig_nids=orig_nids, - orig_eids=orig_eids, - is_homo=is_homo, - ) - if test_ntype != None: - shuffled_labels.append(node_feats[test_ntype + "/labels"]) - shuffled_edata.append( - edge_feats[_etype_tuple_to_str(test_etype) + "/labels"] - ) - else: - shuffled_labels.append(node_feats["_N/labels"]) - shuffled_edata.append(edge_feats["_N:_E:_N/feats"]) - parts.append(part_g) - - verify_graph_feats( - g, - gpb, + # collect shuffled data and parts + _collect_data_gb( + parts, part_g, + gpbs, + gpb, + tot_node_feats, node_feats, + tot_edge_feats, edge_feats, - orig_nids, - orig_eids, - store_eids, - store_inner_edge=store_inner_edge, - store_inner_node=store_inner_node, - use_graphbolt=True, - is_homo=is_homo, + shuffled_labels, + shuffled_edata, + test_ntype, + test_etype, ) - _verify_argument_for_graphbolt( - parts, store_inner_node, store_inner_edge, store_eids - ) - _verify_shuffled_data_gb( + # verify graph feats + verify_graph_feats_gb( g, - gpb, + gpbs, + parts, + tot_node_feats, + tot_edge_feats, orig_nids, orig_eids, - part_sizes, - shuffled_labels, - 
shuffled_edata, - test_ntype, - test_etype, + shuffled_labels=shuffled_labels, + shuffled_edata=shuffled_edata, + test_ntype=test_ntype, + test_etype=test_etype, + store_inner_node=store_inner_node, + store_inner_edge=store_inner_edge, + store_eids=store_eids, + is_homo=is_homo, + ) + + _verify_graphbolt_attributes( + parts, store_inner_node, store_inner_edge, store_eids ) + return parts @@ -1805,21 +1844,6 @@ def test_partition_graph_graphbolt_hetero( debug_mode, n_jobs=1, ): - """ - check list: - _vertify_original_IDs: - number of edges and nodes' type and number of them in each type - - _verify_graphbolt_mapping_IDs: - mapping node and edge IDs - feats in graph - - _verify_hetero_graph: - number, order of elements in hetero graph - - _verify_labels: - labels of nodes and edges - """ test_ntype = "n1" test_etype = ("n1", "r1", "n2") reset_envs() @@ -1827,7 +1851,6 @@ def test_partition_graph_graphbolt_hetero( os.environ["DGL_DIST_DEBUG"] = "1" with tempfile.TemporaryDirectory() as test_dir: hg = create_random_hetero() - # TODO create graph data graph_name = "test" hg.nodes[test_ntype].data["labels"] = F.arange( 0, hg.num_nodes(test_ntype) @@ -1878,7 +1901,6 @@ def test_partition_graph_graphbolt_hetero( _verify_hetero_graph( hg, parts, - True, store_eids=store_eids, store_inner_edge=store_inner_edge, debug_mode=debug_mode, From ef6693a24ff9865e2b9866d89b8a09fa98cc3c1f Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 6 Sep 2024 08:08:22 +0000 Subject: [PATCH 26/39] change partition.py --- python/dgl/distributed/partition.py | 473 ++++++++++++---------------- 1 file changed, 206 insertions(+), 267 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index f0910cd89d03..319f99945b3d 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -109,10 +109,7 @@ def _process_partitions(g_list, formats=None, sort_etypes=False): return g_list -def _save_dgl_graphs(filename, g_list, 
formats=None, sort_etypes=False): - g_list = _process_partitions( - g_list, formats=formats, sort_etypes=sort_etypes - ) +def _save_dgl_graphs(filename, g_list, formats=None): save_graphs(filename, g_list, formats=formats) @@ -469,106 +466,7 @@ def load_partition_feats( return node_feats, edge_feats -def _load_partition_book_from_metadata(part_metadata, part_id): - assert "num_parts" in part_metadata, "num_parts does not exist." - assert ( - part_metadata["num_parts"] > part_id - ), "part {} is out of range (#parts: {})".format( - part_id, part_metadata["num_parts"] - ) - num_parts = part_metadata["num_parts"] - assert ( - "num_nodes" in part_metadata - ), "cannot get the number of nodes of the global graph." - assert ( - "num_edges" in part_metadata - ), "cannot get the number of edges of the global graph." - assert "node_map" in part_metadata, "cannot get the node map." - assert "edge_map" in part_metadata, "cannot get the edge map." - assert "graph_name" in part_metadata, "cannot get the graph name" - - # If this is a range partitioning, node_map actually stores a list, whose elements - # indicate the boundary of range partitioning. Otherwise, node_map stores a filename - # that contains node map in a NumPy array. 
- node_map = part_metadata["node_map"] - edge_map = part_metadata["edge_map"] - if isinstance(node_map, dict): - for key in node_map: - is_range_part = isinstance(node_map[key], list) - break - elif isinstance(node_map, list): - is_range_part = True - node_map = {DEFAULT_NTYPE: node_map} - else: - is_range_part = False - if isinstance(edge_map, list): - edge_map = {DEFAULT_ETYPE: edge_map} - - ntypes = {DEFAULT_NTYPE: 0} - etypes = {DEFAULT_ETYPE: 0} - if "ntypes" in part_metadata: - ntypes = part_metadata["ntypes"] - if "etypes" in part_metadata: - etypes = part_metadata["etypes"] - - if isinstance(node_map, dict): - for key in node_map: - assert key in ntypes, "The node type {} is invalid".format(key) - if isinstance(edge_map, dict): - for key in edge_map: - assert key in etypes, "The edge type {} is invalid".format(key) - - if not is_range_part: - raise TypeError("Only RangePartitionBook is supported currently.") - - node_map = _get_part_ranges(node_map) - edge_map = _get_part_ranges(edge_map) - - # Format dtype of node/edge map if dtype is specified. - def _format_node_edge_map(part_metadata, map_type, data): - key = f"{map_type}_map_dtype" - if key not in part_metadata: - return data - dtype = part_metadata[key] - assert dtype in ["int32", "int64"], ( - f"The {map_type} map dtype should be either int32 or int64, " - f"but got {dtype}." - ) - for key in data: - data[key] = data[key].astype(dtype) - return data - - node_map = _format_node_edge_map(part_metadata, "node", node_map) - edge_map = _format_node_edge_map(part_metadata, "edge", edge_map) - - # Sort the node/edge maps by the node/edge type ID. 
- node_map = dict(sorted(node_map.items(), key=lambda x: ntypes[x[0]])) - edge_map = dict(sorted(edge_map.items(), key=lambda x: etypes[x[0]])) - - def _assert_is_sorted(id_map): - id_ranges = np.array(list(id_map.values())) - ids = [] - for i in range(num_parts): - ids.append(id_ranges[:, i, :]) - ids = np.array(ids).flatten() - assert np.all( - ids[:-1] <= ids[1:] - ), f"The node/edge map is not sorted: {ids}" - - _assert_is_sorted(node_map) - _assert_is_sorted(edge_map) - - return ( - RangePartitionBook( - part_id, num_parts, node_map, edge_map, ntypes, etypes - ), - part_metadata["graph_name"], - ntypes, - etypes, - ) - - -def load_partition_book(part_config, part_id): +def load_partition_book(part_config, part_id, part_metadata=None): """Load a graph partition book from the partition config file. Parameters @@ -589,7 +487,8 @@ def load_partition_book(part_config, part_id): dict The edge types """ - part_metadata = _load_part_config(part_config) + if part_metadata == None: + part_metadata = _load_part_config(part_config) assert "num_parts" in part_metadata, "num_parts does not exist." 
assert ( part_metadata["num_parts"] > part_id @@ -773,6 +672,38 @@ def _set_trainer_ids(g, sim_g, node_parts): g.edges[c_etype].data["trainer_id"] = trainer_id +def _partition_to_graphbolt( + parts, + part_i, + part_config, + part_metadata, + *, + store_eids=True, + store_inner_node=False, + store_inner_edge=False, + graph_formats=None, +): + gpb, _, ntypes, etypes = load_partition_book( + part_config, part_i, part_metadata + ) + graph = parts[part_i] + csc_graph = gb_convert_single_dgl_partition( + ntypes=ntypes, + etypes=etypes, + gpb=gpb, + part_meta=part_metadata, + graph=graph, + store_eids=store_eids, + store_inner_edge=store_inner_edge, + store_inner_node=store_inner_node, + graph_formats=graph_formats, + ) + rel_path_result = _save_graph_gb( + part_config, part_i, csc_graph, part_metadata + ) + part_metadata[f"part-{part_i}"]["part_graph_graphbolt"] = rel_path_result + + def partition_graph( g, graph_name, @@ -1383,9 +1314,9 @@ def get_homogeneous(g, balance_ntypes): for name in g.edges[etype].data: if name in [EID, "inner_edge"]: continue - edge_feats[ - _etype_tuple_to_str(etype) + "/" + name - ] = F.gather_row(g.edges[etype].data[name], local_edges) + edge_feats[_etype_tuple_to_str(etype) + "/" + name] = ( + F.gather_row(g.edges[etype].data[name], local_edges) + ) else: for ntype in g.ntypes: if len(g.ntypes) > 1: @@ -1420,9 +1351,9 @@ def get_homogeneous(g, balance_ntypes): for name in g.edges[etype].data: if name in [EID, "inner_edge"]: continue - edge_feats[ - _etype_tuple_to_str(etype) + "/" + name - ] = F.gather_row(g.edges[etype].data[name], local_edges) + edge_feats[_etype_tuple_to_str(etype) + "/" + name] = ( + F.gather_row(g.edges[etype].data[name], local_edges) + ) # delete `orig_id` from ndata/edata del part.ndata["orig_id"] del part.edata["orig_id"] @@ -1440,34 +1371,8 @@ def get_homogeneous(g, balance_ntypes): "edge_feats": os.path.relpath(edge_feat_file, out_path), } sort_etypes = len(g.etypes) > 1 + part = _process_partitions([part], 
graph_formats, sort_etypes)[0] if use_graphbolt: - - def _partition_to_graphbolt( - part_config, - parts, - part_i, - part_metadata, - *, - store_eids=True, - store_inner_node=False, - store_inner_edge=False, - graph_formats=None, - ): - rel_path_result = gb_convert_single_dgl_partition( - part_i, - parts, - part_metadata, - part_config=part_config, - store_eids=store_eids, - store_inner_edge=store_inner_edge, - store_inner_node=store_inner_node, - graph_formats=graph_formats, - ) - part_metadata[f"part-{part_i}"][ - "part_graph_graphbolt" - ] = rel_path_result - - part = _process_partitions([part], graph_formats, sort_etypes)[0] # save FusedCSCSamplingGraph kwargs["graph_formats"] = graph_formats kwargs.pop("n_jobs", None) @@ -1480,15 +1385,14 @@ def _partition_to_graphbolt( ) else: part_graph_file = os.path.join(part_dir, "graph.dgl") - part_metadata["part-{}".format(part_id)][ - "part_graph" - ] = os.path.relpath(part_graph_file, out_path) + part_metadata["part-{}".format(part_id)]["part_graph"] = ( + os.path.relpath(part_graph_file, out_path) + ) # save DGLGraph _save_dgl_graphs( part_graph_file, [part], formats=graph_formats, - sort_etypes=sort_etypes, ) _dump_part_config(part_config, part_metadata) @@ -1554,7 +1458,7 @@ def init_type_per_edge(graph, gpb): return etype_ids -def _load_parts(part_config, part_id, parts): +def _load_part(part_config, part_id, parts=None): """load parts from variable or dist.""" if parts is None: graph, _, _, _, _, _, _ = load_partition( @@ -1565,15 +1469,116 @@ def _load_parts(part_config, part_id, parts): return graph -def gb_convert_single_dgl_partition( - part_id, - parts, +def _save_graph_gb(part_config, part_id, csc_graph, part_meta): + orig_feats_path = os.path.join( + os.path.dirname(part_config), + f"part{part_id}", + ) + csc_graph_path = os.path.join( + orig_feats_path, "fused_csc_sampling_graph.pt" + ) + torch.save(csc_graph, csc_graph_path) + + return os.path.relpath(csc_graph_path, os.path.dirname(part_config)) + # 
Update graph path. + + +def cast_various_to_minimum_dtype_gb( + graph, part_meta, + num_parts, + indptr, + indices, + type_per_edge, + etypes, + ntypes, + node_attributes, + edge_attributes, +): + # Cast various data to minimum dtype. + # Cast 1: indptr. + indptr = _cast_to_minimum_dtype(graph.num_edges(), indptr) + # Cast 2: indices. + indices = _cast_to_minimum_dtype(graph.num_nodes(), indices) + # Cast 3: type_per_edge. + type_per_edge = _cast_to_minimum_dtype( + len(etypes), type_per_edge, field=ETYPE + ) + # Cast 4: node/edge_attributes. + predicates = { + NID: part_meta["num_nodes"], + "part_id": num_parts, + NTYPE: len(ntypes), + EID: part_meta["num_edges"], + ETYPE: len(etypes), + DGL2GB_EID: part_meta["num_edges"], + GB_DST_ID: part_meta["num_nodes"], + } + for attributes in [node_attributes, edge_attributes]: + for key in attributes: + if key not in predicates: + continue + attributes[key] = _cast_to_minimum_dtype( + predicates[key], attributes[key], field=key + ) + + +def _create_attributes_gb( + graph, + gpb, + edge_ids, + is_homo, + store_inner_node, + store_inner_edge, + store_eids, + debug_mode, +): + # Save node attributes. Detailed attributes are shown below. + # DGL_GB\Attributes dgl.NID("_ID") dgl.NTYPE("_TYPE") "inner_node" "part_id" + # DGL_Homograph ✅ 🚫 ✅ ✅ + # GB_Homograph ✅ 🚫 optional 🚫 + # DGL_Heterograph ✅ ✅ ✅ ✅ + # GB_Heterograph ✅ 🚫 optional 🚫 + required_node_attrs = [NID] + if store_inner_node: + required_node_attrs.append("inner_node") + if debug_mode: + required_node_attrs = list(graph.ndata.keys()) + node_attributes = {attr: graph.ndata[attr] for attr in required_node_attrs} + + # Save edge attributes. Detailed attributes are shown below. 
+ # DGL_GB\Attributes dgl.EID("_ID") dgl.ETYPE("_TYPE") "inner_edge" + # DGL_Homograph ✅ 🚫 ✅ + # GB_Homograph optional 🚫 optional + # DGL_Heterograph ✅ ✅ ✅ + # GB_Heterograph optional ✅ optional + type_per_edge = None + if not is_homo: + type_per_edge = init_type_per_edge(graph, gpb)[edge_ids] + type_per_edge = type_per_edge.to(RESERVED_FIELD_DTYPE[ETYPE]) + required_edge_attrs = [] + if store_eids: + required_edge_attrs.append(EID) + if store_inner_edge: + required_edge_attrs.append("inner_edge") + if debug_mode: + required_edge_attrs = list(graph.edata.keys()) + edge_attributes = { + attr: graph.edata[attr][edge_ids] for attr in required_edge_attrs + } + return node_attributes, edge_attributes, type_per_edge + + +def gb_convert_single_dgl_partition( + ntypes, + etypes, + gpb, graph_formats, - part_config, store_eids, store_inner_node, store_inner_edge, + part_meta, + graph, ): """Converts a single DGL partition to GraphBolt. @@ -1594,6 +1599,12 @@ def gb_convert_single_dgl_partition( Whether to store inner node mask in the new graph. Default: False. store_inner_edge : bool, optional Whether to store inner edge mask in the new graph. Default: False. + part_meta : dict + contain the meta data of the partition. + parts : list[DGLGraph] + the unit of graphs to be converted to graphbolt graph. + parts : list[DGLGraph] + the graph to be converted to graphbolt graph. """ debug_mode = "DGL_DIST_DEBUG" in os.environ if debug_mode: @@ -1601,18 +1612,8 @@ def gb_convert_single_dgl_partition( "Running in debug mode which means all attributes of DGL partitions" " will be saved to the new format." 
) - if part_meta is None: - part_meta = _load_part_config(part_config) num_parts = part_meta["num_parts"] - graph = _load_parts(part_config, part_id, parts) - - gpb, _, ntypes, etypes = ( - load_partition_book(part_config, part_id) - if part_meta is None - else _load_partition_book_from_metadata(part_meta, part_id) - ) - is_homo = is_homogeneous(ntypes, etypes) node_type_to_id = ( None if is_homo else {ntype: ntid for ntid, ntype in enumerate(ntypes)} @@ -1627,39 +1628,16 @@ def gb_convert_single_dgl_partition( # Obtain CSC indtpr and indices. indptr, indices, edge_ids = graph.adj_tensors("csc") - # Save node attributes. Detailed attributes are shown below. - # DGL_GB\Attributes dgl.NID("_ID") dgl.NTYPE("_TYPE") "inner_node" "part_id" - # DGL_Homograph ✅ 🚫 ✅ ✅ - # GB_Homograph ✅ 🚫 optional 🚫 - # DGL_Heterograph ✅ ✅ ✅ ✅ - # GB_Heterograph ✅ 🚫 optional 🚫 - required_node_attrs = [NID] - if store_inner_node: - required_node_attrs.append("inner_node") - if debug_mode: - required_node_attrs = list(graph.ndata.keys()) - node_attributes = {attr: graph.ndata[attr] for attr in required_node_attrs} - - # Save edge attributes. Detailed attributes are shown below. 
- # DGL_GB\Attributes dgl.EID("_ID") dgl.ETYPE("_TYPE") "inner_edge" - # DGL_Homograph ✅ 🚫 ✅ - # GB_Homograph optional 🚫 optional - # DGL_Heterograph ✅ ✅ ✅ - # GB_Heterograph optional ✅ optional - type_per_edge = None - if not is_homo: - type_per_edge = init_type_per_edge(graph, gpb)[edge_ids] - type_per_edge = type_per_edge.to(RESERVED_FIELD_DTYPE[ETYPE]) - required_edge_attrs = [] - if store_eids: - required_edge_attrs.append(EID) - if store_inner_edge: - required_edge_attrs.append("inner_edge") - if debug_mode: - required_edge_attrs = list(graph.edata.keys()) - edge_attributes = { - attr: graph.edata[attr][edge_ids] for attr in required_edge_attrs - } + node_attributes, edge_attributes, type_per_edge = _create_attributes_gb( + graph, + gpb, + edge_ids, + is_homo, + store_inner_node, + store_inner_edge, + store_eids, + debug_mode, + ) # When converting DGLGraph to FusedCSCSamplingGraph, edge IDs are # re-ordered(actually FusedCSCSamplingGraph does not have edge IDs # in nature). So we need to save such re-order info for any @@ -1681,32 +1659,18 @@ def gb_convert_single_dgl_partition( indptr, dtype=indices.dtype ) - # Cast various data to minimum dtype. - # Cast 1: indptr. - indptr = _cast_to_minimum_dtype(graph.num_edges(), indptr) - # Cast 2: indices. - indices = _cast_to_minimum_dtype(graph.num_nodes(), indices) - # Cast 3: type_per_edge. - type_per_edge = _cast_to_minimum_dtype( - len(etypes), type_per_edge, field=ETYPE + cast_various_to_minimum_dtype_gb( + graph, + part_meta, + num_parts, + indptr, + indices, + type_per_edge, + etypes, + ntypes, + node_attributes, + edge_attributes, ) - # Cast 4: node/edge_attributes. 
- predicates = { - NID: part_meta["num_nodes"], - "part_id": num_parts, - NTYPE: len(ntypes), - EID: part_meta["num_edges"], - ETYPE: len(etypes), - DGL2GB_EID: part_meta["num_edges"], - GB_DST_ID: part_meta["num_nodes"], - } - for attributes in [node_attributes, edge_attributes]: - for key in attributes: - if key not in predicates: - continue - attributes[key] = _cast_to_minimum_dtype( - predicates[key], attributes[key], field=key - ) csc_graph = gb.fused_csc_sampling_graph( indptr, @@ -1718,17 +1682,7 @@ def gb_convert_single_dgl_partition( node_type_to_id=node_type_to_id, edge_type_to_id=edge_type_to_id, ) - orig_feats_path = os.path.join( - os.path.dirname(part_config), - part_meta[f"part-{part_id}"]["node_feats"], - ) - csc_graph_path = os.path.join( - os.path.dirname(orig_feats_path), "fused_csc_sampling_graph.pt" - ) - torch.save(csc_graph, csc_graph_path) - - return os.path.relpath(csc_graph_path, os.path.dirname(part_config)) - # Update graph path. + return csc_graph def _convert_partition_to_graphbolt( @@ -1740,7 +1694,6 @@ def _convert_partition_to_graphbolt( store_inner_edge, n_jobs, num_parts, - parts=None, ): # [Rui] DGL partitions are always saved as homogeneous graphs even though # the original graph is heterogeneous. But heterogeneous information like @@ -1752,12 +1705,12 @@ def _convert_partition_to_graphbolt( # We can simply pass None to it. # Iterate over partitions. 
+ if part_meta is None: + part_meta = _load_part_config(part_config) convert_with_format = partial( gb_convert_single_dgl_partition, - parts=parts, part_meta=part_meta, graph_formats=graph_formats, - part_config=part_config, store_eids=store_eids, store_inner_node=store_inner_node, store_inner_edge=store_inner_edge, @@ -1772,16 +1725,35 @@ def _convert_partition_to_graphbolt( max_workers=min(num_parts, n_jobs), mp_context=mp_ctx, ) as executor: - futures = [] for part_id in range(num_parts): - futures.append(executor.submit(convert_with_format, part_id)) + gpb, _, ntypes, etypes = load_partition_book( + part_config, part_id + ) + part = _load_part(part_config, part_id) + csc_graph = executor.submit( + convert_with_format, + graph=part, + ntypes=ntypes, + etypes=etypes, + gpb=gpb, + ).result() + rel_path = _save_graph_gb( + part_config, part_id, csc_graph, part_meta + ) + rel_path_results.append(rel_path) - for part_id in range(num_parts): - rel_path_results.append(futures[part_id].result()) else: # If running single-threaded, avoid spawning new interpreter, which is slow for part_id in range(num_parts): - rel_path_results.append(convert_with_format(part_id)) + gpb, _, ntypes, etypes = load_partition_book(part_config, part_id) + part = _load_part(part_config, part_id) + csc_graph = convert_with_format( + graph=part, ntypes=ntypes, etypes=etypes, gpb=gpb + ) + rel_path = _save_graph_gb( + part_config, part_id, csc_graph, part_meta + ) + rel_path_results.append(rel_path) for part_id in range(num_parts): # Update graph path. @@ -1798,39 +1770,6 @@ def _convert_partition_to_graphbolt( return part_meta -def _dgl_partition_to_graphbolt( - part_config, - part_meta, - parts, - *, - store_eids=True, - store_inner_node=False, - store_inner_edge=False, - graph_formats=None, - n_jobs=1, -): - debug_mode = "DGL_DIST_DEBUG" in os.environ - if debug_mode: - dgl_warning( - "Running in debug mode which means all attributes of DGL partitions" - " will be saved to the new format." 
- ) - new_part_meta = copy.deepcopy(part_meta) - num_parts = part_meta["num_parts"] - part_meta = _convert_partition_to_graphbolt( - new_part_meta, - graph_formats, - part_config, - store_eids, - store_inner_node, - store_inner_edge, - n_jobs, - num_parts, - parts=parts, - ) - return part_meta - - def dgl_partition_to_graphbolt( part_config, *, From e80e3534e18604679315fe6128dd2e25875527cd Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 6 Sep 2024 08:32:51 +0000 Subject: [PATCH 27/39] partition.py --- python/dgl/distributed/partition.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 0051d57bba88..0b8f48383a90 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -487,7 +487,7 @@ def load_partition_book(part_config, part_id, part_metadata=None): dict The edge types """ - if part_metadata == None: + if part_metadata is None: part_metadata = _load_part_config(part_config) assert "num_parts" in part_metadata, "num_parts does not exist." assert ( @@ -699,7 +699,7 @@ def _partition_to_graphbolt( graph_formats=graph_formats, ) rel_path_result = _save_graph_gb( - part_config, part_i, csc_graph, part_metadata + part_config, part_i, csc_graph ) part_metadata[f"part-{part_i}"]["part_graph_graphbolt"] = rel_path_result @@ -1469,7 +1469,7 @@ def _load_part(part_config, part_id, parts=None): return graph -def _save_graph_gb(part_config, part_id, csc_graph, part_meta): +def _save_graph_gb(part_config, part_id, csc_graph): orig_feats_path = os.path.join( os.path.dirname(part_config), f"part{part_id}", @@ -1495,7 +1495,7 @@ def cast_various_to_minimum_dtype_gb( node_attributes, edge_attributes, ): - # Cast various data to minimum dtype. + """Cast various data to minimum dtype.""" # Cast 1: indptr. indptr = _cast_to_minimum_dtype(graph.num_edges(), indptr) # Cast 2: indices. 
@@ -1521,6 +1521,7 @@ def cast_various_to_minimum_dtype_gb( attributes[key] = _cast_to_minimum_dtype( predicates[key], attributes[key], field=key ) + return indptr, indices, type_per_edge def _create_attributes_gb( @@ -1659,7 +1660,7 @@ def gb_convert_single_dgl_partition( indptr, dtype=indices.dtype ) - cast_various_to_minimum_dtype_gb( + indptr, indices, type_per_edge=cast_various_to_minimum_dtype_gb( graph, part_meta, num_parts, @@ -1738,7 +1739,7 @@ def _convert_partition_to_graphbolt( gpb=gpb, ).result() rel_path = _save_graph_gb( - part_config, part_id, csc_graph, part_meta + part_config, part_id, csc_graph ) rel_path_results.append(rel_path) @@ -1751,7 +1752,7 @@ def _convert_partition_to_graphbolt( graph=part, ntypes=ntypes, etypes=etypes, gpb=gpb ) rel_path = _save_graph_gb( - part_config, part_id, csc_graph, part_meta + part_config, part_id, csc_graph ) rel_path_results.append(rel_path) From d7173b398be707680ca1753e6f28b9c8bce38974 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 6 Sep 2024 08:36:15 +0000 Subject: [PATCH 28/39] partition.py --- python/dgl/distributed/partition.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 0b8f48383a90..fcfd96688e12 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -698,9 +698,7 @@ def _partition_to_graphbolt( store_inner_node=store_inner_node, graph_formats=graph_formats, ) - rel_path_result = _save_graph_gb( - part_config, part_i, csc_graph - ) + rel_path_result = _save_graph_gb(part_config, part_i, csc_graph) part_metadata[f"part-{part_i}"]["part_graph_graphbolt"] = rel_path_result @@ -1660,7 +1658,7 @@ def gb_convert_single_dgl_partition( indptr, dtype=indices.dtype ) - indptr, indices, type_per_edge=cast_various_to_minimum_dtype_gb( + indptr, indices, type_per_edge = cast_various_to_minimum_dtype_gb( graph, part_meta, num_parts, @@ -1738,9 +1736,7 @@ def 
_convert_partition_to_graphbolt( etypes=etypes, gpb=gpb, ).result() - rel_path = _save_graph_gb( - part_config, part_id, csc_graph - ) + rel_path = _save_graph_gb(part_config, part_id, csc_graph) rel_path_results.append(rel_path) else: @@ -1751,9 +1747,7 @@ def _convert_partition_to_graphbolt( csc_graph = convert_with_format( graph=part, ntypes=ntypes, etypes=etypes, gpb=gpb ) - rel_path = _save_graph_gb( - part_config, part_id, csc_graph - ) + rel_path = _save_graph_gb(part_config, part_id, csc_graph) rel_path_results.append(rel_path) for part_id in range(num_parts): From b91ef146334b3f89fc8be8f02eb08bbbdb2a935c Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 6 Sep 2024 09:26:22 +0000 Subject: [PATCH 29/39] chaneg doc --- python/dgl/distributed/partition.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index fcfd96688e12..f5f3162bbd17 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1583,8 +1583,12 @@ def gb_convert_single_dgl_partition( Parameters ---------- - part_id : int - The numerical ID of the partition to convert. + node types : dict + The node types + edge types : dict + The edge types + gpb : GraphPartitionBook + The global partition information. graph_formats : str or list[str], optional Save partitions in specified formats. It could be any combination of `coo`, `csc`. As `csc` format is mandatory for `FusedCSCSamplingGraph`, @@ -1599,11 +1603,9 @@ def gb_convert_single_dgl_partition( store_inner_edge : bool, optional Whether to store inner edge mask in the new graph. Default: False. part_meta : dict - contain the meta data of the partition. - parts : list[DGLGraph] - the unit of graphs to be converted to graphbolt graph. - parts : list[DGLGraph] - the graph to be converted to graphbolt graph. + Contain the meta data of the partition. 
+ graph : DGLGraph + The graph to be converted to graphbolt graph. """ debug_mode = "DGL_DIST_DEBUG" in os.environ if debug_mode: From b2907b48f2a0d931c2eb00781a77950068e4f30c Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 9 Sep 2024 05:39:45 +0000 Subject: [PATCH 30/39] change partition --- python/dgl/distributed/partition.py | 127 +++++++++++++++------------- 1 file changed, 68 insertions(+), 59 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index f5f3162bbd17..3830d4f92bd3 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -88,25 +88,23 @@ def _dump_part_config(part_config, part_metadata): json.dump(part_metadata, outfile, sort_keys=False, indent=4) -def _process_partitions(g_list, formats=None, sort_etypes=False): +def _process_partitions(g, formats=None, sort_etypes=False): """Preprocess partitions before saving: 1. format data types. 2. sort csc/csr by tag. """ - for g in g_list: - for k, dtype in RESERVED_FIELD_DTYPE.items(): - if k in g.ndata: - g.ndata[k] = F.astype(g.ndata[k], dtype) - if k in g.edata: - g.edata[k] = F.astype(g.edata[k], dtype) - for g in g_list: - if (not sort_etypes) or (formats is None): - continue + for k, dtype in RESERVED_FIELD_DTYPE.items(): + if k in g.ndata: + g.ndata[k] = F.astype(g.ndata[k], dtype) + if k in g.edata: + g.edata[k] = F.astype(g.edata[k], dtype) + + if (sort_etypes) and (formats is not None): if "csr" in formats: g = sort_csr_by_tag(g, tag=g.edata[ETYPE], tag_type="edge") if "csc" in formats: g = sort_csc_by_tag(g, tag=g.edata[ETYPE], tag_type="edge") - return g_list + return g def _save_dgl_graphs(filename, g_list, formats=None): @@ -475,6 +473,8 @@ def load_partition_book(part_config, part_id, part_metadata=None): The path of the partition config file. part_id : int The partition ID. + part_metadata : dict + The meta data of partition. 
Returns ------- @@ -684,7 +684,7 @@ def _partition_to_graphbolt( graph_formats=None, ): gpb, _, ntypes, etypes = load_partition_book( - part_config, part_i, part_metadata + part_config=part_config, part_id=part_i, part_metadata=part_metadata ) graph = parts[part_i] csc_graph = gb_convert_single_dgl_partition( @@ -698,7 +698,9 @@ def _partition_to_graphbolt( store_inner_node=store_inner_node, graph_formats=graph_formats, ) - rel_path_result = _save_graph_gb(part_config, part_i, csc_graph) + rel_path_result = _save_graph_gb( + part_config=part_config, part_id=part_i, csc_graph=csc_graph + ) part_metadata[f"part-{part_i}"]["part_graph_graphbolt"] = rel_path_result @@ -1312,9 +1314,9 @@ def get_homogeneous(g, balance_ntypes): for name in g.edges[etype].data: if name in [EID, "inner_edge"]: continue - edge_feats[ - _etype_tuple_to_str(etype) + "/" + name - ] = F.gather_row(g.edges[etype].data[name], local_edges) + edge_feats[_etype_tuple_to_str(etype) + "/" + name] = ( + F.gather_row(g.edges[etype].data[name], local_edges) + ) else: for ntype in g.ntypes: if len(g.ntypes) > 1: @@ -1349,9 +1351,9 @@ def get_homogeneous(g, balance_ntypes): for name in g.edges[etype].data: if name in [EID, "inner_edge"]: continue - edge_feats[ - _etype_tuple_to_str(etype) + "/" + name - ] = F.gather_row(g.edges[etype].data[name], local_edges) + edge_feats[_etype_tuple_to_str(etype) + "/" + name] = ( + F.gather_row(g.edges[etype].data[name], local_edges) + ) # delete `orig_id` from ndata/edata del part.ndata["orig_id"] del part.edata["orig_id"] @@ -1369,7 +1371,7 @@ def get_homogeneous(g, balance_ntypes): "edge_feats": os.path.relpath(edge_feat_file, out_path), } sort_etypes = len(g.etypes) > 1 - part = _process_partitions([part], graph_formats, sort_etypes)[0] + part = _process_partitions(part, graph_formats, sort_etypes) if use_graphbolt: # save FusedCSCSamplingGraph kwargs["graph_formats"] = graph_formats @@ -1383,9 +1385,9 @@ def get_homogeneous(g, balance_ntypes): ) else: 
part_graph_file = os.path.join(part_dir, "graph.dgl") - part_metadata["part-{}".format(part_id)][ - "part_graph" - ] = os.path.relpath(part_graph_file, out_path) + part_metadata["part-{}".format(part_id)]["part_graph"] = ( + os.path.relpath(part_graph_file, out_path) + ) # save DGLGraph _save_dgl_graphs( part_graph_file, @@ -1468,17 +1470,16 @@ def _load_part(part_config, part_id, parts=None): def _save_graph_gb(part_config, part_id, csc_graph): - orig_feats_path = os.path.join( + csc_graph_save_dir = os.path.join( os.path.dirname(part_config), f"part{part_id}", ) csc_graph_path = os.path.join( - orig_feats_path, "fused_csc_sampling_graph.pt" + csc_graph_save_dir, "fused_csc_sampling_graph.pt" ) torch.save(csc_graph, csc_graph_path) return os.path.relpath(csc_graph_path, os.path.dirname(part_config)) - # Update graph path. def cast_various_to_minimum_dtype_gb( @@ -1686,8 +1687,35 @@ def gb_convert_single_dgl_partition( return csc_graph +def convert_partition_to_graphbolt_multi_process( + part_config, + part_id, + graph_formats, + store_eids, + store_inner_node, + store_inner_edge, +): + gpb, _, ntypes, etypes = load_partition_book( + part_config=part_config, part_id=part_id + ) + part = _load_part(part_config, part_id) + part_meta = copy.deepcopy(_load_part_config(part_config)) + csc_graph = gb_convert_single_dgl_partition( + graph=part, + ntypes=ntypes, + etypes=etypes, + gpb=gpb, + part_meta=part_meta, + graph_formats=graph_formats, + store_eids=store_eids, + store_inner_node=store_inner_node, + store_inner_edge=store_inner_edge, + ) + rel_path = _save_graph_gb(part_config, part_id, csc_graph) + return rel_path + + def _convert_partition_to_graphbolt( - part_meta, graph_formats, part_config, store_eids, @@ -1706,11 +1734,9 @@ def _convert_partition_to_graphbolt( # We can simply pass None to it. # Iterate over partitions. 
- if part_meta is None: - part_meta = _load_part_config(part_config) convert_with_format = partial( - gb_convert_single_dgl_partition, - part_meta=part_meta, + convert_partition_to_graphbolt_multi_process, + part_config=part_config, graph_formats=graph_formats, store_eids=store_eids, store_inner_node=store_inner_node, @@ -1727,31 +1753,16 @@ def _convert_partition_to_graphbolt( mp_context=mp_ctx, ) as executor: for part_id in range(num_parts): - gpb, _, ntypes, etypes = load_partition_book( - part_config, part_id + rel_path_results.append( + executor.submit(part_id=part_id).result() ) - part = _load_part(part_config, part_id) - csc_graph = executor.submit( - convert_with_format, - graph=part, - ntypes=ntypes, - etypes=etypes, - gpb=gpb, - ).result() - rel_path = _save_graph_gb(part_config, part_id, csc_graph) - rel_path_results.append(rel_path) else: # If running single-threaded, avoid spawning new interpreter, which is slow for part_id in range(num_parts): - gpb, _, ntypes, etypes = load_partition_book(part_config, part_id) - part = _load_part(part_config, part_id) - csc_graph = convert_with_format( - graph=part, ntypes=ntypes, etypes=etypes, gpb=gpb - ) - rel_path = _save_graph_gb(part_config, part_id, csc_graph) + rel_path = convert_with_format(part_id=part_id) rel_path_results.append(rel_path) - + part_meta = _load_part_config(part_config) for part_id in range(num_parts): # Update graph path. part_meta[f"part-{part_id}"]["part_graph_graphbolt"] = rel_path_results[ @@ -1813,16 +1824,14 @@ def dgl_partition_to_graphbolt( " will be saved to the new format." 
) part_meta = _load_part_config(part_config) - new_part_meta = copy.deepcopy(part_meta) num_parts = part_meta["num_parts"] part_meta = _convert_partition_to_graphbolt( - new_part_meta, - graph_formats, - part_config, - store_eids, - store_inner_node, - store_inner_edge, - n_jobs, - num_parts, + graph_formats=graph_formats, + part_config=part_config, + store_eids=store_eids, + store_inner_node=store_inner_node, + store_inner_edge=store_inner_edge, + n_jobs=n_jobs, + num_parts=num_parts, ) _dump_part_config(part_config, part_meta) From 24522e168d3c559825d4a42f4824a7c86293e238 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 9 Sep 2024 13:40:57 +0000 Subject: [PATCH 31/39] change partition.py --- python/dgl/distributed/partition.py | 55 ++++++++++++++++++----------- 1 file changed, 34 insertions(+), 21 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 3830d4f92bd3..38dc2b0e9b40 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1314,9 +1314,9 @@ def get_homogeneous(g, balance_ntypes): for name in g.edges[etype].data: if name in [EID, "inner_edge"]: continue - edge_feats[_etype_tuple_to_str(etype) + "/" + name] = ( - F.gather_row(g.edges[etype].data[name], local_edges) - ) + edge_feats[ + _etype_tuple_to_str(etype) + "/" + name + ] = F.gather_row(g.edges[etype].data[name], local_edges) else: for ntype in g.ntypes: if len(g.ntypes) > 1: @@ -1351,9 +1351,9 @@ def get_homogeneous(g, balance_ntypes): for name in g.edges[etype].data: if name in [EID, "inner_edge"]: continue - edge_feats[_etype_tuple_to_str(etype) + "/" + name] = ( - F.gather_row(g.edges[etype].data[name], local_edges) - ) + edge_feats[ + _etype_tuple_to_str(etype) + "/" + name + ] = F.gather_row(g.edges[etype].data[name], local_edges) # delete `orig_id` from ndata/edata del part.ndata["orig_id"] del part.edata["orig_id"] @@ -1372,22 +1372,35 @@ def get_homogeneous(g, balance_ntypes): } sort_etypes = 
len(g.etypes) > 1 part = _process_partitions(part, graph_formats, sort_etypes) - if use_graphbolt: - # save FusedCSCSamplingGraph - kwargs["graph_formats"] = graph_formats - kwargs.pop("n_jobs", None) - _partition_to_graphbolt( - part_i=part_id, - part_config=part_config, - part_metadata=part_metadata, - parts=parts, - **kwargs, - ) - else: + + # transmit to graphbolt and save graph + if use_graphbolt: + # save FusedCSCSamplingGraph + kwargs["graph_formats"] = graph_formats + n_jobs = kwargs.pop("n_jobs", 1) + mp_ctx = mp.get_context("spawn") + with concurrent.futures.ProcessPoolExecutor( + max_workers=min(num_parts, n_jobs), + mp_context=mp_ctx, + ) as executor: + for part_id in range(num_parts): + executor.submit( + _partition_to_graphbolt( + part_i=part_id, + part_config=part_config, + part_metadata=part_metadata, + parts=parts, + **kwargs, + ) + ) + else: + for part_id, part in parts.items(): + part_dir = os.path.join(out_path, "part" + str(part_id)) part_graph_file = os.path.join(part_dir, "graph.dgl") - part_metadata["part-{}".format(part_id)]["part_graph"] = ( - os.path.relpath(part_graph_file, out_path) - ) + part_metadata[ + "part-{}".format(part_id)][ + "part_graph" + ] = os.path.relpath(part_graph_file, out_path) # save DGLGraph _save_dgl_graphs( part_graph_file, From dcf3a394766dd88aab8495054b6e02c201575130 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 9 Sep 2024 13:58:10 +0000 Subject: [PATCH 32/39] change partition.py format --- python/dgl/distributed/partition.py | 30 +++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 38dc2b0e9b40..331f4a29a335 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1397,8 +1397,7 @@ def get_homogeneous(g, balance_ntypes): for part_id, part in parts.items(): part_dir = os.path.join(out_path, "part" + str(part_id)) part_graph_file = os.path.join(part_dir, 
"graph.dgl") - part_metadata[ - "part-{}".format(part_id)][ + part_metadata["part-{}".format(part_id)][ "part_graph" ] = os.path.relpath(part_graph_file, out_path) # save DGLGraph @@ -1708,6 +1707,33 @@ def convert_partition_to_graphbolt_multi_process( store_inner_node, store_inner_edge, ): + """ + Convert signle partition to graphbolt, which is used for multiple process. + Parameters + ---------- + part_config : str + The path of the partition config file. + part_id : int + The partition ID. + graph_formats : str or list[str], optional + Save partitions in specified formats. It could be any combination of + `coo`, `csc`. As `csc` format is mandatory for `FusedCSCSamplingGraph`, + it is not necessary to specify this argument. It's mainly for + specifying `coo` format to save edge ID mapping and destination node + IDs. If not specified, whether to save `coo` format is determined by + the availability of the format in DGL partitions. Default: None. + store_eids : bool, optional + Whether to store edge IDs in the new graph. Default: True. + store_inner_node : bool, optional + Whether to store inner node mask in the new graph. Default: False. + store_inner_edge : bool, optional + Whether to store inner edge mask in the new graph. Default: False. + + Returns + ------- + str + The path csc_graph to save. 
+ """ gpb, _, ntypes, etypes = load_partition_book( part_config=part_config, part_id=part_id ) From 220a8b312d0a7bab281a71dc9433d4d3474510a2 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 10 Sep 2024 06:43:12 +0000 Subject: [PATCH 33/39] modify partition.py --- python/dgl/distributed/partition.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 331f4a29a335..b652d2df9fd1 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -88,7 +88,7 @@ def _dump_part_config(part_config, part_metadata): json.dump(part_metadata, outfile, sort_keys=False, indent=4) -def _process_partitions(g, formats=None, sort_etypes=False): +def process_partitions(g, formats=None, sort_etypes=False): """Preprocess partitions before saving: 1. format data types. 2. sort csc/csr by tag. @@ -1314,9 +1314,9 @@ def get_homogeneous(g, balance_ntypes): for name in g.edges[etype].data: if name in [EID, "inner_edge"]: continue - edge_feats[ - _etype_tuple_to_str(etype) + "/" + name - ] = F.gather_row(g.edges[etype].data[name], local_edges) + edge_feats[_etype_tuple_to_str(etype) + "/" + name] = ( + F.gather_row(g.edges[etype].data[name], local_edges) + ) else: for ntype in g.ntypes: if len(g.ntypes) > 1: @@ -1351,9 +1351,9 @@ def get_homogeneous(g, balance_ntypes): for name in g.edges[etype].data: if name in [EID, "inner_edge"]: continue - edge_feats[ - _etype_tuple_to_str(etype) + "/" + name - ] = F.gather_row(g.edges[etype].data[name], local_edges) + edge_feats[_etype_tuple_to_str(etype) + "/" + name] = ( + F.gather_row(g.edges[etype].data[name], local_edges) + ) # delete `orig_id` from ndata/edata del part.ndata["orig_id"] del part.edata["orig_id"] @@ -1371,7 +1371,7 @@ def get_homogeneous(g, balance_ntypes): "edge_feats": os.path.relpath(edge_feat_file, out_path), } sort_etypes = len(g.etypes) > 1 - part = _process_partitions(part, 
graph_formats, sort_etypes) + part = process_partitions(part, graph_formats, sort_etypes) # transmit to graphbolt and save graph if use_graphbolt: @@ -1398,8 +1398,8 @@ def get_homogeneous(g, balance_ntypes): part_dir = os.path.join(out_path, "part" + str(part_id)) part_graph_file = os.path.join(part_dir, "graph.dgl") part_metadata["part-{}".format(part_id)][ - "part_graph" - ] = os.path.relpath(part_graph_file, out_path) + "part_graph" + ] = os.path.relpath(part_graph_file, out_path) # save DGLGraph _save_dgl_graphs( part_graph_file, From 200eb6c57abf0de307d40ce19469ec4f017989bf Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 10 Sep 2024 08:38:12 +0000 Subject: [PATCH 34/39] change partition --- python/dgl/distributed/partition.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 3d06655b5aa7..f39ed0e46c5e 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1503,6 +1503,7 @@ def get_homogeneous(g, balance_ntypes): part_metadata["part-{}".format(part_id)][ "part_graph" ] = os.path.relpath(part_graph_file, out_path) + # save DGLGraph _save_dgl_graphs( part_graph_file, From bb3239751047a25f78a0452302b917486d7a62c3 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 10 Sep 2024 08:41:59 +0000 Subject: [PATCH 35/39] change partition --- python/dgl/distributed/partition.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index f39ed0e46c5e..4aadfe98e617 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1417,9 +1417,9 @@ def get_homogeneous(g, balance_ntypes): for name in g.edges[etype].data: if name in [EID, "inner_edge"]: continue - edge_feats[_etype_tuple_to_str(etype) + "/" + name] = ( - F.gather_row(g.edges[etype].data[name], local_edges) - ) + edge_feats[ + _etype_tuple_to_str(etype) + "/" + name + 
] = F.gather_row(g.edges[etype].data[name], local_edges) else: for ntype in g.ntypes: if len(g.ntypes) > 1: @@ -1454,9 +1454,9 @@ def get_homogeneous(g, balance_ntypes): for name in g.edges[etype].data: if name in [EID, "inner_edge"]: continue - edge_feats[_etype_tuple_to_str(etype) + "/" + name] = ( - F.gather_row(g.edges[etype].data[name], local_edges) - ) + edge_feats[ + _etype_tuple_to_str(etype) + "/" + name + ] = F.gather_row(g.edges[etype].data[name], local_edges) # delete `orig_id` from ndata/edata del part.ndata["orig_id"] del part.edata["orig_id"] @@ -1503,7 +1503,6 @@ def get_homogeneous(g, balance_ntypes): part_metadata["part-{}".format(part_id)][ "part_graph" ] = os.path.relpath(part_graph_file, out_path) - # save DGLGraph _save_dgl_graphs( part_graph_file, From 33784adb4b8ad3dee6b38ad38bb5ba753a70eb9f Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 10 Sep 2024 09:15:12 +0000 Subject: [PATCH 36/39] change partition --- python/dgl/distributed/partition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 4aadfe98e617..9db47a5eb1bd 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1896,7 +1896,7 @@ def _convert_partition_to_graphbolt( ) as executor: for part_id in range(num_parts): rel_path_results.append( - executor.submit(part_id=part_id).result() + executor.submit(convert_with_format, part_id=part_id).result() ) else: From 2193ce473331f200dce87023b29021711e9041d6 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 10 Sep 2024 09:17:35 +0000 Subject: [PATCH 37/39] add partition --- python/dgl/distributed/partition.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 9db47a5eb1bd..8d8e259a6836 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1896,7 +1896,9 @@ def 
_convert_partition_to_graphbolt( ) as executor: for part_id in range(num_parts): rel_path_results.append( - executor.submit(convert_with_format, part_id=part_id).result() + executor.submit( + convert_with_format, part_id=part_id + ).result() ) else: From a02a882a81678aa10d73ebb27b851996fcf983f4 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 10 Sep 2024 09:23:34 +0000 Subject: [PATCH 38/39] modify partition --- python/dgl/distributed/partition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 8d8e259a6836..7fe4d61b0edb 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1482,7 +1482,7 @@ def get_homogeneous(g, balance_ntypes): kwargs["graph_formats"] = graph_formats n_jobs = kwargs.pop("n_jobs", 1) mp_ctx = mp.get_context("spawn") - with concurrent.futures.ProcessPoolExecutor( + with concurrent.futures.ProcessPoolExecutor( # pylint: disable=unexpected-keyword-arg max_workers=min(num_parts, n_jobs), mp_context=mp_ctx, ) as executor: From 99450be531bdf9691055c556ce3797cff2654549 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 11 Sep 2024 04:40:18 +0000 Subject: [PATCH 39/39] change partition --- python/dgl/distributed/partition.py | 37 ++++++++++++++++------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 7fe4d61b0edb..079ed8806a96 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -1496,6 +1496,8 @@ def get_homogeneous(g, balance_ntypes): **kwargs, ) ) + part_metadata["node_map_dtype"] = "int64" + part_metadata["edge_map_dtype"] = "int64" else: for part_id, part in parts.items(): part_dir = os.path.join(out_path, "part" + str(part_id)) @@ -1688,12 +1690,12 @@ def gb_convert_single_dgl_partition( ntypes, etypes, gpb, - graph_formats, - store_eids, - store_inner_node, - 
store_inner_edge, part_meta, graph, + graph_formats=None, + store_eids=False, + store_inner_node=False, + store_inner_edge=False, ): """Converts a single DGL partition to GraphBolt. @@ -1705,6 +1707,10 @@ def gb_convert_single_dgl_partition( The edge types gpb : GraphPartitionBook The global partition information. + part_meta : dict + Contain the meta data of the partition. + graph : DGLGraph + The graph to be converted to graphbolt graph. graph_formats : str or list[str], optional Save partitions in specified formats. It could be any combination of `coo`, `csc`. As `csc` format is mandatory for `FusedCSCSamplingGraph`, @@ -1718,10 +1724,6 @@ def gb_convert_single_dgl_partition( Whether to store inner node mask in the new graph. Default: False. store_inner_edge : bool, optional Whether to store inner edge mask in the new graph. Default: False. - part_meta : dict - Contain the meta data of the partition. - graph : DGLGraph - The graph to be converted to graphbolt graph. """ debug_mode = "DGL_DIST_DEBUG" in os.environ if debug_mode: @@ -1802,16 +1804,17 @@ def gb_convert_single_dgl_partition( return csc_graph -def convert_partition_to_graphbolt_multi_process( +def _convert_partition_to_graphbolt( part_config, part_id, - graph_formats, - store_eids, - store_inner_node, - store_inner_edge, + graph_formats=None, + store_eids=False, + store_inner_node=False, + store_inner_edge=False, ): """ - Convert signle partition to graphbolt, which is used for multiple process. + The pipeline converting signle partition to graphbolt. + Parameters ---------- part_config : str @@ -1857,7 +1860,7 @@ def convert_partition_to_graphbolt_multi_process( return rel_path -def _convert_partition_to_graphbolt( +def _convert_partition_to_graphbolt_wrapper( graph_formats, part_config, store_eids, @@ -1877,7 +1880,7 @@ def _convert_partition_to_graphbolt( # Iterate over partitions. 
convert_with_format = partial( - convert_partition_to_graphbolt_multi_process, + _convert_partition_to_graphbolt, part_config=part_config, graph_formats=graph_formats, store_eids=store_eids, @@ -1969,7 +1972,7 @@ def dgl_partition_to_graphbolt( ) part_meta = _load_part_config(part_config) num_parts = part_meta["num_parts"] - part_meta = _convert_partition_to_graphbolt( + part_meta = _convert_partition_to_graphbolt_wrapper( graph_formats=graph_formats, part_config=part_config, store_eids=store_eids,