From 8ea4165a9e4cbfdba6971800fded679724ee1001 Mon Sep 17 00:00:00 2001 From: Akshat-Shu Date: Tue, 18 Mar 2025 17:10:23 +0530 Subject: [PATCH 1/9] Implement Hopcroft-karp algorithm to find the maximum matching of a undirected bipartite graph --- pydatastructs/graphs/__init__.py | 5 +- pydatastructs/graphs/algorithms.py | 324 +++++++++++++++++- pydatastructs/graphs/graph.py | 25 ++ pydatastructs/graphs/tests/test_algorithms.py | 175 +++++++++- 4 files changed, 526 insertions(+), 3 deletions(-) diff --git a/pydatastructs/graphs/__init__.py b/pydatastructs/graphs/__init__.py index 9c00ca0aa..bfde82d53 100644 --- a/pydatastructs/graphs/__init__.py +++ b/pydatastructs/graphs/__init__.py @@ -21,7 +21,10 @@ all_pair_shortest_paths, topological_sort, topological_sort_parallel, - max_flow + max_flow, + maximum_matching, + maximum_matching_parallel, + bipartite_coloring ) __all__.extend(algorithms.__all__) diff --git a/pydatastructs/graphs/algorithms.py b/pydatastructs/graphs/algorithms.py index 9de50e7cc..05aebb3df 100644 --- a/pydatastructs/graphs/algorithms.py +++ b/pydatastructs/graphs/algorithms.py @@ -23,7 +23,10 @@ 'all_pair_shortest_paths', 'topological_sort', 'topological_sort_parallel', - 'max_flow' + 'max_flow', + 'maximum_matching', + 'maximum_matching_parallel', + 'bipartite_coloring' ] Stack = Queue = deque @@ -1216,3 +1219,322 @@ def max_flow(graph, source, sink, algorithm='edmonds_karp', **kwargs): f"Currently {algorithm} algorithm isn't implemented for " "performing max flow on graphs.") return getattr(algorithms, func)(graph, source, sink) + +def bipartite_coloring(graph: Graph, **kwargs) -> tuple[bool, dict]: + """ + Finds a 2-coloring of the given graph if it is bipartite. + + Parameters + ========== + + graph: Graph + The graph under consideration. + invert: bool + If True, the colors are inverted. + make_undirected: bool + If False, the input graph should be undirected else it can be made undirected by setting this to True + backend: pydatastructs.Backend + The backend to be used. + Optional, by default, the best available + backend is used. + + Returns + ======= + + tuple + A tuple containing a boolean value and a dictionary. + The boolean value is True if the graph is bipartite + and False otherwise. The dictionary contains the + color assigned to each vertex. + + Examples + ======== + + >>> from pydatastructs import Graph, AdjacencyListGraphNode, bipartite_coloring + >>> v_1 = AdjacencyListGraphNode('v_1') + >>> v_2 = AdjacencyListGraphNode('v_2') + >>> v_3 = AdjacencyListGraphNode('v_3') + >>> v_4 = AdjacencyListGraphNode('v_4') + >>> graph = Graph(v_1, v_2, v_3, v_4) + >>> graph.add_edge('v_1', 'v_2') + >>> graph.add_edge('v_2', 'v_3') + >>> graph.add_edge('v_4', 'v_1') + >>> bipartite_coloring(graph) + >>> (True, {'v_1': 0, 'v_2': 1, 'v_4': 1, 'v_3': 0}) + + References + ========== + + .. 
[1] https://en.wikipedia.org/wiki/Bipartite_graph + """ + + color = {} + queue = Queue() + invert = kwargs.get('invert', False) + make_unidirected = kwargs.get('make_undirected', False) + + if make_unidirected: + graph = graph.to_undirected_adjacency_list() + + for start in graph.vertices: + if start not in color: + queue.append(start) + color[start] = 1 if invert else 0 + + while queue: + u = queue.popleft() + for v in graph.neighbors(u): + v_name = v.name + if v_name not in color: + color[v_name] = 1 - color[u] + queue.append(v_name) + elif color[v_name] == color[u]: + return (False, {}) + + return (True, color) + + +def _maximum_matching_hopcroft_karp_(graph: Graph): + U = set() + V = set() + bipartiteness, coloring = bipartite_coloring(graph) + + if not bipartiteness: + raise ValueError("Graph is not bipartite.") + + for node, c in coloring.items(): + if c == 0: + U.add(node) + else: + V.add(node) + + + pair_U = {u: None for u in U} + pair_V = {v: None for v in V} + dist = {} + + def bfs(): + queue = Queue() + for u in U: + if pair_U[u] is None: + dist[u] = 0 + queue.append(u) + else: + dist[u] = float('inf') + dist[None] = float('inf') + while queue: + u = queue.popleft() + if dist[u] < dist[None]: + for v in graph.neighbors(u): + if dist[pair_V[v.name]] == float('inf'): + dist[pair_V[v.name]] = dist[u] + 1 + queue.append(pair_V[v.name]) + return dist[None] != float('inf') + + def dfs(u): + if u is not None: + for v in graph.neighbors(u): + if dist[pair_V[v.name]] == dist[u] + 1: + if dfs(pair_V[v.name]): + pair_V[v.name] = u + pair_U[u] = v.name + return True + dist[u] = float('inf') + return False + return True + + matching = set() + while bfs(): + for u in U: + if pair_U[u] is None: + if dfs(u): + matching.add((u, pair_U[u])) + + return matching + +def maximum_matching(graph: Graph, algorithm: str, **kwargs) -> set: + """ + Finds the maximum matching in the given undirected using the given algorithm. + + Parameters + ========== + + graph: Graph + The graph under consideration. + algorithm: str + The algorithm to be used. + Currently, following are supported, + + 'hopcroft_karp' -> Hopcroft-Karp algorithm for Bipartite Graphs as given in [1]. + make_undirected: bool + If False, the graph should be undirected or unwanted results may be obtained. The graph can be made undirected by setting this to true. + backend: pydatastructs.Backend + The backend to be used. + Optional, by default, the best available + backend is used. + + Returns + ======= + + set + The set of edges which form the maximum matching. + + Examples + ======== + + >>> from pydatastructs import Graph, AdjacencyListGraphNode, maximum_matching + >>> v_1 = AdjacencyListGraphNode('v_1') + >>> v_2 = AdjacencyListGraphNode('v_2') + >>> v_3 = AdjacencyListGraphNode('v_3') + >>> v_4 = AdjacencyListGraphNode('v_4') + >>> graph = Graph(v_1, v_2, v_3, v_4) + >>> graph.add_edge('v_1', 'v_2') + >>> graph.add_edge('v_2', 'v_3') + >>> graph.add_edge('v_4', 'v_1') + >>> maximum_matching(graph, 'hopcroft_karp') + >>> {('v_1', 'v_4'), ('v_3', 'v_2')} + + References + ========== + + .. 
[1] https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm + """ + + + raise_if_backend_is_not_python( + maximum_matching, kwargs.get('backend', Backend.PYTHON)) + make_undirected = kwargs.get('make_undirected', False) + if make_undirected: + graph = graph.to_undirected_adjacency_list() + + import pydatastructs.graphs.algorithms as algorithms + func = "_maximum_matching_" + algorithm + "_" + if not hasattr(algorithms, func): + raise NotImplementedError( + f"Currently {algorithm} algorithm isn't implemented for " + "finding maximum matching in graphs.") + return getattr(algorithms, func)(graph) + +def _maximum_matching_hopcroft_karp_parallel(graph: Graph, num_threads: int) -> set: + U = set() + V = set() + bipartiteness, coloring = bipartite_coloring(graph) + + if not bipartiteness: + raise ValueError("Graph is not bipartite.") + + for node, c in coloring.items(): + if c == 0: + U.add(node) + else: + V.add(node) + + + pair_U = {u: None for u in U} + pair_V = {v: None for v in V} + dist = {} + + def bfs(): + queue = Queue() + for u in U: + if pair_U[u] is None: + dist[u] = 0 + queue.append(u) + else: + dist[u] = float('inf') + dist[None] = float('inf') + while queue: + u = queue.popleft() + if dist[u] < dist[None]: + for v in graph.neighbors(u): + if dist[pair_V[v.name]] == float('inf'): + dist[pair_V[v.name]] = dist[u] + 1 + queue.append(pair_V[v.name]) + return dist[None] != float('inf') + + def dfs(u): + if u is not None: + for v in graph.neighbors(u): + if dist[pair_V[v.name]] == dist[u] + 1: + if dfs(pair_V[v.name]): + pair_V[v.name] = u + pair_U[u] = v.name + return True + dist[u] = float('inf') + return False + return True + + matching = set() + + while bfs(): + unmatched_nodes = [u for u in U if pair_U[u] is None] + + with ThreadPoolExecutor(max_workers=num_threads) as Executor: + results = Executor.map(dfs, unmatched_nodes) + + for u, success in zip(unmatched_nodes, results): + if success: + matching.add((u, pair_U[u])) + + return matching + + +def maximum_matching_parallel(graph: Graph, algorithm: str, num_threads: int, **kwargs): + """ + Finds the maximum matching in the given graph using the given algorithm using + the given number of threads. + + Parameters + ========== + + graph: Graph + The graph under consideration. + algorithm: str + The algorithm to be used. + Currently, following are supported, + + 'hopcroft_karp' -> Hopcroft-Karp algorithm for Bipartite Graphs as given in [1]. + num_threads: int + The maximum number of threads to be used. + backend: pydatastructs.Backend + The backend to be used. + Optional, by default, the best available + backend is used. + + Returns + ======= + + set + The set of edges which form the maximum matching. + + Examples + ======== + + >>> from pydatastructs import Graph, AdjacencyListGraphNode, maximum_matching_parallel + >>> v_1 = AdjacencyListGraphNode('v_1') + >>> v_2 = AdjacencyListGraphNode('v_2') + >>> v_3 = AdjacencyListGraphNode('v_3') + >>> v_4 = AdjacencyListGraphNode('v_4') + >>> graph = Graph(v_1, v_2, v_3, v_4) + >>> graph.add_bidirectional_edge('v_1', 'v_2') + >>> graph.add_bidirectional_edge('v_2', 'v_3') + >>> graph.add_bidirectional_edge('v_4', 'v_1') + >>> maximum_matching_parallel(graph, 'hopcroft_karp', 1) + >>> {('v_1', 'v_4'), ('v_3', 'v_2')} + + References + ========== + + .. 
[1] https://en.wikipedia.org/wiki/Hopcroft%E2%80%93Karp_algorithm + """ + + raise_if_backend_is_not_python( + maximum_matching_parallel, kwargs.get('backend', Backend.PYTHON)) + + import pydatastructs.graphs.algorithms as algorithms + func = "_maximum_matching_" + algorithm + "_parallel" + if not hasattr(algorithms, func): + raise NotImplementedError( + f"Currently {algorithm} algorithm isn't implemented for " + "finding maximum matching in graphs.") + return getattr(algorithms, func)(graph, num_threads) diff --git a/pydatastructs/graphs/graph.py b/pydatastructs/graphs/graph.py index 24f33a17b..ed7a8520a 100644 --- a/pydatastructs/graphs/graph.py +++ b/pydatastructs/graphs/graph.py @@ -161,3 +161,28 @@ def num_edges(self): """ raise NotImplementedError( "This is an abstract method.") + + def add_bidirectional_edge(self, node1, node2, cost=None): + """ + Adds edges between node1 and node2 in both directions. + """ + self.add_edge(node1, node2, cost) + self.add_edge(node2, node1, cost) + + def to_undirected_adjacency_list(self): + """ + Converts the graph to undirected graph. + """ + vertexes = [] + undirected_graph = Graph(implementation='adjacency_list') + for vertex in self.vertices: + undirected_graph.add_vertex( + self.__getattribute__(vertex) + ) + + for vertex in self.vertices: + for v in self.neighbors(vertex): + edge = self.get_edge(vertex, v.name) + undirected_graph.add_bidirectional_edge(vertex, v.name, edge) + + return undirected_graph diff --git a/pydatastructs/graphs/tests/test_algorithms.py b/pydatastructs/graphs/tests/test_algorithms.py index f1586f512..163e1ba3e 100644 --- a/pydatastructs/graphs/tests/test_algorithms.py +++ b/pydatastructs/graphs/tests/test_algorithms.py @@ -2,7 +2,7 @@ breadth_first_search_parallel, minimum_spanning_tree, minimum_spanning_tree_parallel, strongly_connected_components, depth_first_search, shortest_paths,all_pair_shortest_paths, topological_sort, -topological_sort_parallel, max_flow) +topological_sort_parallel, max_flow, maximum_matching, maximum_matching_parallel, bipartite_coloring) from pydatastructs.utils.raises_util import raises def test_breadth_first_search(): @@ -448,3 +448,176 @@ def _test_max_flow(ds, algorithm): _test_max_flow("Matrix", "edmonds_karp") _test_max_flow("List", "dinic") _test_max_flow("Matrix", "dinic") + + + +def test_maximum_matching(): + def _test_maximum_matching(func, ds, algorithm, **kwargs): + import pydatastructs.utils.misc_util as utils + GraphNode = getattr(utils, "Adjacency" + ds + "GraphNode") + + a = GraphNode('a') + b = GraphNode('b') + c = GraphNode('c') + d = GraphNode('d') + e = GraphNode('e') + + G = Graph(a, b, c, d, e) + + G.add_bidirectional_edge('a', 'b') + G.add_bidirectional_edge('a', 'c') + G.add_bidirectional_edge('b', 'd') + G.add_bidirectional_edge('c', 'd') + G.add_bidirectional_edge('d', 'e') + + assert len(func(G, algorithm, **kwargs)) == 2 + + a = GraphNode('a') + b = GraphNode('b') + c = GraphNode('c') + d = GraphNode('d') + e = GraphNode('e') + f = GraphNode('f') + + G2 = Graph(a, b, c, d, e, f) + + G2.add_bidirectional_edge('a', 'b') + G2.add_bidirectional_edge('a', 'c') + G2.add_bidirectional_edge('b', 'd') + G2.add_bidirectional_edge('c', 'd') + G2.add_bidirectional_edge('d', 'e') + G2.add_bidirectional_edge('d', 'f') + + assert len(func(G2, algorithm, **kwargs)) == 2 + + a = GraphNode('a') + b = GraphNode('b') + c = GraphNode('c') + d = GraphNode('d') + + G3 = Graph(a, b, c, d) + + G3.add_bidirectional_edge('a', 'b') + G3.add_bidirectional_edge('a', 'c') + 
G3.add_bidirectional_edge('b', 'd') + G3.add_bidirectional_edge('c', 'd') + + assert len(func(G3, algorithm, **kwargs)) == 2 + + a = GraphNode('a') + b = GraphNode('b') + c = GraphNode('c') + d = GraphNode('d') + e = GraphNode('e') + f = GraphNode('f') + g = GraphNode('g') + h = GraphNode('h') + i = GraphNode('i') + + G4 = Graph(a, b, c, d, e, f, g, h, i) + + G4.add_bidirectional_edge('a', 'b') + G4.add_bidirectional_edge('a', 'c') + G4.add_bidirectional_edge('b', 'd') + G4.add_bidirectional_edge('c', 'd') + G4.add_bidirectional_edge('d', 'e') + G4.add_bidirectional_edge('d', 'f') + G4.add_bidirectional_edge('e', 'g') + G4.add_bidirectional_edge('f', 'g') + G4.add_bidirectional_edge('g', 'h') + G4.add_bidirectional_edge('g', 'i') + + assert len(func(G4, algorithm, **kwargs)) == 3 + + _test_maximum_matching(maximum_matching, "List", "hopcroft_karp") + _test_maximum_matching(maximum_matching, "Matrix", "hopcroft_karp") + _test_maximum_matching(maximum_matching_parallel, "List", "hopcroft_karp", num_threads=2) + _test_maximum_matching(maximum_matching_parallel, "Matrix", "hopcroft_karp", num_threads=2) + + +def test_bipartite_coloring(): + def _assert_correctness(graph, answer): + valid = True + for v in graph.vertices: + for u in graph.neighbors(v): + if answer[u.name] == answer[v.name]: + valid = False + break + if not valid: + break + return valid + + def _test_bipartite_coloring(ds): + import pydatastructs.utils.misc_util as utils + GraphNode = getattr(utils, "Adjacency" + ds + "GraphNode") + + a = GraphNode('a') + b = GraphNode('b') + c = GraphNode('c') + d = GraphNode('d') + e = GraphNode('e') + f = GraphNode('f') + + G1 = Graph(a, b, c, d, e, f) + + G1.add_bidirectional_edge('a', 'b') + G1.add_bidirectional_edge('a', 'c') + G1.add_bidirectional_edge('b', 'd') + G1.add_bidirectional_edge('c', 'd') + G1.add_bidirectional_edge('d', 'e') + G1.add_bidirectional_edge('d', 'f') + + valid, coloring = bipartite_coloring(G1) + + assert valid + assert _assert_correctness(G1, coloring) + + a = GraphNode('a') + b = GraphNode('b') + c = GraphNode('c') + d = GraphNode('d') + e = GraphNode('e') + + G2 = Graph(a, b, c, d, e) + + G2.add_bidirectional_edge('a', 'b') + G2.add_bidirectional_edge('a', 'c') + G2.add_bidirectional_edge('b', 'c') + G2.add_bidirectional_edge('c', 'd') + G2.add_bidirectional_edge('d', 'e') + G2.add_bidirectional_edge('e', 'a') + + valid, coloring = bipartite_coloring(G2) + + assert not valid + + a = GraphNode('a') + b = GraphNode('b') + c = GraphNode('c') + d = GraphNode('d') + e = GraphNode('e') + f = GraphNode('f') + g = GraphNode('g') + h = GraphNode('h') + i = GraphNode('i') + + G3 = Graph(a, b, c, d, e, f, g, h, i) + + G3.add_bidirectional_edge('a', 'b') + G3.add_bidirectional_edge('b', 'c') + G3.add_bidirectional_edge('c', 'd') + G3.add_bidirectional_edge('d', 'e') + G3.add_bidirectional_edge('e', 'f') + G3.add_bidirectional_edge('f', 'a') + G3.add_bidirectional_edge('g', 'f') + G3.add_bidirectional_edge('g', 'd') + G3.add_bidirectional_edge('f', 'h') + G3.add_bidirectional_edge('h', 'i') + G3.add_bidirectional_edge('i', 'g') + G3.add_bidirectional_edge('i', 'c') + G3.add_bidirectional_edge('h', 'b') + + valid, coloring = bipartite_coloring(G3) + + assert valid + assert _assert_correctness(G3, coloring) From f0a5ea2c633cc24c1053419a86b72746c242c04d Mon Sep 17 00:00:00 2001 From: Akshat-Shu Date: Wed, 19 Mar 2025 08:15:34 +0530 Subject: [PATCH 2/9] Fix typing issues caused due to older python version --- pydatastructs/graphs/algorithms.py | 5 +++-- 1 file changed, 3 
insertions(+), 2 deletions(-) diff --git a/pydatastructs/graphs/algorithms.py b/pydatastructs/graphs/algorithms.py index 05aebb3df..0154e66e3 100644 --- a/pydatastructs/graphs/algorithms.py +++ b/pydatastructs/graphs/algorithms.py @@ -11,6 +11,7 @@ from pydatastructs.graphs.graph import Graph from pydatastructs.linear_data_structures.algorithms import merge_sort_parallel from pydatastructs import PriorityQueue +from typing import Tuple, Dict __all__ = [ 'breadth_first_search', @@ -1220,7 +1221,7 @@ def max_flow(graph, source, sink, algorithm='edmonds_karp', **kwargs): "performing max flow on graphs.") return getattr(algorithms, func)(graph, source, sink) -def bipartite_coloring(graph: Graph, **kwargs) -> tuple[bool, dict]: +def bipartite_coloring(graph: Graph, **kwargs) -> Tuple[bool, Dict]: """ Finds a 2-coloring of the given graph if it is bipartite. @@ -1479,7 +1480,7 @@ def dfs(u): return matching -def maximum_matching_parallel(graph: Graph, algorithm: str, num_threads: int, **kwargs): +def maximum_matching_parallel(graph: Graph, algorithm: str, num_threads: int, **kwargs) -> set: """ Finds the maximum matching in the given graph using the given algorithm using the given number of threads. From 8419c5399aebc3f24f492ad6761c77a0c9ebb0af Mon Sep 17 00:00:00 2001 From: Akshat-Shu Date: Wed, 19 Mar 2025 08:52:23 +0530 Subject: [PATCH 3/9] Fix the implementation to properly handle None nodes --- pydatastructs/graphs/algorithms.py | 88 ++++++++++++++++++++---------- 1 file changed, 59 insertions(+), 29 deletions(-) diff --git a/pydatastructs/graphs/algorithms.py b/pydatastructs/graphs/algorithms.py index 0154e66e3..d5a7e3a49 100644 --- a/pydatastructs/graphs/algorithms.py +++ b/pydatastructs/graphs/algorithms.py @@ -1327,29 +1327,43 @@ def bfs(): u = queue.popleft() if dist[u] < dist[None]: for v in graph.neighbors(u): - if dist[pair_V[v.name]] == float('inf'): - dist[pair_V[v.name]] = dist[u] + 1 - queue.append(pair_V[v.name]) - return dist[None] != float('inf') + if v.name in pair_V: + alt = pair_V[v.name] + if alt is None: + dist[None] = dist[u] + 1 + queue.append(None) + elif dist.get(alt, float('inf')) == float('inf'): + dist[alt] = dist[u] + 1 + queue.append(alt) + return dist.get(None, float('inf')) != float('inf') def dfs(u): - if u is not None: - for v in graph.neighbors(u): - if dist[pair_V[v.name]] == dist[u] + 1: - if dfs(pair_V[v.name]): + if u is None: + return True + for v in graph.neighbors(u): + if v.name in pair_V: + alt = pair_V[v.name] + if alt is None: + pair_V[v.name] = u + pair_U[u] = v.name + return True + elif dist.get(alt, float('inf')) == dist.get(u, float('inf')) + 1: + if dfs(alt): pair_V[v.name] = u pair_U[u] = v.name return True - dist[u] = float('inf') - return False - return True + dist[u] = float('inf') + return False matching = set() while bfs(): for u in U: if pair_U[u] is None: - if dfs(u): - matching.add((u, pair_U[u])) + dfs(u) + + for u in U: + if pair_U[u] is not None: + matching.add((u, pair_U[u])) return matching @@ -1392,8 +1406,8 @@ def maximum_matching(graph: Graph, algorithm: str, **kwargs) -> set: >>> graph.add_edge('v_1', 'v_2') >>> graph.add_edge('v_2', 'v_3') >>> graph.add_edge('v_4', 'v_1') - >>> maximum_matching(graph, 'hopcroft_karp') - >>> {('v_1', 'v_4'), ('v_3', 'v_2')} + >>> maximum_matching(graph, 'hopcroft_karp', make_undirected=True) + >>> {('v_3', 'v_2'), ('v_1', 'v_4')} References ========== @@ -1448,22 +1462,33 @@ def bfs(): u = queue.popleft() if dist[u] < dist[None]: for v in graph.neighbors(u): - if dist[pair_V[v.name]] == 
float('inf'): - dist[pair_V[v.name]] = dist[u] + 1 - queue.append(pair_V[v.name]) - return dist[None] != float('inf') + if v.name in pair_V: + alt = pair_V[v.name] + if alt is None: + dist[None] = dist[u] + 1 + queue.append(None) + elif dist.get(alt, float('inf')) == float('inf'): + dist[alt] = dist[u] + 1 + queue.append(alt) + return dist.get(None, float('inf')) != float('inf') def dfs(u): - if u is not None: - for v in graph.neighbors(u): - if dist[pair_V[v.name]] == dist[u] + 1: - if dfs(pair_V[v.name]): + if u is None: + return True + for v in graph.neighbors(u): + if v.name in pair_V: + alt = pair_V[v.name] + if alt is None: + pair_V[v.name] = u + pair_U[u] = v.name + return True + elif dist.get(alt, float('inf')) == dist.get(u, float('inf')) + 1: + if dfs(alt): pair_V[v.name] = u pair_U[u] = v.name return True - dist[u] = float('inf') - return False - return True + dist[u] = float('inf') + return False matching = set() @@ -1474,7 +1499,7 @@ def dfs(u): results = Executor.map(dfs, unmatched_nodes) for u, success in zip(unmatched_nodes, results): - if success: + if success and pair_U[u] is not None: matching.add((u, pair_U[u])) return matching @@ -1497,6 +1522,8 @@ def maximum_matching_parallel(graph: Graph, algorithm: str, num_threads: int, ** 'hopcroft_karp' -> Hopcroft-Karp algorithm for Bipartite Graphs as given in [1]. num_threads: int The maximum number of threads to be used. + make_undirected: bool + If False, the graph should be undirected or unwanted results may be obtained. The graph can be made undirected by setting this to true. backend: pydatastructs.Backend The backend to be used. Optional, by default, the best available @@ -1520,8 +1547,8 @@ def maximum_matching_parallel(graph: Graph, algorithm: str, num_threads: int, ** >>> graph.add_bidirectional_edge('v_1', 'v_2') >>> graph.add_bidirectional_edge('v_2', 'v_3') >>> graph.add_bidirectional_edge('v_4', 'v_1') - >>> maximum_matching_parallel(graph, 'hopcroft_karp', 1) - >>> {('v_1', 'v_4'), ('v_3', 'v_2')} + >>> maximum_matching_parallel(graph, 'hopcroft_karp', 1, make_undirected=True) + >>> {('v_3', 'v_2'), ('v_1', 'v_4')} References ========== @@ -1531,6 +1558,9 @@ def maximum_matching_parallel(graph: Graph, algorithm: str, num_threads: int, ** raise_if_backend_is_not_python( maximum_matching_parallel, kwargs.get('backend', Backend.PYTHON)) + make_undirected = kwargs.get('make_undirected', False) + if make_undirected: + graph = graph.to_undirected_adjacency_list() import pydatastructs.graphs.algorithms as algorithms func = "_maximum_matching_" + algorithm + "_parallel" From 42369d189e5a0cd5dd69d2abb8d59a0003a6ece2 Mon Sep 17 00:00:00 2001 From: Akshat-Shu Date: Wed, 19 Mar 2025 09:06:02 +0530 Subject: [PATCH 4/9] Fix parallel processing implementation to avoid race conditions --- pydatastructs/graphs/algorithms.py | 88 +++++++++++++++++++++++------- 1 file changed, 67 insertions(+), 21 deletions(-) diff --git a/pydatastructs/graphs/algorithms.py b/pydatastructs/graphs/algorithms.py index d5a7e3a49..1b7d3743e 100644 --- a/pydatastructs/graphs/algorithms.py +++ b/pydatastructs/graphs/algorithms.py @@ -4,6 +4,8 @@ """ from collections import deque from concurrent.futures import ThreadPoolExecutor +from multiprocessing import Manager +import threading from pydatastructs.utils.misc_util import ( _comp, raise_if_backend_is_not_python, Backend, AdjacencyListGraphNode) from pydatastructs.miscellaneous_data_structures import ( @@ -1407,7 +1409,7 @@ def maximum_matching(graph: Graph, algorithm: str, **kwargs) -> set: >>> 
graph.add_edge('v_2', 'v_3') >>> graph.add_edge('v_4', 'v_1') >>> maximum_matching(graph, 'hopcroft_karp', make_undirected=True) - >>> {('v_3', 'v_2'), ('v_1', 'v_4')} + >>> {('v_1', 'v_4'), ('v_3', 'v_2')} References ========== @@ -1431,6 +1433,7 @@ def maximum_matching(graph: Graph, algorithm: str, **kwargs) -> set: return getattr(algorithms, func)(graph) def _maximum_matching_hopcroft_karp_parallel(graph: Graph, num_threads: int) -> set: + U = set() V = set() bipartiteness, coloring = bipartite_coloring(graph) @@ -1444,13 +1447,14 @@ def _maximum_matching_hopcroft_karp_parallel(graph: Graph, num_threads: int) -> else: V.add(node) - - pair_U = {u: None for u in U} - pair_V = {v: None for v in V} - dist = {} + manager = Manager() + pair_U = manager.dict({u: None for u in U}) + pair_V = manager.dict({v: None for v in V}) + lock = threading.RLock() def bfs(): queue = Queue() + dist = {} for u in U: if pair_U[u] is None: dist[u] = 0 @@ -1458,6 +1462,7 @@ def bfs(): else: dist[u] = float('inf') dist[None] = float('inf') + while queue: u = queue.popleft() if dist[u] < dist[None]: @@ -1470,36 +1475,77 @@ def bfs(): elif dist.get(alt, float('inf')) == float('inf'): dist[alt] = dist[u] + 1 queue.append(alt) - return dist.get(None, float('inf')) != float('inf') - def dfs(u): + return dist, dist.get(None, float('inf')) != float('inf') + + def dfs_worker(u, dist, local_pair_U, local_pair_V, thread_results): + if dfs(u, dist, local_pair_U, local_pair_V) and u in local_pair_U and local_pair_U[u] is not None: + thread_results.append((u, local_pair_U[u])) + return True + return False + + def dfs(u, dist, local_pair_U, local_pair_V): if u is None: return True + for v in graph.neighbors(u): - if v.name in pair_V: - alt = pair_V[v.name] + if v.name in local_pair_V: + alt = local_pair_V[v.name] if alt is None: - pair_V[v.name] = u - pair_U[u] = v.name + local_pair_V[v.name] = u + local_pair_U[u] = v.name return True elif dist.get(alt, float('inf')) == dist.get(u, float('inf')) + 1: - if dfs(alt): - pair_V[v.name] = u - pair_U[u] = v.name + if dfs(alt, dist, local_pair_U, local_pair_V): + local_pair_V[v.name] = u + local_pair_U[u] = v.name return True + dist[u] = float('inf') return False matching = set() - while bfs(): - unmatched_nodes = [u for u in U if pair_U[u] is None] + while True: + dist, has_path = bfs() + if not has_path: + break - with ThreadPoolExecutor(max_workers=num_threads) as Executor: - results = Executor.map(dfs, unmatched_nodes) + unmatched = [u for u in U if pair_U[u] is None] + if not unmatched: + break + + batch_size = max(1, len(unmatched) // num_threads) + batches = [unmatched[i:i+batch_size] for i in range(0, len(unmatched), batch_size)] + + for batch in batches: + all_results = [] + + with ThreadPoolExecutor(max_workers=num_threads) as executor: + futures = [] + for u in batch: + local_pair_U = dict(pair_U) + local_pair_V = dict(pair_V) + thread_results = [] - for u, success in zip(unmatched_nodes, results): - if success and pair_U[u] is not None: + futures.append(executor.submit( + dfs_worker, u, dist.copy(), local_pair_U, local_pair_V, thread_results + )) + + for future in futures: + future.result() + + with lock: + for u in batch: + if pair_U[u] is None: + result = dfs(u, dist.copy(), pair_U, pair_V) + if result and pair_U[u] is not None: + matching.add((u, pair_U[u])) + + with lock: + matching = set() + for u in U: + if pair_U[u] is not None: matching.add((u, pair_U[u])) return matching @@ -1548,7 +1594,7 @@ def maximum_matching_parallel(graph: Graph, algorithm: str, 
num_threads: int, ** >>> graph.add_bidirectional_edge('v_2', 'v_3') >>> graph.add_bidirectional_edge('v_4', 'v_1') >>> maximum_matching_parallel(graph, 'hopcroft_karp', 1, make_undirected=True) - >>> {('v_3', 'v_2'), ('v_1', 'v_4')} + >>> {('v_1', 'v_4'), ('v_3', 'v_2')} References ========== From 89b43aca0812e6c18d65a16b0c7dc5447c750409 Mon Sep 17 00:00:00 2001 From: Akshat-Shu Date: Wed, 19 Mar 2025 09:23:19 +0530 Subject: [PATCH 5/9] Modify docstring to work on examples --- pydatastructs/graphs/algorithms.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pydatastructs/graphs/algorithms.py b/pydatastructs/graphs/algorithms.py index 1b7d3743e..50e113df7 100644 --- a/pydatastructs/graphs/algorithms.py +++ b/pydatastructs/graphs/algorithms.py @@ -1262,8 +1262,8 @@ def bipartite_coloring(graph: Graph, **kwargs) -> Tuple[bool, Dict]: >>> graph.add_edge('v_1', 'v_2') >>> graph.add_edge('v_2', 'v_3') >>> graph.add_edge('v_4', 'v_1') - >>> bipartite_coloring(graph) - >>> (True, {'v_1': 0, 'v_2': 1, 'v_4': 1, 'v_3': 0}) + >>> bipartite_coloring(graph, make_undirected=True) + (True, {'v_1': 0, 'v_2': 1, 'v_4': 1, 'v_3': 0}) References ========== @@ -1409,7 +1409,7 @@ def maximum_matching(graph: Graph, algorithm: str, **kwargs) -> set: >>> graph.add_edge('v_2', 'v_3') >>> graph.add_edge('v_4', 'v_1') >>> maximum_matching(graph, 'hopcroft_karp', make_undirected=True) - >>> {('v_1', 'v_4'), ('v_3', 'v_2')} + {('v_1', 'v_4'), ('v_3', 'v_2')} References ========== @@ -1593,8 +1593,8 @@ def maximum_matching_parallel(graph: Graph, algorithm: str, num_threads: int, ** >>> graph.add_bidirectional_edge('v_1', 'v_2') >>> graph.add_bidirectional_edge('v_2', 'v_3') >>> graph.add_bidirectional_edge('v_4', 'v_1') - >>> maximum_matching_parallel(graph, 'hopcroft_karp', 1, make_undirected=True) - >>> {('v_1', 'v_4'), ('v_3', 'v_2')} + >>> maximum_matching_parallel(graph, 'hopcroft_karp', 1) + {('v_1', 'v_4'), ('v_3', 'v_2')} References ========== From 9c64741f32719962882664a7d488e6d2a26896b5 Mon Sep 17 00:00:00 2001 From: Akshat Shukla Date: Wed, 19 Mar 2025 14:34:52 +0530 Subject: [PATCH 6/9] fix documentation after resolving conflicts --- pydatastructs/graphs/algorithms.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pydatastructs/graphs/algorithms.py b/pydatastructs/graphs/algorithms.py index 8d1a408a9..eae5832bf 100644 --- a/pydatastructs/graphs/algorithms.py +++ b/pydatastructs/graphs/algorithms.py @@ -1697,6 +1697,9 @@ def find_bridges(graph): >>> graph.add_edge(v3.name, v4.name) >>> find_bridges(graph) [('0', '1'), ('1', '2'), ('2', '3'), ('3', '4')] + + References + ========== .. 
[1] https://en.wikipedia.org/wiki/Bridge_(graph_theory) """ From f673bfe01afcd4a83b1a6e2e0afb9ad177f62c6c Mon Sep 17 00:00:00 2001 From: Akshat Shukla Date: Wed, 19 Mar 2025 14:53:47 +0530 Subject: [PATCH 7/9] Fix docstring faliure by adding exact output --- pydatastructs/graphs/algorithms.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pydatastructs/graphs/algorithms.py b/pydatastructs/graphs/algorithms.py index eae5832bf..6e295839a 100644 --- a/pydatastructs/graphs/algorithms.py +++ b/pydatastructs/graphs/algorithms.py @@ -1460,7 +1460,7 @@ def maximum_matching(graph: Graph, algorithm: str, **kwargs) -> set: >>> graph.add_edge('v_2', 'v_3') >>> graph.add_edge('v_4', 'v_1') >>> maximum_matching(graph, 'hopcroft_karp', make_undirected=True) - {('v_1', 'v_4'), ('v_3', 'v_2')} + {('v_3', 'v_2'), ('v_1', 'v_4')} References ========== @@ -1645,7 +1645,8 @@ def maximum_matching_parallel(graph: Graph, algorithm: str, num_threads: int, ** >>> graph.add_bidirectional_edge('v_2', 'v_3') >>> graph.add_bidirectional_edge('v_4', 'v_1') >>> maximum_matching_parallel(graph, 'hopcroft_karp', 1) - {('v_1', 'v_4'), ('v_3', 'v_2')} + {('v_3', 'v_2'), ('v_1', 'v_4')} + References ========== From d1a8419e73f056891201b398f76a61c4ace44fc5 Mon Sep 17 00:00:00 2001 From: Akshat Shukla Date: Thu, 20 Mar 2025 21:28:03 +0530 Subject: [PATCH 8/9] Update AUTHORS --- AUTHORS | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS b/AUTHORS index 943bf804d..d2d186a32 100644 --- a/AUTHORS +++ b/AUTHORS @@ -11,3 +11,4 @@ Pratik Goyal Jay Thorat Rajveer Singh Bharadwaj Kishan Ved +Akshat Shukla From d782798ca6277e5dab88c08c3641baf7ac0f6c45 Mon Sep 17 00:00:00 2001 From: Akshat Shukla Date: Thu, 20 Mar 2025 21:53:19 +0530 Subject: [PATCH 9/9] Fix docstring faliure by making output fixed --- pydatastructs/graphs/algorithms.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pydatastructs/graphs/algorithms.py b/pydatastructs/graphs/algorithms.py index 6e295839a..480fae9a1 100644 --- a/pydatastructs/graphs/algorithms.py +++ b/pydatastructs/graphs/algorithms.py @@ -1459,8 +1459,8 @@ def maximum_matching(graph: Graph, algorithm: str, **kwargs) -> set: >>> graph.add_edge('v_1', 'v_2') >>> graph.add_edge('v_2', 'v_3') >>> graph.add_edge('v_4', 'v_1') - >>> maximum_matching(graph, 'hopcroft_karp', make_undirected=True) - {('v_3', 'v_2'), ('v_1', 'v_4')} + >>> sorted(maximum_matching(graph, 'hopcroft_karp', make_undirected=True)) + [('v_1', 'v_4'), ('v_3', 'v_2')] References ========== @@ -1644,8 +1644,8 @@ def maximum_matching_parallel(graph: Graph, algorithm: str, num_threads: int, ** >>> graph.add_bidirectional_edge('v_1', 'v_2') >>> graph.add_bidirectional_edge('v_2', 'v_3') >>> graph.add_bidirectional_edge('v_4', 'v_1') - >>> maximum_matching_parallel(graph, 'hopcroft_karp', 1) - {('v_3', 'v_2'), ('v_1', 'v_4')} + >>> sorted(maximum_matching_parallel(graph, 'hopcroft_karp', 1)) + [('v_1', 'v_4'), ('v_3', 'v_2')] References
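
A minimal end-to-end sketch of the API introduced by this series, for anyone who wants to try the patches locally. It assumes the pure-Python backend and the default adjacency-list graph implementation; every class and function used below (Graph, AdjacencyListGraphNode, add_bidirectional_edge, bipartite_coloring, maximum_matching, maximum_matching_parallel) is defined in the diffs above, and the four-vertex graph mirrors the docstring examples:

    from pydatastructs import (Graph, AdjacencyListGraphNode,
        bipartite_coloring, maximum_matching, maximum_matching_parallel)

    # Same four vertices as the docstring examples.
    v_1 = AdjacencyListGraphNode('v_1')
    v_2 = AdjacencyListGraphNode('v_2')
    v_3 = AdjacencyListGraphNode('v_3')
    v_4 = AdjacencyListGraphNode('v_4')
    graph = Graph(v_1, v_2, v_3, v_4)

    # add_bidirectional_edge (added in this series) stores each edge in both
    # directions, so make_undirected is not needed for the calls below.
    graph.add_bidirectional_edge('v_1', 'v_2')
    graph.add_bidirectional_edge('v_2', 'v_3')
    graph.add_bidirectional_edge('v_4', 'v_1')

    # 2-coloring check: the path v_3 - v_2 - v_1 - v_4 is bipartite.
    is_bipartite, colors = bipartite_coloring(graph)
    assert is_bipartite

    # Hopcroft-Karp maximum matching, sequential and parallel variants.
    matching = maximum_matching(graph, 'hopcroft_karp')
    parallel_matching = maximum_matching_parallel(graph, 'hopcroft_karp', 2)
    assert len(matching) == len(parallel_matching) == 2

Both calls report a matching of size two on this graph, e.g. {('v_1', 'v_4'), ('v_3', 'v_2')}, consistent with the doctests fixed in the later patches; Hopcroft-Karp runs in O(E * sqrt(V)) time on bipartite graphs.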