Skip to content

Commit 5e7c43f

Browse files
authored
Enhance OSM getter script (#238)
* Init enhancing * Finish edge merging * Small fix * Enhance exception message * Avoid self-loops * Remove duplicate edges * Bugfix * Bugfix * Update version
1 parent 5ce7a4a commit 5e7c43f

File tree

4 files changed

+231
-16
lines changed

4 files changed

+231
-16
lines changed

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,4 @@ Pillow
99
seaborn
1010
tqdm
1111
opencv-python
12+
shapely

src/dsm/dsm.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
static constexpr uint8_t DSM_VERSION_MAJOR = 2;
88
static constexpr uint8_t DSM_VERSION_MINOR = 2;
9-
static constexpr uint8_t DSM_VERSION_PATCH = 8;
9+
static constexpr uint8_t DSM_VERSION_PATCH = 9;
1010

1111
static auto const DSM_VERSION =
1212
std::format("{}.{}.{}", DSM_VERSION_MAJOR, DSM_VERSION_MINOR, DSM_VERSION_PATCH);

src/dsm/headers/Graph.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -305,8 +305,8 @@ namespace dsm {
305305
std::string id, lat, lon, highway;
306306
// osmid;x;y;highway
307307
std::getline(iss, id, ';');
308-
std::getline(iss, lat, ';');
309308
std::getline(iss, lon, ';');
309+
std::getline(iss, lat, ';');
310310
std::getline(iss, highway, ';');
311311
auto const nodeId{static_cast<Id>(std::stoul(id))};
312312
if (highway.find("traffic_signals") != std::string::npos) {
@@ -470,7 +470,10 @@ namespace dsm {
470470
void Graph::addStreet(std::unique_ptr<Street> street) {
471471
if (m_streets.contains(street->id())) {
472472
throw std::invalid_argument(
473-
buildLog(std::format("Street with id {} already exists.", street->id())));
473+
buildLog(std::format("Street with id {} from {} to {} already exists.",
474+
street->id(),
475+
street->nodePair().first,
476+
street->nodePair().second)));
474477
}
475478
// emplace nodes
476479
const auto srcId{street->nodePair().first};
@@ -488,7 +491,10 @@ namespace dsm {
488491
void Graph::addStreet(const Street& street) {
489492
if (m_streets.contains(street.id())) {
490493
throw std::invalid_argument(
491-
buildLog(std::format("Street with id {} already exists.", street.id())));
494+
buildLog(std::format("Street with id {} from {} to {} already exists.",
495+
street.id(),
496+
street.nodePair().first,
497+
street.nodePair().second)));
492498
}
493499
// emplace nodes
494500
const auto srcId{street.nodePair().first};

utils/get_osm_data.py

Lines changed: 220 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
The place is passed as a command line argument.
44
55
Example:
6-
python get_osm_data.py --place "postua, vercelli, italy"
6+
python get_osm_data.py --place "Bologna, Emilia-Romagna, Italy" --exclude-residential True
77
88
The output files are:
99
- nodes.csv
@@ -13,12 +13,16 @@
1313
"""
1414

1515
from argparse import ArgumentParser
16+
import logging
1617
import osmnx as ox
1718
import networkx as nx
1819
import matplotlib.patches as mpatches
1920
import matplotlib.pyplot as plt
21+
import pandas as pd
22+
from shapely.geometry import MultiLineString, LineString
23+
from shapely.ops import linemerge
2024

21-
RGBA_RED = (1, 0, 0, 0.1)
25+
RGBA_RED = (1, 0, 0, 0.3)
2226
RGBA_WHITE = (1, 1, 1, 1)
2327

2428
FLAGS_MOTORWAY = ["motorway", "motorway_link"]
@@ -36,8 +40,176 @@
3640
"residential",
3741
"living_street",
3842
"unclassified",
43+
"service",
44+
"pedestrian",
45+
"busway",
3946
]
4047

48+
49+
def merge_edges(
    graph: nx.DiGraph, previous_node: int, successive_node: int, node: int
) -> "dict | None":
    """
    Merge the edges previous_node -> node and node -> successive_node into one edge.

    The merge is performed only if all the following conditions are met:
    - both edges exist (key 0 of each node pair is used)
    - the name of one edge is contained in the name of the other
    - the number of lanes is the same
    - the coordinates of previous_node and successive_node both appear in the
      merged geometry

    Note: a missing "lanes" attribute is written back to the edge data of the
    graph with the default value 1.

    Parameters:
    ----------------
    graph (networkx.DiGraph): the graph
    previous_node (int): the previous node
    successive_node (int): the successive node
    node (int): the id of the node which will be removed

    Returns:
    ----------------
    dict: the attributes of the new edge, or None if the edges cannot be merged
    """
    try:
        # get_edge_data returns None when the edge does not exist; indexing None
        # raises the TypeError handled below (e.g. one-way road, wrong direction)
        data_u = graph.get_edge_data(previous_node, node)[0]
        data_v = graph.get_edge_data(node, successive_node)[0]
        data_u.setdefault("lanes", 1)
        data_v.setdefault("lanes", 1)

        same_name = data_u["name"] in data_v["name"] or data_v["name"] in data_u["name"]
        if not same_name or data_u["lanes"] != data_v["lanes"]:
            return None

        edge_uv = data_u.copy()
        # set length as the sum
        edge_uv["length"] = data_u["length"] + data_v["length"]
        edge_uv["lanes"] = int(data_u["lanes"])

        # merge also linestrings
        merged = data_u["geometry"].union(data_v["geometry"])
        if isinstance(merged, MultiLineString):
            merged = linemerge(merged)

        if isinstance(merged, LineString):
            coords = list(merged.coords)
        else:
            # Still a multi-part geometry: collect the coordinates of every part.
            # Parts must be accessed through .geoms, multi-part geometries are
            # not directly iterable in Shapely 2.x
            parts = getattr(merged, "geoms", None)
            if parts is None:
                return None
            coords = []
            for line in parts:
                coords.extend(line.coords)

        # take the list from coordinates of previous_node to coordinates of successive_node
        u_coords = (graph.nodes[previous_node]["x"], graph.nodes[previous_node]["y"])
        v_coords = (
            graph.nodes[successive_node]["x"],
            graph.nodes[successive_node]["y"],
        )
        if u_coords not in coords or v_coords not in coords:
            return None

        start = coords.index(u_coords)
        end = coords.index(v_coords)
        if start < end:
            # the end point is part of the street: the slice must include it
            path = coords[start : end + 1]
        elif start > end:
            # merged line is oriented from v to u: cut it and flip it
            path = coords[end : start + 1][::-1]
        else:
            # both nodes share the same coordinate: a single point is not a valid
            # LineString, so fall back to an empty geometry
            path = []
        edge_uv["geometry"] = LineString(path)
    except TypeError:
        # type error means that data_u or data_v cannot be created,
        # which means that the road is a one-way road
        # thus, skip the type error
        return None

    return edge_uv
120+
121+
122+
def simplify_graph(graph_original: nx.DiGraph) -> nx.DiGraph:
    """
    Simplify the graph by removing nodes that have only two neighbours and
    actually lie on the same street.
    The two edges touching such a node are merged into a single edge.
    Afterwards only the largest weakly connected component is kept, self-loops
    are removed and, among parallel edges, only the one with most lanes is kept.

    Parameters:
    ----------------
    graph_original (networkx.DiGraph): the graph to simplify (it is not modified)

    Returns:
    ----------------
    networkx.DiGraph: the simplified graph
    """

    def lanes_as_int(value) -> int:
        # lanes may be stored as int, numeric string or be missing/NaN:
        # anything that is not a number counts as 0
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            return 0

    graph = graph_original.copy()
    previous_nodes = 0
    # repeat until a full pass removes no node
    while previous_nodes != len(graph.nodes):
        logging.info("New cycle: current_nodes=%d", len(graph.nodes))
        previous_nodes = len(graph.nodes)
        # iterate on a snapshot: nodes are removed while looping
        for node in list(graph.nodes):
            # define neighbours as list of predecessors and successors
            neighbours = list(graph.predecessors(node)) + list(graph.successors(node))
            if (
                len(neighbours) != 2
                or graph.in_degree(node) != graph.out_degree(node)
                or graph.in_degree(node) > 2
            ):
                continue
            u, v = neighbours
            if graph.has_edge(u, v):
                continue
            edge_uv = merge_edges(graph, u, v, node)
            edge_vu = merge_edges(graph, v, u, node)
            if edge_uv is None and edge_vu is None:
                continue

            # keep every attribute of the merged edge (lanes, highway, oneway, ...)
            # so that the merged street is exported like any other street
            if edge_uv is not None:
                graph.add_edge(u, v, **edge_uv)
            if edge_vu is not None:
                graph.add_edge(v, u, **edge_vu)
            graph.remove_node(node)

    # Remove all nodes that are not in the giant component
    components = list(nx.weakly_connected_components(graph))
    if components:
        graph.remove_nodes_from(set(graph.nodes) - max(components, key=len))
    # remove all self-loops
    graph.remove_edges_from(list(nx.selfloop_edges(graph)))

    # check if there are edges with same u and v. If true, keep only the one with
    # the most lanes (the first one found wins ties). Parallel edges can exist
    # only in a multigraph, where each of them is identified by its key.
    if graph.is_multigraph():
        edges_to_remove = []
        best_edges = {}  # (u, v) -> (lanes, key) of the edge kept so far
        for u, v, key, data in graph.edges(keys=True, data=True):
            lanes = lanes_as_int(data.get("lanes", 0))
            kept = best_edges.get((u, v))
            if kept is None:
                best_edges[(u, v)] = (lanes, key)
            elif lanes > kept[0]:
                # the current edge has more lanes: drop the one kept so far
                edges_to_remove.append((u, v, kept[1]))
                best_edges[(u, v)] = (lanes, key)
            else:
                edges_to_remove.append((u, v, key))
        graph.remove_edges_from(edges_to_remove)

    return graph
211+
212+
41213
if __name__ == "__main__":
42214
parser = ArgumentParser("Script to get the OSM data of a place.")
43215
parser.add_argument(
@@ -58,15 +230,36 @@
58230
help="Exclude residential roads from the data",
59231
)
60232
parser = parser.parse_args()
233+
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
234+
# set up colored logging
235+
logging.addLevelName(
236+
logging.INFO, f"\033[1;32m{logging.getLevelName(logging.INFO)}\033[1;0m"
237+
)
238+
logging.addLevelName(
239+
logging.WARNING, f"\033[1;33m{logging.getLevelName(logging.WARNING)}\033[1;0m"
240+
)
241+
logging.addLevelName(
242+
logging.ERROR, f"\033[1;31m{logging.getLevelName(logging.ERROR)}\033[1;0m"
243+
)
61244

62245
# get the street network for the place given on the command line
63246
G_ALL = ox.graph_from_place(parser.place, network_type="drive")
64-
print(f"Graph created with {len(G_ALL.nodes)} nodes and {len(G_ALL.edges)} edges.")
247+
logging.info(
248+
"Graph created with %d nodes and %d edges.", len(G_ALL.nodes), len(G_ALL.edges)
249+
)
65250

66251
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G_ALL)
67252
gdf_edges["highway"] = gdf_edges["highway"].apply(
68253
lambda x: x[-1] if isinstance(x, list) else x
69254
)
255+
if "lanes" not in gdf_edges.columns:
256+
gdf_edges["lanes"] = 1
257+
gdf_edges["lanes"] = gdf_edges["lanes"].apply(
258+
lambda x: max(x) if isinstance(x, list) else 1 if pd.isna(x) else x
259+
)
260+
gdf_edges["name"] = gdf_edges["name"].apply(
261+
lambda x: " ".join(x) if isinstance(x, list) else " " if pd.isna(x) else x
262+
)
70263
# gdf_edges = gdf_edges[~gdf_edges["access"].isin(["no", "private"])]
71264

72265
# Make a plot to visualize the removed links
@@ -80,7 +273,19 @@
80273
# rebuild the graph
81274
G = ox.graph_from_gdfs(gdf_nodes, gdf_edges)
82275
G.remove_nodes_from(list(nx.isolates(G)))
83-
print(f"Graph filterded: now it has {len(G.nodes)} nodes and {len(G.edges)} edges.")
276+
logging.info(
277+
"Graph filtered: now it has %d nodes and %d edges.", len(G.nodes), len(G.edges)
278+
)
279+
G = simplify_graph(G)
280+
logging.info(
281+
"Graph simplified: now it has %d nodes and %d edges.",
282+
len(G.nodes),
283+
len(G.edges),
284+
)
285+
# assert that graph has not isolated nodes
286+
assert not list(nx.isolates(G))
287+
# assert that graph has not self-loops
288+
assert not list(nx.selfloop_edges(G))
84289

85290
fig, ax = ox.plot_graph(
86291
G_ALL,
@@ -94,23 +299,26 @@
94299
close=False,
95300
)
96301
ax.legend(handles=[removed_patch])
97-
plt.show()
302+
fig.set_size_inches(16, 9)
303+
plt.savefig("removed_nodes_and_edges.png")
98304

99-
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G)
305+
# Plot resulting graph
306+
fig, ax = ox.plot_graph(G, show=False, close=False)
307+
fig.set_size_inches(16, 9)
308+
plt.savefig("final_graph.png")
100309

310+
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G)
101311
# notice that osmnid is the index of the gdf_nodes DataFrame, so take it as a column
102312
gdf_nodes.reset_index(inplace=True)
103313
gdf_edges.reset_index(inplace=True)
314+
315+
# assert that there are no edges with the same u and v
316+
assert not gdf_edges.duplicated(subset=["u", "v"]).any()
104317
# Prepare node dataframe
105318
gdf_nodes = gdf_nodes[["osmid", "x", "y", "highway"]]
106319
# Prepare edge dataframe
107-
if "lanes" not in gdf_edges.columns:
108-
gdf_edges["lanes"] = None
109-
110-
gdf_edges["lanes"] = gdf_edges["lanes"].apply(
111-
lambda x: max(x) if isinstance(x, list) else x
112-
)
113320

321+
gdf_edges.to_csv("edges_ALL.csv", sep=";", index=False)
114322
gdf_edges = gdf_edges[
115323
["u", "v", "length", "oneway", "lanes", "highway", "maxspeed", "name"]
116324
]

0 commit comments

Comments
 (0)