Skip to content

Commit 8da0b5a

Browse files
committed
router2: improved partitioner
1 parent 255633c commit 8da0b5a

File tree

2 files changed

+178
-149
lines changed

2 files changed

+178
-149
lines changed

common/route/router2.cc

Lines changed: 175 additions & 149 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include "router2.h"
3030

3131
#include <algorithm>
32+
#include <atomic>
3233
#include <boost/container/flat_map.hpp>
3334
#include <chrono>
3435
#include <deque>
@@ -280,6 +281,9 @@ struct Router2
280281
// Used to add existing routing to the heap
281282
pool<WireId> in_wire_by_loc;
282283
dict<std::pair<int, int>, pool<WireId>> wire_by_loc;
284+
285+
std::unique_ptr<ThreadContext> lhs;
286+
std::unique_ptr<ThreadContext> rhs;
283287
};
284288

285289
bool thread_test_wire(ThreadContext &t, PerWireData &w)
@@ -294,6 +298,122 @@ struct Router2
294298
ARC_FATAL,
295299
};
296300

301+
struct Partition
302+
{
303+
std::vector<int> queue;
304+
std::unique_ptr<Partition> lhs;
305+
std::unique_ptr<Partition> rhs;
306+
BoundingBox bb;
307+
uint64_t rngseed;
308+
309+
Partition(Context *ctx, std::vector<int> nets_to_partition, const std::vector<NetInfo*>& nets_by_udata, const std::vector<PerNetData> &nets, BoundingBox bb, int depth = 0) : bb{bb} {
310+
//for (int i = 0; i < depth; i++)
311+
// printf(" ");
312+
//printf("%d: (%d, %d, %d, %d)\n", depth, bb.x0, bb.y0, bb.x1, bb.y1);
313+
rngseed = ctx->rng64();
314+
// Too small to partition?
315+
if (nets_to_partition.size() <= 128) {
316+
queue = std::move(nets_to_partition);
317+
return;
318+
}
319+
auto along_x = false;
320+
auto p = find_partition(ctx, nets_to_partition, nets_by_udata, nets, bb, along_x);
321+
// No partition point found?
322+
if (p.x == -1 || p.y == -1) {
323+
queue = std::move(nets_to_partition);
324+
return;
325+
}
326+
auto crosses_p = [&](BoundingBox bb) {
327+
return along_x ? ((p.x >= bb.x0) && (p.x <= bb.x1)) : ((p.y >= bb.y0) && (p.y <= bb.y1));
328+
};
329+
auto left_of_p = [&](BoundingBox bb) {
330+
return along_x ? ((p.x >= 0) && (p.x <= bb.x0)) : ((p.y >= 0) && (p.y <= bb.y0));
331+
};
332+
auto lhs_queue = std::vector<int>{};
333+
auto rhs_queue = std::vector<int>{};
334+
for (auto net : nets_to_partition) {
335+
auto net_bb = nets[net].bb;
336+
if (crosses_p(net_bb))
337+
queue.push_back(net);
338+
else if (left_of_p(net_bb))
339+
lhs_queue.push_back(net);
340+
else
341+
rhs_queue.push_back(net);
342+
}
343+
if (along_x) {
344+
lhs = std::make_unique<Partition>(ctx, std::move(lhs_queue), nets_by_udata, nets, BoundingBox{p.x + 1, bb.y0, bb.x1, bb.y1}, depth + 1);
345+
rhs = std::make_unique<Partition>(ctx, std::move(rhs_queue), nets_by_udata, nets, BoundingBox{bb.x0, bb.y0, p.x, bb.y1}, depth + 1);
346+
} else {
347+
lhs = std::make_unique<Partition>(ctx, std::move(lhs_queue), nets_by_udata, nets, BoundingBox{bb.x0, p.y + 1, bb.x1, bb.y1}, depth + 1);
348+
rhs = std::make_unique<Partition>(ctx, std::move(rhs_queue), nets_by_udata, nets, BoundingBox{bb.x0, bb.y0, bb.x1, p.y}, depth + 1);
349+
}
350+
};
351+
352+
Loc find_partition(Context *ctx, std::vector<int> nets_to_partition, const std::vector<NetInfo*>& nets_by_udata, const std::vector<PerNetData> &nets, BoundingBox bb, bool &along_x) {
353+
auto total_before_x = std::vector<int>(ctx->getGridDimX() + 1, 0);
354+
auto total_after_x = std::vector<int>(ctx->getGridDimX() + 1, 0);
355+
auto total_on_x = std::vector<int>(ctx->getGridDimX() + 1, 0);
356+
auto total_before_y = std::vector<int>(ctx->getGridDimY() + 1, 0);
357+
auto total_after_y = std::vector<int>(ctx->getGridDimY() + 1, 0);
358+
auto total_on_y = std::vector<int>(ctx->getGridDimY() + 1, 0);
359+
for (auto net : nets_to_partition) {
360+
if (nets[net].src_wire == WireId())
361+
continue;
362+
auto net_bb = nets[net].bb;
363+
auto fanout = nets[net].arcs.size();
364+
for (int x = net_bb.x1; x <= bb.x1; x++)
365+
total_before_x.at(x) += fanout;
366+
for (int x = bb.x0; x < net_bb.x0; x++)
367+
total_after_x.at(x) += fanout;
368+
for (int x = net_bb.x0; x < net_bb.x1; x++)
369+
total_on_x.at(x) += fanout;
370+
for (int y = net_bb.y1; y <= bb.y1; y++)
371+
total_before_y.at(y) += fanout;
372+
for (int y = bb.y0; y < net_bb.y0; y++)
373+
total_after_y.at(y) += fanout;
374+
for (int y = net_bb.y0; y < net_bb.y1; y++)
375+
total_on_y.at(y) += fanout;
376+
}
377+
auto p = Loc(-1, -1, 0);
378+
auto best = std::numeric_limits<int>::max();
379+
for (int x = bb.x0; x <= bb.x1; x++) {
380+
if (total_before_x.at(x) == 0 || total_after_x.at(x) == 0)
381+
continue;
382+
auto score = total_on_x.at(x) + std::max(total_before_x.at(x), total_after_x.at(x));
383+
if (score < best) {
384+
best = score;
385+
p.x = x;
386+
p.y = bb.y0;
387+
along_x = true;
388+
}
389+
}
390+
for (int y = bb.y0; y <= bb.y1; y++) {
391+
if (total_before_y.at(y) == 0 || total_after_y.at(y) == 0)
392+
continue;
393+
auto score = total_on_y.at(y) + std::max(total_before_y.at(y), total_after_y.at(y));
394+
if (score < best) {
395+
best = score;
396+
p.x = bb.x0;
397+
p.y = y;
398+
along_x = false;
399+
}
400+
}
401+
return p;
402+
}
403+
404+
std::unique_ptr<ThreadContext> setup_threads(const std::vector<NetInfo*>& nets_by_udata) {
405+
auto tc = std::make_unique<ThreadContext>();
406+
tc->bb = bb;
407+
tc->rng.rngseed(rngseed);
408+
for (auto net : queue)
409+
tc->route_nets.push_back(nets_by_udata[net]);
410+
queue.clear();
411+
if (lhs) tc->lhs = lhs->setup_threads(nets_by_udata);
412+
if (rhs) tc->rhs = rhs->setup_threads(nets_by_udata);
413+
return tc;
414+
}
415+
};
416+
297417
// Define to make sure we don't print in a multithreaded context
298418
#define ARC_LOG_ERR(...) \
299419
do { \
@@ -1180,171 +1300,77 @@ struct Router2
11801300
}
11811301
}
11821302

1183-
int mid_x = 0, mid_y = 0;
1303+
// Number of router threads currently accounted for. do_route() sets it to 1
// before routing starts; router_multithread() compares it against
// cfg.thread_limit before spawning a worker, increments it while that worker
// runs and decrements it after the join. Not given a value here, so it must not
// be read before do_route() has assigned it.
std::atomic_int thread_count;
11841304

1185-
void partition_nets()
1305+
// Routes the ThreadContext tree rooted at `t` on the calling thread.
// Both subtrees are routed first (lhs, then rhs). Nets a child failed to route
// are appended to this node's own list, and finally every net in that list is
// routed with this node's context. Nets that still fail end up in
// t.failed_nets for the caller to pick up.
void router_singlethread(ThreadContext &t)
{
    ThreadContext *const left = t.lhs.get();
    ThreadContext *const right = t.rhs.get();

    // Children first, so their failures are known before this node routes.
    if (left != nullptr)
        router_singlethread(*left);
    if (right != nullptr)
        router_singlethread(*right);

    // Adopt whatever the children could not route inside their own region.
    if (left != nullptr) {
        for (auto failed : left->failed_nets)
            t.route_nets.push_back(failed);
    }
    if (right != nullptr) {
        for (auto failed : right->failed_nets)
            t.route_nets.push_back(failed);
    }

    for (auto net : t.route_nets) {
        if (!route_net(t, net, /*is_mt=*/true))
            t.failed_nets.push_back(net);
    }
}
12291325

1230-
void router_thread(ThreadContext &t, bool is_mt)
1326+
// Routes the ThreadContext tree rooted at `t`, running the two subtrees of a
// node concurrently while a thread slot is available.
//
// When both children exist and fewer than cfg.thread_limit threads are
// accounted for in thread_count, the rhs subtree is routed on a new
// boost::thread while the lhs subtree is routed on the calling thread;
// otherwise the subtrees are routed one after the other. Afterwards the nets
// the children failed to route are appended to t.route_nets and every net in
// that list is routed with this node's context; nets that still fail are
// recorded in t.failed_nets.
void router_multithread(ThreadContext &t)
{
    if (t.lhs && t.rhs) {
        // Reserve a thread slot with a compare-exchange so that the limit check
        // and the increment are one atomic step. A separate "check, then ++"
        // lets several workers pass the check at once and exceed the limit.
        bool slot_reserved = false;
        int running = thread_count.load();
        while (running < cfg.thread_limit) {
            // On failure `running` is reloaded with the current value and the
            // limit is re-checked.
            if (thread_count.compare_exchange_weak(running, running + 1)) {
                slot_reserved = true;
                break;
            }
        }

        if (slot_reserved) {
            // `t` outlives the worker because it is joined before returning.
            boost::thread rhs([this, &t]() { router_multithread(*t.rhs); });
            router_multithread(*t.lhs);
            rhs.join();
            thread_count--;
        } else {
            router_multithread(*t.lhs);
            router_multithread(*t.rhs);
        }
    } else if (t.lhs) {
        router_multithread(*t.lhs);
    } else if (t.rhs) {
        router_multithread(*t.rhs);
    }

    // Adopt whatever the children could not route inside their own region.
    if (t.lhs)
        for (auto n : t.lhs->failed_nets)
            t.route_nets.push_back(n);
    if (t.rhs)
        for (auto n : t.rhs->failed_nets)
            t.route_nets.push_back(n);

    for (auto n : t.route_nets) {
        bool result = route_net(t, n, /*is_mt=*/true);
        if (!result)
            t.failed_nets.push_back(n);
    }
}
12381357

12391358
void do_route()
12401359
{
1241-
// Don't multithread if fewer than 200 nets (heuristic)
1242-
if (route_queue.size() < 200) {
1243-
ThreadContext st;
1244-
st.rng.rngseed(ctx->rng64());
1245-
st.bb = BoundingBox(0, 0, std::numeric_limits<int>::max(), std::numeric_limits<int>::max());
1246-
for (size_t j = 0; j < route_queue.size(); j++) {
1247-
route_net(st, nets_by_udata[route_queue[j]], false);
1248-
}
1249-
return;
1250-
}
1251-
const int Nq = 4, Nv = 2, Nh = 2;
1252-
const int N = Nq + Nv + Nh;
1253-
std::vector<ThreadContext> tcs(N + 1);
1254-
for (auto &th : tcs) {
1255-
th.rng.rngseed(ctx->rng64());
1256-
}
1257-
int le_x = mid_x;
1258-
int rs_x = mid_x;
1259-
int le_y = mid_y;
1260-
int rs_y = mid_y;
1261-
// Set up thread bounding boxes
1262-
tcs.at(0).bb = BoundingBox(0, 0, mid_x, mid_y);
1263-
tcs.at(1).bb = BoundingBox(mid_x + 1, 0, std::numeric_limits<int>::max(), le_y);
1264-
tcs.at(2).bb = BoundingBox(0, mid_y + 1, mid_x, std::numeric_limits<int>::max());
1265-
tcs.at(3).bb =
1266-
BoundingBox(mid_x + 1, mid_y + 1, std::numeric_limits<int>::max(), std::numeric_limits<int>::max());
1267-
1268-
tcs.at(4).bb = BoundingBox(0, 0, std::numeric_limits<int>::max(), mid_y);
1269-
tcs.at(5).bb = BoundingBox(0, mid_y + 1, std::numeric_limits<int>::max(), std::numeric_limits<int>::max());
1270-
1271-
tcs.at(6).bb = BoundingBox(0, 0, mid_x, std::numeric_limits<int>::max());
1272-
tcs.at(7).bb = BoundingBox(mid_x + 1, 0, std::numeric_limits<int>::max(), std::numeric_limits<int>::max());
1273-
1274-
tcs.at(8).bb = BoundingBox(0, 0, std::numeric_limits<int>::max(), std::numeric_limits<int>::max());
1275-
1276-
for (auto n : route_queue) {
1277-
auto &nd = nets.at(n);
1278-
auto ni = nets_by_udata.at(n);
1279-
int bin = N;
1280-
// Quadrants
1281-
if (nd.bb.x0 < le_x && nd.bb.x1 < le_x && nd.bb.y0 < le_y && nd.bb.y1 < le_y)
1282-
bin = 0;
1283-
else if (nd.bb.x0 >= rs_x && nd.bb.x1 >= rs_x && nd.bb.y0 < le_y && nd.bb.y1 < le_y)
1284-
bin = 1;
1285-
else if (nd.bb.x0 < le_x && nd.bb.x1 < le_x && nd.bb.y0 >= rs_y && nd.bb.y1 >= rs_y)
1286-
bin = 2;
1287-
else if (nd.bb.x0 >= rs_x && nd.bb.x1 >= rs_x && nd.bb.y0 >= rs_y && nd.bb.y1 >= rs_y)
1288-
bin = 3;
1289-
// Vertical split
1290-
else if (nd.bb.y0 < le_y && nd.bb.y1 < le_y)
1291-
bin = Nq + 0;
1292-
else if (nd.bb.y0 >= rs_y && nd.bb.y1 >= rs_y)
1293-
bin = Nq + 1;
1294-
// Horizontal split
1295-
else if (nd.bb.x0 < le_x && nd.bb.x1 < le_x)
1296-
bin = Nq + Nv + 0;
1297-
else if (nd.bb.x0 >= rs_x && nd.bb.x1 >= rs_x)
1298-
bin = Nq + Nv + 1;
1299-
tcs.at(bin).route_nets.push_back(ni);
1300-
}
1301-
if (ctx->verbose)
1302-
log_info("%d/%d nets not multi-threadable\n", int(tcs.at(N).route_nets.size()), int(route_queue.size()));
1360+
auto partition = Partition{ctx, route_queue, nets_by_udata, nets, BoundingBox(0, 0, ctx->getGridDimX(), ctx->getGridDimY())};
1361+
1362+
auto tc = partition.setup_threads(nets_by_udata);
1363+
thread_count = 1;
13031364
#ifdef NPNR_DISABLE_THREADS
1304-
// Singlethreaded routing - quadrants
1305-
for (int i = 0; i < Nq; i++) {
1306-
router_thread(tcs.at(i), /*is_mt=*/false);
1307-
}
1308-
// Vertical splits
1309-
for (int i = Nq; i < Nq + Nv; i++) {
1310-
router_thread(tcs.at(i), /*is_mt=*/false);
1311-
}
1312-
// Horizontal splits
1313-
for (int i = Nq + Nv; i < Nq + Nv + Nh; i++) {
1314-
router_thread(tcs.at(i), /*is_mt=*/false);
1315-
}
1365+
router_singlethread(*tc.get());
13161366
#else
1317-
// Multithreaded part of routing - quadrants
1318-
std::vector<boost::thread> threads;
1319-
for (int i = 0; i < Nq; i++) {
1320-
threads.emplace_back([this, &tcs, i]() { router_thread(tcs.at(i), /*is_mt=*/true); });
1321-
}
1322-
for (auto &t : threads)
1323-
t.join();
1324-
threads.clear();
1325-
// Vertical splits
1326-
for (int i = Nq; i < Nq + Nv; i++) {
1327-
threads.emplace_back([this, &tcs, i]() { router_thread(tcs.at(i), /*is_mt=*/true); });
1328-
}
1329-
for (auto &t : threads)
1330-
t.join();
1331-
threads.clear();
1332-
// Horizontal splits
1333-
for (int i = Nq + Nv; i < Nq + Nv + Nh; i++) {
1334-
threads.emplace_back([this, &tcs, i]() { router_thread(tcs.at(i), /*is_mt=*/true); });
1335-
}
1336-
for (auto &t : threads)
1337-
t.join();
1338-
threads.clear();
1367+
router_multithread(*tc.get());
13391368
#endif
1340-
// Singlethreaded part of routing - nets that cross partitions
1341-
// or don't fit within bounding box
1342-
for (auto st_net : tcs.at(N).route_nets)
1343-
route_net(tcs.at(N), st_net, false);
1344-
// Failed nets
1345-
for (int i = 0; i < N; i++)
1346-
for (auto fail : tcs.at(i).failed_nets)
1347-
route_net(tcs.at(N), fail, false);
1369+
auto st = ThreadContext{};
1370+
st.bb = BoundingBox(0, 0, std::numeric_limits<int>::max(), std::numeric_limits<int>::max());
1371+
1372+
for (auto fail : tc->failed_nets)
1373+
route_net(st, fail, false);
13481374
}
13491375

13501376
delay_t get_route_delay(int net, store_index<PortRef> usr_idx, int phys_idx)
@@ -1397,7 +1423,6 @@ struct Router2
13971423
setup_nets();
13981424
setup_wires();
13991425
find_all_reserved_wires();
1400-
partition_nets();
14011426
curr_cong_weight = cfg.init_curr_cong_weight;
14021427
hist_cong_weight = cfg.hist_cong_weight;
14031428
ThreadContext st;
@@ -1534,6 +1559,7 @@ Router2Cfg::Router2Cfg(Context *ctx)
15341559
heatmap = ctx->settings.at(ctx->id("router2/heatmap")).as_string();
15351560
else
15361561
heatmap = "";
1562+
thread_limit = ctx->setting<int>("threads", 4);
15371563
}
15381564

15391565
NEXTPNR_NAMESPACE_END

common/route/router2.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ struct Router2Cfg
5757
// Print additional performance profiling information
5858
bool perf_profile = false;
5959

60+
// Number of threads to utilise while routing
61+
int thread_limit;
62+
6063
std::string heatmap;
6164
std::function<float(Context *ctx, WireId wire, PipId pip, float crit_weight)> get_base_cost = default_base_cost;
6265
};

0 commit comments

Comments
 (0)