29
29
#include " router2.h"
30
30
31
31
#include < algorithm>
32
+ #include < atomic>
32
33
#include < boost/container/flat_map.hpp>
33
34
#include < chrono>
34
35
#include < deque>
@@ -280,6 +281,9 @@ struct Router2
280
281
// Used to add existing routing to the heap
281
282
pool<WireId> in_wire_by_loc;
282
283
dict<std::pair<int , int >, pool<WireId>> wire_by_loc;
284
+
285
+ std::unique_ptr<ThreadContext> lhs;
286
+ std::unique_ptr<ThreadContext> rhs;
283
287
};
284
288
285
289
bool thread_test_wire (ThreadContext &t, PerWireData &w)
@@ -294,6 +298,122 @@ struct Router2
294
298
ARC_FATAL,
295
299
};
296
300
301
+ struct Partition
302
+ {
303
+ std::vector<int > queue;
304
+ std::unique_ptr<Partition> lhs;
305
+ std::unique_ptr<Partition> rhs;
306
+ BoundingBox bb;
307
+ uint64_t rngseed;
308
+
309
+ Partition (Context *ctx, std::vector<int > nets_to_partition, const std::vector<NetInfo*>& nets_by_udata, const std::vector<PerNetData> &nets, BoundingBox bb, int depth = 0 ) : bb{bb} {
310
+ // for (int i = 0; i < depth; i++)
311
+ // printf(" ");
312
+ // printf("%d: (%d, %d, %d, %d)\n", depth, bb.x0, bb.y0, bb.x1, bb.y1);
313
+ rngseed = ctx->rng64 ();
314
+ // Too small to partition?
315
+ if (nets_to_partition.size () <= 128 ) {
316
+ queue = std::move (nets_to_partition);
317
+ return ;
318
+ }
319
+ auto along_x = false ;
320
+ auto p = find_partition (ctx, nets_to_partition, nets_by_udata, nets, bb, along_x);
321
+ // No partition point found?
322
+ if (p.x == -1 || p.y == -1 ) {
323
+ queue = std::move (nets_to_partition);
324
+ return ;
325
+ }
326
+ auto crosses_p = [&](BoundingBox bb) {
327
+ return along_x ? ((p.x >= bb.x0 ) && (p.x <= bb.x1 )) : ((p.y >= bb.y0 ) && (p.y <= bb.y1 ));
328
+ };
329
+ auto left_of_p = [&](BoundingBox bb) {
330
+ return along_x ? ((p.x >= 0 ) && (p.x <= bb.x0 )) : ((p.y >= 0 ) && (p.y <= bb.y0 ));
331
+ };
332
+ auto lhs_queue = std::vector<int >{};
333
+ auto rhs_queue = std::vector<int >{};
334
+ for (auto net : nets_to_partition) {
335
+ auto net_bb = nets[net].bb ;
336
+ if (crosses_p (net_bb))
337
+ queue.push_back (net);
338
+ else if (left_of_p (net_bb))
339
+ lhs_queue.push_back (net);
340
+ else
341
+ rhs_queue.push_back (net);
342
+ }
343
+ if (along_x) {
344
+ lhs = std::make_unique<Partition>(ctx, std::move (lhs_queue), nets_by_udata, nets, BoundingBox{p.x + 1 , bb.y0 , bb.x1 , bb.y1 }, depth + 1 );
345
+ rhs = std::make_unique<Partition>(ctx, std::move (rhs_queue), nets_by_udata, nets, BoundingBox{bb.x0 , bb.y0 , p.x , bb.y1 }, depth + 1 );
346
+ } else {
347
+ lhs = std::make_unique<Partition>(ctx, std::move (lhs_queue), nets_by_udata, nets, BoundingBox{bb.x0 , p.y + 1 , bb.x1 , bb.y1 }, depth + 1 );
348
+ rhs = std::make_unique<Partition>(ctx, std::move (rhs_queue), nets_by_udata, nets, BoundingBox{bb.x0 , bb.y0 , bb.x1 , p.y }, depth + 1 );
349
+ }
350
+ };
351
+
352
+ Loc find_partition (Context *ctx, std::vector<int > nets_to_partition, const std::vector<NetInfo*>& nets_by_udata, const std::vector<PerNetData> &nets, BoundingBox bb, bool &along_x) {
353
+ auto total_before_x = std::vector<int >(ctx->getGridDimX () + 1 , 0 );
354
+ auto total_after_x = std::vector<int >(ctx->getGridDimX () + 1 , 0 );
355
+ auto total_on_x = std::vector<int >(ctx->getGridDimX () + 1 , 0 );
356
+ auto total_before_y = std::vector<int >(ctx->getGridDimY () + 1 , 0 );
357
+ auto total_after_y = std::vector<int >(ctx->getGridDimY () + 1 , 0 );
358
+ auto total_on_y = std::vector<int >(ctx->getGridDimY () + 1 , 0 );
359
+ for (auto net : nets_to_partition) {
360
+ if (nets[net].src_wire == WireId ())
361
+ continue ;
362
+ auto net_bb = nets[net].bb ;
363
+ auto fanout = nets[net].arcs .size ();
364
+ for (int x = net_bb.x1 ; x <= bb.x1 ; x++)
365
+ total_before_x.at (x) += fanout;
366
+ for (int x = bb.x0 ; x < net_bb.x0 ; x++)
367
+ total_after_x.at (x) += fanout;
368
+ for (int x = net_bb.x0 ; x < net_bb.x1 ; x++)
369
+ total_on_x.at (x) += fanout;
370
+ for (int y = net_bb.y1 ; y <= bb.y1 ; y++)
371
+ total_before_y.at (y) += fanout;
372
+ for (int y = bb.y0 ; y < net_bb.y0 ; y++)
373
+ total_after_y.at (y) += fanout;
374
+ for (int y = net_bb.y0 ; y < net_bb.y1 ; y++)
375
+ total_on_y.at (y) += fanout;
376
+ }
377
+ auto p = Loc (-1 , -1 , 0 );
378
+ auto best = std::numeric_limits<int >::max ();
379
+ for (int x = bb.x0 ; x <= bb.x1 ; x++) {
380
+ if (total_before_x.at (x) == 0 || total_after_x.at (x) == 0 )
381
+ continue ;
382
+ auto score = total_on_x.at (x) + std::max (total_before_x.at (x), total_after_x.at (x));
383
+ if (score < best) {
384
+ best = score;
385
+ p.x = x;
386
+ p.y = bb.y0 ;
387
+ along_x = true ;
388
+ }
389
+ }
390
+ for (int y = bb.y0 ; y <= bb.y1 ; y++) {
391
+ if (total_before_y.at (y) == 0 || total_after_y.at (y) == 0 )
392
+ continue ;
393
+ auto score = total_on_y.at (y) + std::max (total_before_y.at (y), total_after_y.at (y));
394
+ if (score < best) {
395
+ best = score;
396
+ p.x = bb.x0 ;
397
+ p.y = y;
398
+ along_x = false ;
399
+ }
400
+ }
401
+ return p;
402
+ }
403
+
404
+ std::unique_ptr<ThreadContext> setup_threads (const std::vector<NetInfo*>& nets_by_udata) {
405
+ auto tc = std::make_unique<ThreadContext>();
406
+ tc->bb = bb;
407
+ tc->rng .rngseed (rngseed);
408
+ for (auto net : queue)
409
+ tc->route_nets .push_back (nets_by_udata[net]);
410
+ queue.clear ();
411
+ if (lhs) tc->lhs = lhs->setup_threads (nets_by_udata);
412
+ if (rhs) tc->rhs = rhs->setup_threads (nets_by_udata);
413
+ return tc;
414
+ }
415
+ };
416
+
297
417
// Define to make sure we don't print in a multithreaded context
298
418
#define ARC_LOG_ERR (...) \
299
419
do { \
@@ -1180,171 +1300,77 @@ struct Router2
1180
1300
}
1181
1301
}
1182
1302
1183
- int mid_x = 0 , mid_y = 0 ;
1303
+ std::atomic_int thread_count ;
1184
1304
1185
- void partition_nets ( )
1305
+ void router_singlethread (ThreadContext &t )
1186
1306
{
1187
- // Create a histogram of positions in X and Y positions
1188
- std::map<int , int > cxs, cys;
1189
- for (auto &n : nets) {
1190
- if (n.cx != -1 )
1191
- ++cxs[n.cx ];
1192
- if (n.cy != -1 )
1193
- ++cys[n.cy ];
1194
- }
1195
- // 4-way split for now
1196
- int accum_x = 0 , accum_y = 0 ;
1197
- int halfway = int (nets.size ()) / 2 ;
1198
- for (auto &p : cxs) {
1199
- if (accum_x < halfway && (accum_x + p.second ) >= halfway)
1200
- mid_x = p.first ;
1201
- accum_x += p.second ;
1202
- }
1203
- for (auto &p : cys) {
1204
- if (accum_y < halfway && (accum_y + p.second ) >= halfway)
1205
- mid_y = p.first ;
1206
- accum_y += p.second ;
1207
- }
1208
- if (ctx->verbose ) {
1209
- log_info (" x splitpoint: %d\n " , mid_x);
1210
- log_info (" y splitpoint: %d\n " , mid_y);
1211
- }
1212
- std::vector<int > bins (5 , 0 );
1213
- for (auto &n : nets) {
1214
- if (n.bb .x0 < mid_x && n.bb .x1 < mid_x && n.bb .y0 < mid_y && n.bb .y1 < mid_y)
1215
- ++bins[0 ]; // TL
1216
- else if (n.bb .x0 >= mid_x && n.bb .x1 >= mid_x && n.bb .y0 < mid_y && n.bb .y1 < mid_y)
1217
- ++bins[1 ]; // TR
1218
- else if (n.bb .x0 < mid_x && n.bb .x1 < mid_x && n.bb .y0 >= mid_y && n.bb .y1 >= mid_y)
1219
- ++bins[2 ]; // BL
1220
- else if (n.bb .x0 >= mid_x && n.bb .x1 >= mid_x && n.bb .y0 >= mid_y && n.bb .y1 >= mid_y)
1221
- ++bins[3 ]; // BR
1222
- else
1223
- ++bins[4 ]; // cross-boundary
1307
+ if (t.lhs )
1308
+ router_singlethread (*t.lhs .get ());
1309
+ if (t.rhs )
1310
+ router_singlethread (*t.rhs .get ());
1311
+
1312
+ if (t.lhs )
1313
+ for (auto n : t.lhs ->failed_nets )
1314
+ t.route_nets .push_back (n);
1315
+ if (t.rhs )
1316
+ for (auto n : t.rhs ->failed_nets )
1317
+ t.route_nets .push_back (n);
1318
+
1319
+ for (auto n : t.route_nets ) {
1320
+ bool result = route_net (t, n, /* is_mt=*/ true );
1321
+ if (!result)
1322
+ t.failed_nets .push_back (n);
1224
1323
}
1225
- if (ctx->verbose )
1226
- for (int i = 0 ; i < 5 ; i++)
1227
- log_info (" bin %d N=%d\n " , i, bins[i]);
1228
1324
}
1229
1325
1230
- void router_thread (ThreadContext &t, bool is_mt )
1326
+ void router_multithread (ThreadContext &t)
1231
1327
{
1328
+ if (t.lhs && t.rhs ) {
1329
+ if (thread_count < cfg.thread_limit ) {
1330
+ thread_count++;
1331
+ boost::thread rhs ([this , &t]() { router_multithread (*t.rhs .get ()); });
1332
+ router_multithread (*t.lhs .get ());
1333
+ rhs.join ();
1334
+ thread_count--;
1335
+ } else {
1336
+ router_multithread (*t.lhs .get ());
1337
+ router_multithread (*t.rhs .get ());
1338
+ }
1339
+ } else if (t.lhs )
1340
+ router_multithread (*t.lhs .get ());
1341
+ else if (t.rhs )
1342
+ router_multithread (*t.rhs .get ());
1343
+
1344
+ if (t.lhs )
1345
+ for (auto n : t.lhs ->failed_nets )
1346
+ t.route_nets .push_back (n);
1347
+ if (t.rhs )
1348
+ for (auto n : t.rhs ->failed_nets )
1349
+ t.route_nets .push_back (n);
1350
+
1232
1351
for (auto n : t.route_nets ) {
1233
- bool result = route_net (t, n, is_mt);
1352
+ bool result = route_net (t, n, /* is_mt= */ true );
1234
1353
if (!result)
1235
1354
t.failed_nets .push_back (n);
1236
1355
}
1237
1356
}
1238
1357
1239
1358
void do_route ()
1240
1359
{
1241
- // Don't multithread if fewer than 200 nets (heuristic)
1242
- if (route_queue.size () < 200 ) {
1243
- ThreadContext st;
1244
- st.rng .rngseed (ctx->rng64 ());
1245
- st.bb = BoundingBox (0 , 0 , std::numeric_limits<int >::max (), std::numeric_limits<int >::max ());
1246
- for (size_t j = 0 ; j < route_queue.size (); j++) {
1247
- route_net (st, nets_by_udata[route_queue[j]], false );
1248
- }
1249
- return ;
1250
- }
1251
- const int Nq = 4 , Nv = 2 , Nh = 2 ;
1252
- const int N = Nq + Nv + Nh;
1253
- std::vector<ThreadContext> tcs (N + 1 );
1254
- for (auto &th : tcs) {
1255
- th.rng .rngseed (ctx->rng64 ());
1256
- }
1257
- int le_x = mid_x;
1258
- int rs_x = mid_x;
1259
- int le_y = mid_y;
1260
- int rs_y = mid_y;
1261
- // Set up thread bounding boxes
1262
- tcs.at (0 ).bb = BoundingBox (0 , 0 , mid_x, mid_y);
1263
- tcs.at (1 ).bb = BoundingBox (mid_x + 1 , 0 , std::numeric_limits<int >::max (), le_y);
1264
- tcs.at (2 ).bb = BoundingBox (0 , mid_y + 1 , mid_x, std::numeric_limits<int >::max ());
1265
- tcs.at (3 ).bb =
1266
- BoundingBox (mid_x + 1 , mid_y + 1 , std::numeric_limits<int >::max (), std::numeric_limits<int >::max ());
1267
-
1268
- tcs.at (4 ).bb = BoundingBox (0 , 0 , std::numeric_limits<int >::max (), mid_y);
1269
- tcs.at (5 ).bb = BoundingBox (0 , mid_y + 1 , std::numeric_limits<int >::max (), std::numeric_limits<int >::max ());
1270
-
1271
- tcs.at (6 ).bb = BoundingBox (0 , 0 , mid_x, std::numeric_limits<int >::max ());
1272
- tcs.at (7 ).bb = BoundingBox (mid_x + 1 , 0 , std::numeric_limits<int >::max (), std::numeric_limits<int >::max ());
1273
-
1274
- tcs.at (8 ).bb = BoundingBox (0 , 0 , std::numeric_limits<int >::max (), std::numeric_limits<int >::max ());
1275
-
1276
- for (auto n : route_queue) {
1277
- auto &nd = nets.at (n);
1278
- auto ni = nets_by_udata.at (n);
1279
- int bin = N;
1280
- // Quadrants
1281
- if (nd.bb .x0 < le_x && nd.bb .x1 < le_x && nd.bb .y0 < le_y && nd.bb .y1 < le_y)
1282
- bin = 0 ;
1283
- else if (nd.bb .x0 >= rs_x && nd.bb .x1 >= rs_x && nd.bb .y0 < le_y && nd.bb .y1 < le_y)
1284
- bin = 1 ;
1285
- else if (nd.bb .x0 < le_x && nd.bb .x1 < le_x && nd.bb .y0 >= rs_y && nd.bb .y1 >= rs_y)
1286
- bin = 2 ;
1287
- else if (nd.bb .x0 >= rs_x && nd.bb .x1 >= rs_x && nd.bb .y0 >= rs_y && nd.bb .y1 >= rs_y)
1288
- bin = 3 ;
1289
- // Vertical split
1290
- else if (nd.bb .y0 < le_y && nd.bb .y1 < le_y)
1291
- bin = Nq + 0 ;
1292
- else if (nd.bb .y0 >= rs_y && nd.bb .y1 >= rs_y)
1293
- bin = Nq + 1 ;
1294
- // Horizontal split
1295
- else if (nd.bb .x0 < le_x && nd.bb .x1 < le_x)
1296
- bin = Nq + Nv + 0 ;
1297
- else if (nd.bb .x0 >= rs_x && nd.bb .x1 >= rs_x)
1298
- bin = Nq + Nv + 1 ;
1299
- tcs.at (bin).route_nets .push_back (ni);
1300
- }
1301
- if (ctx->verbose )
1302
- log_info (" %d/%d nets not multi-threadable\n " , int (tcs.at (N).route_nets .size ()), int (route_queue.size ()));
1360
+ auto partition = Partition{ctx, route_queue, nets_by_udata, nets, BoundingBox (0 , 0 , ctx->getGridDimX (), ctx->getGridDimY ())};
1361
+
1362
+ auto tc = partition.setup_threads (nets_by_udata);
1363
+ thread_count = 1 ;
1303
1364
#ifdef NPNR_DISABLE_THREADS
1304
- // Singlethreaded routing - quadrants
1305
- for (int i = 0 ; i < Nq; i++) {
1306
- router_thread (tcs.at (i), /* is_mt=*/ false );
1307
- }
1308
- // Vertical splits
1309
- for (int i = Nq; i < Nq + Nv; i++) {
1310
- router_thread (tcs.at (i), /* is_mt=*/ false );
1311
- }
1312
- // Horizontal splits
1313
- for (int i = Nq + Nv; i < Nq + Nv + Nh; i++) {
1314
- router_thread (tcs.at (i), /* is_mt=*/ false );
1315
- }
1365
+ router_singlethread (*tc.get ());
1316
1366
#else
1317
- // Multithreaded part of routing - quadrants
1318
- std::vector<boost::thread> threads;
1319
- for (int i = 0 ; i < Nq; i++) {
1320
- threads.emplace_back ([this , &tcs, i]() { router_thread (tcs.at (i), /* is_mt=*/ true ); });
1321
- }
1322
- for (auto &t : threads)
1323
- t.join ();
1324
- threads.clear ();
1325
- // Vertical splits
1326
- for (int i = Nq; i < Nq + Nv; i++) {
1327
- threads.emplace_back ([this , &tcs, i]() { router_thread (tcs.at (i), /* is_mt=*/ true ); });
1328
- }
1329
- for (auto &t : threads)
1330
- t.join ();
1331
- threads.clear ();
1332
- // Horizontal splits
1333
- for (int i = Nq + Nv; i < Nq + Nv + Nh; i++) {
1334
- threads.emplace_back ([this , &tcs, i]() { router_thread (tcs.at (i), /* is_mt=*/ true ); });
1335
- }
1336
- for (auto &t : threads)
1337
- t.join ();
1338
- threads.clear ();
1367
+ router_multithread (*tc.get ());
1339
1368
#endif
1340
- // Singlethreaded part of routing - nets that cross partitions
1341
- // or don't fit within bounding box
1342
- for (auto st_net : tcs.at (N).route_nets )
1343
- route_net (tcs.at (N), st_net, false );
1344
- // Failed nets
1345
- for (int i = 0 ; i < N; i++)
1346
- for (auto fail : tcs.at (i).failed_nets )
1347
- route_net (tcs.at (N), fail, false );
1369
+ auto st = ThreadContext{};
1370
+ st.bb = BoundingBox (0 , 0 , std::numeric_limits<int >::max (), std::numeric_limits<int >::max ());
1371
+
1372
+ for (auto fail : tc->failed_nets )
1373
+ route_net (st, fail, false );
1348
1374
}
1349
1375
1350
1376
delay_t get_route_delay (int net, store_index<PortRef> usr_idx, int phys_idx)
@@ -1397,7 +1423,6 @@ struct Router2
1397
1423
setup_nets ();
1398
1424
setup_wires ();
1399
1425
find_all_reserved_wires ();
1400
- partition_nets ();
1401
1426
curr_cong_weight = cfg.init_curr_cong_weight ;
1402
1427
hist_cong_weight = cfg.hist_cong_weight ;
1403
1428
ThreadContext st;
@@ -1534,6 +1559,7 @@ Router2Cfg::Router2Cfg(Context *ctx)
1534
1559
heatmap = ctx->settings .at (ctx->id (" router2/heatmap" )).as_string ();
1535
1560
else
1536
1561
heatmap = " " ;
1562
+ thread_limit = ctx->setting <int >(" threads" , 4 );
1537
1563
}
1538
1564
1539
1565
NEXTPNR_NAMESPACE_END
0 commit comments