Skip to content

Commit a21fa43

Browse files
authored
Merge pull request openucx#8348 from dmitrygx/topic/uct/dc_fc_hard_req
UCT/IB/DC: Resend FC_HARD_REQ instead of scheduling EP on waitq
2 parents a6537b9 + f0b8ed7 commit a21fa43

File tree

3 files changed

+14
-21
lines changed

3 files changed

+14
-21
lines changed

src/uct/ib/dc/dc_mlx5.inl

+3-4
Original file line numberDiff line numberDiff line change
@@ -41,16 +41,15 @@ uct_dc_mlx5_get_arbiter_params(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep,
4141
}
4242

4343
static UCS_F_ALWAYS_INLINE void
44-
uct_dc_mlx5_ep_schedule(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep,
45-
int force)
44+
uct_dc_mlx5_ep_schedule(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep)
4645
{
4746
if (ep->dci == UCT_DC_MLX5_EP_NO_DCI) {
4847
/* no dci:
4948
* Do not grab dci here. Instead put the group on dci allocation
5049
* arbiter. This way we can assure fairness between all eps waiting for
5150
* dci allocation. Relevant for dcs and dcs_quota policies.
5251
*/
53-
uct_dc_mlx5_iface_schedule_dci_alloc(iface, ep, force);
52+
uct_dc_mlx5_iface_schedule_dci_alloc(iface, ep);
5453
} else {
5554
uct_dc_mlx5_iface_dci_sched_tx(iface, ep);
5655
}
@@ -84,5 +83,5 @@ uct_dc_mlx5_ep_pending_common(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep,
8483
return;
8584
}
8685

87-
uct_dc_mlx5_ep_schedule(iface, ep, 0);
86+
uct_dc_mlx5_ep_schedule(iface, ep);
8887
}

src/uct/ib/dc/dc_mlx5_ep.c

+7-12
Original file line numberDiff line numberDiff line change
@@ -1572,17 +1572,12 @@ static unsigned uct_dc_mlx5_ep_fc_hard_req_progress(void *arg)
15721572
* resend FC_HARD_REQ packet to make sure a peer will resend FC_PURE_GRANT
15731573
* packet in case of failure on the remote FC endpoint */
15741574
kh_foreach_key(&iface->tx.fc_hash, ep_key, {
1575-
ep = (uct_dc_mlx5_ep_t*)ep_key;
1576-
1577-
/* Allocate DCI for the endpoint to schedule the endpoint to DCI wait
1578-
* queue if there is free DCI */
1579-
status = uct_dc_mlx5_iface_dci_get(iface, ep);
1580-
ucs_assertv((status == UCS_OK) || (status == UCS_ERR_NO_RESOURCE),
1581-
"%s", ucs_status_string(status));
1582-
1583-
/* Force DCI scheduling, since FC resources may never become available
1584-
* unless we send FC_HARD_REQ packet */
1585-
uct_dc_mlx5_ep_schedule(iface, ep, 1);
1575+
ep = (uct_dc_mlx5_ep_t*)ep_key;
1576+
status = uct_dc_mlx5_ep_check_fc(iface, ep);
1577+
if ((status != UCS_OK) && (status != UCS_ERR_NO_RESOURCE)) {
1578+
ucs_warn("ep %p: flow-control check failed: %s", ep,
1579+
ucs_status_string(status));
1580+
}
15861581
})
15871582

15881583
return 1;
@@ -1702,7 +1697,7 @@ void uct_dc_mlx5_ep_handle_failure(uct_dc_mlx5_ep_t *ep,
17021697
/* Since DCI isn't assigned for the FC endpoint, schedule DCI
17031698
* allocation for progressing possible FC_PURE_GRANT re-sending
17041699
* operation which are scheduled on the pending queue */
1705-
uct_dc_mlx5_iface_schedule_dci_alloc(iface, ep, 0);
1700+
uct_dc_mlx5_iface_schedule_dci_alloc(iface, ep);
17061701
}
17071702
}
17081703

src/uct/ib/dc/dc_mlx5_ep.h

+4-5
Original file line numberDiff line numberDiff line change
@@ -385,13 +385,12 @@ static inline int uct_dc_mlx5_iface_dci_ep_can_send(uct_dc_mlx5_ep_t *ep)
385385

386386
static UCS_F_ALWAYS_INLINE
387387
void uct_dc_mlx5_iface_schedule_dci_alloc(uct_dc_mlx5_iface_t *iface,
388-
uct_dc_mlx5_ep_t *ep, int force)
388+
uct_dc_mlx5_ep_t *ep)
389389
{
390390
ucs_arbiter_t *waitq;
391391

392-
/* If FC window is empty and force scheduling wasn't requested, the group
393-
* will be scheduled when grant is received */
394-
if (force || uct_rc_fc_has_resources(&iface->super.super, &ep->fc)) {
392+
/* If FC window is empty the group will be scheduled when grant is received */
393+
if (uct_rc_fc_has_resources(&iface->super.super, &ep->fc)) {
395394
waitq = uct_dc_mlx5_iface_dci_waitq(iface, uct_dc_mlx5_ep_pool_index(ep));
396395
ucs_arbiter_group_schedule(waitq, &ep->arb_group);
397396
}
@@ -479,7 +478,7 @@ uct_dc_mlx5_iface_dci_put(uct_dc_mlx5_iface_t *iface, uint8_t dci_index)
479478
* move the group to the 'wait for dci alloc' state
480479
*/
481480
ucs_arbiter_group_desched(uct_dc_mlx5_iface_tx_waitq(iface), &ep->arb_group);
482-
uct_dc_mlx5_iface_schedule_dci_alloc(iface, ep, 0);
481+
uct_dc_mlx5_iface_schedule_dci_alloc(iface, ep);
483482
}
484483

485484
static inline void uct_dc_mlx5_iface_dci_alloc(uct_dc_mlx5_iface_t *iface, uct_dc_mlx5_ep_t *ep)

0 commit comments

Comments
 (0)