Skip to content

Commit 25c2ca4

Browse files
committed
Cleanup attachment to scheduler
If a scheduler request is received and we are not already attached to the scheduler, then use PMIx_tool_attach_to_server to set up the connection. Note that the function will simply return if we are already attached, but PRRTE doesn't know about it yet. Signed-off-by: Ralph Castain <[email protected]>
1 parent 6c5318a commit 25c2ca4

File tree

3 files changed

+30
-4
lines changed

3 files changed

+30
-4
lines changed

src/prted/pmix/pmix_server_gen.c

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,9 @@ static void _toolconn(int sd, short args, void *cbdata)
525525
pmix_data_buffer_t *buf;
526526
prte_plm_cmd_flag_t command = PRTE_PLM_ALLOC_JOBID_CMD;
527527
pmix_status_t xrc;
528+
bool primary = false;
529+
bool nspace_given = false;
530+
bool rank_given = false;
528531
PRTE_HIDE_UNUSED_PARAMS(sd, args);
529532

530533
PMIX_ACQUIRE_OBJECT(cd);
@@ -560,8 +563,10 @@ static void _toolconn(int sd, short args, void *cbdata)
560563
}
561564
} else if (PMIX_CHECK_KEY(&cd->info[n], PMIX_NSPACE)) {
562565
PMIX_LOAD_NSPACE(cd->target.nspace, cd->info[n].value.data.string);
566+
nspace_given = true;
563567
} else if (PMIX_CHECK_KEY(&cd->info[n], PMIX_RANK)) {
564568
cd->target.rank = cd->info[n].value.data.rank;
569+
rank_given = true;
565570
} else if (PMIX_CHECK_KEY(&cd->info[n], PMIX_HOSTNAME)) {
566571
cd->operation = strdup(cd->info[n].value.data.string);
567572
} else if (PMIX_CHECK_KEY(&cd->info[n], PMIX_CMD_LINE)) {
@@ -570,6 +575,8 @@ static void _toolconn(int sd, short args, void *cbdata)
570575
cd->launcher = PMIX_INFO_TRUE(&cd->info[n]);
571576
} else if (PMIX_CHECK_KEY(&cd->info[n], PMIX_SERVER_SCHEDULER)) {
572577
cd->scheduler = PMIX_INFO_TRUE(&cd->info[n]);
578+
} else if (PMIX_CHECK_KEY(&cd->info[n], PMIX_PRIMARY_SERVER)) {
579+
primary = PMIX_INFO_TRUE(&cd->info[n]);
573580
} else if (PMIX_CHECK_KEY(&cd->info[n], PMIX_PROC_PID)) {
574581
PMIX_VALUE_GET_NUMBER(xrc, &cd->info[n].value, cd->pid, pid_t);
575582
if (PMIX_SUCCESS != xrc) {
@@ -599,11 +606,25 @@ static void _toolconn(int sd, short args, void *cbdata)
599606
} else {
600607
/* mark that the scheduler has attached to us */
601608
prte_pmix_server_globals.scheduler_connected = true;
609+
// the scheduler always self-assigns its ID
610+
if (!nspace_given || !rank_given) {
611+
cd->toolcbfunc(PMIX_ERR_NOT_SUPPORTED, NULL, cd->cbdata);
612+
PMIX_RELEASE(cd);
613+
return;
614+
}
602615
PMIX_LOAD_PROCID(&prte_pmix_server_globals.scheduler,
603616
cd->target.nspace, cd->target.rank);
604-
/* we cannot immediately set the scheduler to be our
605-
* PMIx server as the PMIx library hasn't finished
606-
* recording it */
617+
rc = PMIX_SUCCESS;
618+
619+
if (!primary) {
620+
/* we cannot immediately set the scheduler to be our
621+
* PMIx server as the PMIx library hasn't finished
622+
* recording it */
623+
goto complete;
624+
}
625+
// it has been recorded in the library, so record it here
626+
prte_pmix_server_globals.scheduler_set_as_server = true;
627+
goto complete;
607628
}
608629
}
609630

@@ -652,6 +673,8 @@ static void _toolconn(int sd, short args, void *cbdata)
652673
if (PMIX_SUCCESS != rc) {
653674
rc = prte_pmix_convert_rc(rc);
654675
}
676+
677+
complete:
655678
if (NULL != cd->toolcbfunc) {
656679
cd->toolcbfunc(rc, &cd->target, cd->cbdata);
657680
}

src/tools/psched/psched.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,8 @@ typedef struct {
129129
// allocation request info
130130
pmix_proc_t requestor;
131131
pmix_alloc_directive_t directive;
132+
// whether the data is a local copy
133+
bool copy;
132134
// original info keys
133135
pmix_info_t *data;
134136
size_t ndata;

src/tools/psched/state.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -315,6 +315,7 @@ PMIX_CLASS_INSTANCE(psched_state_t,
315315
static void req_con(psched_req_t *p)
316316
{
317317
PMIx_Load_procid(&p->requestor, NULL, PMIX_RANK_INVALID);
318+
p->copy = false; // data is not a local copy
318319
p->data = NULL;
319320
p->ndata = 0;
320321
p->user_refid = NULL;
@@ -341,7 +342,7 @@ static void req_con(psched_req_t *p)
341342
}
342343
static void req_des(psched_req_t *p)
343344
{
344-
if (NULL != p->data) {
345+
if (NULL != p->data && p->copy) {
345346
PMIx_Info_free(p->data, p->ndata);
346347
}
347348
if (NULL != p->user_refid) {

0 commit comments

Comments
 (0)