Skip to content

Commit 8692cfb

Browse files
committed
Add ability to direct app GPU support
There are apparently some circumstances in which an application can benefit from disabling the internal GPU support in one or more of its libraries. Let's assume that a library might also provide a mechanism by which that support can be enabled or disabled by default. Add CLI support for specifying that GPU support be enabled or disabled. We assume that: (a) this is something a tool might want to query to see what an app was told to do, and (b) a user might want/expect this directive to be inherited by any spawned child jobs. Also note that there was a lot of code duplication between prte and prun_common when it came to parsing the cmd line for job-level directives. Collect those in a common function, as we see that some divergence had already occurred. Signed-off-by: Ralph Castain <[email protected]>
1 parent e671752 commit 8692cfb

File tree

12 files changed

+222
-274
lines changed

12 files changed

+222
-274
lines changed

Diff for: examples/client2.c

+11-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
1616
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
1717
* Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved.
18-
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
18+
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
1919
* $COPYRIGHT$
2020
*
2121
* Additional copyrights may follow
@@ -128,6 +128,16 @@ int main(int argc, char **argv)
128128
PMIX_VALUE_RELEASE(val);
129129
fprintf(stderr, "Client %s:%d job size %d\n", myproc.nspace, myproc.rank, nprocs);
130130

131+
#ifdef PMIX_GPU_SUPPORT
132+
/* see if we were given a GPU directive */
133+
rc = PMIx_Get(&proc, PMIX_GPU_SUPPORT, NULL, 0, &val);
134+
if (PMIX_SUCCESS == rc) {
135+
fprintf(stderr, "%s:%d GPU support: %s\n", myproc.nspace, myproc.rank, val->data.flag ? "ENABLED" : "DISABLED");
136+
} else {
137+
fprintf(stderr, "%s:%d GPU support: NOT GIVEN\n", myproc.nspace, myproc.rank);
138+
}
139+
#endif
140+
131141
/* put a data array of pmix_value's */
132142
val = (pmix_value_t *) malloc(32 * sizeof(pmix_value_t));
133143
for (n = 0; n < 32; n++) {

Diff for: src/mca/rmaps/base/rmaps_base_map_job.c

+10-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* and Technology (RIST). All rights reserved.
1818
* Copyright (c) 2019 UT-Battelle, LLC. All rights reserved.
1919
*
20-
* Copyright (c) 2021-2024 Nanook Consulting All rights reserved.
20+
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
2121
* Copyright (c) 2022 IBM Corporation. All rights reserved.
2222
* $COPYRIGHT$
2323
*
@@ -80,6 +80,7 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
8080
pmix_data_array_t *darray = NULL;
8181
pmix_list_t nodes;
8282
int slots, len;
83+
bool flag, *fptr;
8384

8485
PRTE_HIDE_UNUSED_PARAMS(fd, args);
8586

@@ -99,6 +100,7 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
99100
memset(&options, 0, sizeof(prte_rmaps_options_t));
100101
options.stream = prte_rmaps_base_framework.framework_output;
101102
options.verbosity = 5; // usual value for base-level functions
103+
fptr = &flag;
102104

103105
/* check and set some general options */
104106
if (prte_get_attribute(&jdata->attributes, PRTE_JOB_DO_NOT_LAUNCH, NULL, PMIX_BOOL)) {
@@ -286,6 +288,13 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
286288
}
287289
}
288290
}
291+
/* if not already assigned, inherit the parent's GPU support directive */
292+
if (!prte_get_attribute(&jdata->attributes, PRTE_JOB_GPU_SUPPORT, NULL, PMIX_BOOL)) {
293+
if (prte_get_attribute(&parent->attributes, PRTE_JOB_GPU_SUPPORT, (void **) &fptr, PMIX_BOOL)) {
294+
prte_set_attribute(&jdata->attributes, PRTE_JOB_GPU_SUPPORT, PRTE_ATTR_GLOBAL, fptr, PMIX_BOOL);
295+
}
296+
}
297+
289298
} else {
290299
if (!prte_get_attribute(&jdata->attributes, PRTE_JOB_HWT_CPUS, NULL, PMIX_BOOL) &&
291300
!prte_get_attribute(&jdata->attributes, PRTE_JOB_CORE_CPUS, NULL, PMIX_BOOL)) {

Diff for: src/mca/schizo/ompi/schizo_ompi.c

+5-3
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
* Copyright (c) 2015 Research Organization for Information Science
1919
* and Technology (RIST). All rights reserved.
2020
* Copyright (c) 2018-2022 IBM Corporation. All rights reserved.
21-
* Copyright (c) 2021-2024 Nanook Consulting All rights reserved.
21+
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
2222
* Copyright (c) 2022-2024 Triad National Security, LLC. All rights
2323
* reserved.
2424
* $COPYRIGHT$
@@ -198,7 +198,9 @@ static struct option ompioptions[] = {
198198
/* mpiexec mandated form launch key parameters - MPI 4.0 */
199199
PMIX_OPTION_DEFINE("initial-errhandler", PMIX_ARG_REQD),
200200
/* mpiexec mandated form launch key parameters - MPI 4.1*/
201-
PMIX_OPTION_DEFINE("memory-alloc-kinds", PMIX_ARG_REQD),
201+
PMIX_OPTION_DEFINE(PRTE_CLI_MEM_ALLOC_KIND, PMIX_ARG_REQD),
202+
/* GPU support - on/off */
203+
PMIX_OPTION_DEFINE(PRTE_CLI_GPU_SUPPORT, PMIX_ARG_REQD),
202204

203205
/* Display Communication Protocol : MPI_Init */
204206
PMIX_OPTION_DEFINE("display-comm", PMIX_ARG_NONE),
@@ -1603,7 +1605,7 @@ static int parse_env(char **srcenv, char ***dstenv,
16031605
}
16041606
}
16051607

1606-
if (NULL != (opt = pmix_cmd_line_get_param(results, "memory-alloc-kinds"))) {
1608+
if (NULL != (opt = pmix_cmd_line_get_param(results, PRTE_CLI_MEM_ALLOC_KIND))) {
16071609
rc = check_cache(&cache, &cachevals, "mpi_memory_alloc_kinds", opt->values[0]);
16081610
if (PRTE_SUCCESS != rc) {
16091611
PMIX_ARGV_FREE_COMPAT(cache);

Diff for: src/mca/schizo/prte/schizo_prte.c

+3-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
* Copyright (c) 2015 Research Organization for Information Science
1919
* and Technology (RIST). All rights reserved.
2020
* Copyright (c) 2018-2022 IBM Corporation. All rights reserved.
21-
* Copyright (c) 2021-2024 Nanook Consulting All rights reserved.
21+
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
2222
* $COPYRIGHT$
2323
*
2424
* Additional copyrights may follow
@@ -197,6 +197,7 @@ static struct option prterunoptions[] = {
197197
PMIX_OPTION_DEFINE(PRTE_CLI_DO_NOT_AGG_HELP, PMIX_ARG_NONE),
198198
PMIX_OPTION_DEFINE(PRTE_CLI_FWD_ENVIRON, PMIX_ARG_OPTIONAL),
199199
PMIX_OPTION_DEFINE(PRTE_CLI_MEM_ALLOC_KIND, PMIX_ARG_REQD),
200+
PMIX_OPTION_DEFINE(PRTE_CLI_GPU_SUPPORT, PMIX_ARG_REQD),
200201

201202
// output options
202203
PMIX_OPTION_DEFINE(PRTE_CLI_OUTPUT, PMIX_ARG_REQD),
@@ -312,6 +313,7 @@ static struct option prunoptions[] = {
312313
PMIX_OPTION_DEFINE(PRTE_CLI_DO_NOT_AGG_HELP, PMIX_ARG_NONE),
313314
PMIX_OPTION_DEFINE(PRTE_CLI_FWD_ENVIRON, PMIX_ARG_OPTIONAL),
314315
PMIX_OPTION_DEFINE(PRTE_CLI_MEM_ALLOC_KIND, PMIX_ARG_REQD),
316+
PMIX_OPTION_DEFINE(PRTE_CLI_GPU_SUPPORT, PMIX_ARG_REQD),
315317

316318
// output options
317319
PMIX_OPTION_DEFINE(PRTE_CLI_OUTPUT, PMIX_ARG_REQD),

Diff for: src/prted/pmix/pmix_server_dyn.c

+7
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,13 @@ int prte_pmix_xfer_job_info(prte_job_t *jdata,
655655
prte_set_attribute(&jdata->attributes, PRTE_JOB_NOAGG_HELP, PRTE_ATTR_GLOBAL,
656656
&flag, PMIX_BOOL);
657657

658+
#ifdef PMIX_GPU_SUPPORT
659+
} else if (PMIX_CHECK_KEY(info, PMIX_GPU_SUPPORT)) {
660+
flag = PMIX_INFO_TRUE(info);
661+
prte_set_attribute(&jdata->attributes, PRTE_JOB_GPU_SUPPORT, PRTE_ATTR_GLOBAL,
662+
&flag, PMIX_BOOL);
663+
#endif
664+
658665
/*** DEFAULT - CACHE FOR INCLUSION WITH JOB INFO ***/
659666
} else {
660667
pmix_server_cache_job_info(jdata, info);

Diff for: src/prted/pmix/pmix_server_register_fns.c

+7
Original file line numberDiff line numberDiff line change
@@ -376,6 +376,13 @@ int prte_pmix_server_register_nspace(prte_job_t *jdata)
376376
}
377377
#endif
378378

379+
// check for GPU directives
380+
#ifdef PMIX_GPU_SUPPORT
381+
if (prte_get_attribute(&jdata->attributes, PRTE_JOB_GPU_SUPPORT, (void**)&fptr, PMIX_BOOL)) {
382+
PMIX_INFO_LIST_ADD(ret, info, PMIX_GPU_SUPPORT, &flag, PMIX_BOOL);
383+
}
384+
#endif
385+
379386
/* for each app in the job, create an app-array */
380387
for (n = 0; n < jdata->apps->size; n++) {
381388
if (NULL == (app = (prte_app_context_t *) pmix_pointer_array_get_item(jdata->apps, n))) {

Diff for: src/prted/prted.h

+4-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* All rights reserved.
1212
* Copyright (c) 2019 Intel, Inc. All rights reserved.
1313
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved
14-
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
14+
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
1515
* $COPYRIGHT$
1616
*
1717
* Additional copyrights may follow
@@ -53,6 +53,9 @@ PRTE_EXPORT int prun_common(pmix_cli_result_t *cli,
5353
prte_schizo_base_module_t *schizo,
5454
int argc, char **argv);
5555

56+
PRTE_EXPORT int prte_prun_parse_common_cli(void *jinfo, pmix_cli_result_t *results,
57+
prte_schizo_base_module_t *schizo,
58+
pmix_list_t *apps);
5659
END_C_DECLS
5760

5861
#endif /* PRTED_H */

0 commit comments

Comments
 (0)