Skip to content

Commit

Permalink
Add ability to direct app GPU support
Browse files Browse the repository at this point in the history
There apparently are some circumstances when an application can
benefit from disabling the internal GPU support in one or more
of its libraries. Let's assume that a library might also provide
a mechanism by which that support can be defaulted to enabled
or disabled.

Add CLI support for specifying that GPU support be enabled or
disabled. We assume that:

(a) this is something that a tool might want to enquire about
    to see what an app was told to do

(b) a user might want/expect this to be a directive inherited
    by any spawned child jobs

Also note that there was a lot of code duplication between
prte and prun_common when it came to parsing the cmd line
for job-level directives. Collect those in a common function
as we see that some divergence had already occurred.

Signed-off-by: Ralph Castain <[email protected]>
  • Loading branch information
rhc54 committed Feb 6, 2025
1 parent e671752 commit fb8521e
Show file tree
Hide file tree
Showing 15 changed files with 234 additions and 275 deletions.
12 changes: 11 additions & 1 deletion examples/client2.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* Copyright (c) 2011 Oak Ridge National Labs. All rights reserved.
* Copyright (c) 2013-2019 Intel, Inc. All rights reserved.
* Copyright (c) 2015 Mellanox Technologies, Inc. All rights reserved.
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -128,6 +128,16 @@ int main(int argc, char **argv)
PMIX_VALUE_RELEASE(val);
fprintf(stderr, "Client %s:%d job size %d\n", myproc.nspace, myproc.rank, nprocs);

#ifdef PMIX_GPU_SUPPORT
    /* See if we were given a GPU directive. This section is compiled only
     * when the PMIx headers define PMIX_GPU_SUPPORT. The directive is read
     * as a boolean flag: true prints ENABLED, false prints DISABLED, and a
     * failed lookup is reported as NOT GIVEN. */
    rc = PMIx_Get(&proc, PMIX_GPU_SUPPORT, NULL, 0, &val);
    if (PMIX_SUCCESS == rc) {
        fprintf(stderr, "%s:%d GPU support: %s\n", myproc.nspace, myproc.rank, val->data.flag ? "ENABLED" : "DISABLED");
        /* the value returned by PMIx_Get belongs to us - release it here,
         * since val is reassigned immediately below and the returned
         * storage would otherwise be leaked */
        PMIX_VALUE_RELEASE(val);
    } else {
        fprintf(stderr, "%s:%d GPU support: NOT GIVEN\n", myproc.nspace, myproc.rank);
    }
#endif

/* put a data array of pmix_value's */
val = (pmix_value_t *) malloc(32 * sizeof(pmix_value_t));
for (n = 0; n < 32; n++) {
Expand Down
4 changes: 4 additions & 0 deletions src/docs/show-help-files/help-prterun.txt
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,10 @@ option to the help request as "--help <option>".
| "-x <name>" | Export an environment variable, optionally |
| | specifying a value |
+----------------------+-----------------------------------------------+
| "--gpu-support <val>"| Direct application to either enable (true) or |
| | disable (false) its internal library's GPU |
| | support |
+----------------------+-----------------------------------------------+

+----------------------+-----------------------------------------------+
| | Specific Options |
Expand Down
4 changes: 4 additions & 0 deletions src/docs/show-help-files/help-prun.txt
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,10 @@ option to the help request as "--help <option>".
| | current environmental variables starting with |
| | "foo") |
+----------------------+-----------------------------------------------+
| "--gpu-support <val>"| Direct application to either enable (true) or |
| | disable (false) its internal library's GPU |
| | support |
+----------------------+-----------------------------------------------+

+----------------------+-----------------------------------------------+
| | Specific Options |
Expand Down
11 changes: 10 additions & 1 deletion src/mca/rmaps/base/rmaps_base_map_job.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* and Technology (RIST). All rights reserved.
* Copyright (c) 2019 UT-Battelle, LLC. All rights reserved.
*
* Copyright (c) 2021-2024 Nanook Consulting All rights reserved.
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
* Copyright (c) 2022 IBM Corporation. All rights reserved.
* $COPYRIGHT$
*
Expand Down Expand Up @@ -80,6 +80,7 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
pmix_data_array_t *darray = NULL;
pmix_list_t nodes;
int slots, len;
bool flag, *fptr;

PRTE_HIDE_UNUSED_PARAMS(fd, args);

Expand All @@ -99,6 +100,7 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
memset(&options, 0, sizeof(prte_rmaps_options_t));
options.stream = prte_rmaps_base_framework.framework_output;
options.verbosity = 5; // usual value for base-level functions
fptr = &flag;

/* check and set some general options */
if (prte_get_attribute(&jdata->attributes, PRTE_JOB_DO_NOT_LAUNCH, NULL, PMIX_BOOL)) {
Expand Down Expand Up @@ -286,6 +288,13 @@ void prte_rmaps_base_map_job(int fd, short args, void *cbdata)
}
}
}
/* if not already assigned, inherit the parent's GPU support directive */
if (!prte_get_attribute(&jdata->attributes, PRTE_JOB_GPU_SUPPORT, NULL, PMIX_BOOL)) {
if (prte_get_attribute(&parent->attributes, PRTE_JOB_GPU_SUPPORT, (void **) &fptr, PMIX_BOOL)) {
prte_set_attribute(&jdata->attributes, PRTE_JOB_GPU_SUPPORT, PRTE_ATTR_GLOBAL, fptr, PMIX_BOOL);
}
}

} else {
if (!prte_get_attribute(&jdata->attributes, PRTE_JOB_HWT_CPUS, NULL, PMIX_BOOL) &&
!prte_get_attribute(&jdata->attributes, PRTE_JOB_CORE_CPUS, NULL, PMIX_BOOL)) {
Expand Down
5 changes: 4 additions & 1 deletion src/mca/schizo/ompi/schizo-ompi-cli.rstxt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
.. -*- rst -*-
Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
Copyright (c) 2022 Cisco Systems, Inc. All rights reserved.
Copyright (c) 2022 IBM Corporation. All rights reserved.
Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved.
Expand Down Expand Up @@ -84,6 +84,9 @@ Launch options
* ``-x <var>``: Export an environment variable, optionally specifying a value.
:ref:`See below for details <label-schizo-ompi-x>`.

* ``--gpu-support <val>``: Direct application to either enable (true) or
disable (false) its internal library's GPU support.

Mapping, ranking, and binding options
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
8 changes: 5 additions & 3 deletions src/mca/schizo/ompi/schizo_ompi.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018-2022 IBM Corporation. All rights reserved.
* Copyright (c) 2021-2024 Nanook Consulting All rights reserved.
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
* Copyright (c) 2022-2024 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -198,7 +198,9 @@ static struct option ompioptions[] = {
/* mpiexec mandated form launch key parameters - MPI 4.0 */
PMIX_OPTION_DEFINE("initial-errhandler", PMIX_ARG_REQD),
/* mpiexec mandated form launch key parameters - MPI 4.1*/
PMIX_OPTION_DEFINE("memory-alloc-kinds", PMIX_ARG_REQD),
PMIX_OPTION_DEFINE(PRTE_CLI_MEM_ALLOC_KIND, PMIX_ARG_REQD),
/* GPU support - on/off */
PMIX_OPTION_DEFINE(PRTE_CLI_GPU_SUPPORT, PMIX_ARG_REQD),

/* Display Communication Protocol : MPI_Init */
PMIX_OPTION_DEFINE("display-comm", PMIX_ARG_NONE),
Expand Down Expand Up @@ -1603,7 +1605,7 @@ static int parse_env(char **srcenv, char ***dstenv,
}
}

if (NULL != (opt = pmix_cmd_line_get_param(results, "memory-alloc-kinds"))) {
if (NULL != (opt = pmix_cmd_line_get_param(results, PRTE_CLI_MEM_ALLOC_KIND))) {
rc = check_cache(&cache, &cachevals, "mpi_memory_alloc_kinds", opt->values[0]);
if (PRTE_SUCCESS != rc) {
PMIX_ARGV_FREE_COMPAT(cache);
Expand Down
4 changes: 3 additions & 1 deletion src/mca/schizo/prte/schizo_prte.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
* Copyright (c) 2015 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2018-2022 IBM Corporation. All rights reserved.
* Copyright (c) 2021-2024 Nanook Consulting All rights reserved.
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -197,6 +197,7 @@ static struct option prterunoptions[] = {
PMIX_OPTION_DEFINE(PRTE_CLI_DO_NOT_AGG_HELP, PMIX_ARG_NONE),
PMIX_OPTION_DEFINE(PRTE_CLI_FWD_ENVIRON, PMIX_ARG_OPTIONAL),
PMIX_OPTION_DEFINE(PRTE_CLI_MEM_ALLOC_KIND, PMIX_ARG_REQD),
PMIX_OPTION_DEFINE(PRTE_CLI_GPU_SUPPORT, PMIX_ARG_REQD),

// output options
PMIX_OPTION_DEFINE(PRTE_CLI_OUTPUT, PMIX_ARG_REQD),
Expand Down Expand Up @@ -312,6 +313,7 @@ static struct option prunoptions[] = {
PMIX_OPTION_DEFINE(PRTE_CLI_DO_NOT_AGG_HELP, PMIX_ARG_NONE),
PMIX_OPTION_DEFINE(PRTE_CLI_FWD_ENVIRON, PMIX_ARG_OPTIONAL),
PMIX_OPTION_DEFINE(PRTE_CLI_MEM_ALLOC_KIND, PMIX_ARG_REQD),
PMIX_OPTION_DEFINE(PRTE_CLI_GPU_SUPPORT, PMIX_ARG_REQD),

// output options
PMIX_OPTION_DEFINE(PRTE_CLI_OUTPUT, PMIX_ARG_REQD),
Expand Down
7 changes: 7 additions & 0 deletions src/prted/pmix/pmix_server_dyn.c
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,13 @@ int prte_pmix_xfer_job_info(prte_job_t *jdata,
prte_set_attribute(&jdata->attributes, PRTE_JOB_NOAGG_HELP, PRTE_ATTR_GLOBAL,
&flag, PMIX_BOOL);

#ifdef PMIX_GPU_SUPPORT
} else if (PMIX_CHECK_KEY(info, PMIX_GPU_SUPPORT)) {
flag = PMIX_INFO_TRUE(info);
prte_set_attribute(&jdata->attributes, PRTE_JOB_GPU_SUPPORT, PRTE_ATTR_GLOBAL,
&flag, PMIX_BOOL);
#endif

/*** DEFAULT - CACHE FOR INCLUSION WITH JOB INFO ***/
} else {
pmix_server_cache_job_info(jdata, info);
Expand Down
7 changes: 7 additions & 0 deletions src/prted/pmix/pmix_server_register_fns.c
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,13 @@ int prte_pmix_server_register_nspace(prte_job_t *jdata)
}
#endif

// check for GPU directives
#ifdef PMIX_GPU_SUPPORT
if (prte_get_attribute(&jdata->attributes, PRTE_JOB_GPU_SUPPORT, (void**)&fptr, PMIX_BOOL)) {
PMIX_INFO_LIST_ADD(ret, info, PMIX_GPU_SUPPORT, &flag, PMIX_BOOL);
}
#endif

/* for each app in the job, create an app-array */
for (n = 0; n < jdata->apps->size; n++) {
if (NULL == (app = (prte_app_context_t *) pmix_pointer_array_get_item(jdata->apps, n))) {
Expand Down
5 changes: 4 additions & 1 deletion src/prted/prted.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2019 Intel, Inc. All rights reserved.
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -53,6 +53,9 @@ PRTE_EXPORT int prun_common(pmix_cli_result_t *cli,
prte_schizo_base_module_t *schizo,
int argc, char **argv);

/* Parse the job-level directives given on the command line. This is the
 * common routine shared by prte and prun_common so that both handle these
 * options the same way.
 *
 * NOTE(review): jinfo is declared as void* - presumably the container that
 * collects the resulting job-level info; confirm against the definition.
 * NOTE(review): results holds the parsed command line, schizo the active
 * personality module, and apps presumably the list of apps to be launched -
 * confirm against the definition.
 *
 * Returns an int status code - presumably PRTE_SUCCESS or a PRTE error
 * constant; confirm against the definition. */
PRTE_EXPORT int prte_prun_parse_common_cli(void *jinfo, pmix_cli_result_t *results,
prte_schizo_base_module_t *schizo,
pmix_list_t *apps);
END_C_DECLS

#endif /* PRTED_H */
Loading

0 comments on commit fb8521e

Please sign in to comment.