Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 27 additions & 19 deletions src/mca/odls/base/odls_base_default_fns.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
* and Technology (RIST). All rights reserved.
* Copyright (c) 2017 Mellanox Technologies Ltd. All rights reserved.
* Copyright (c) 2017-2020 IBM Corporation. All rights reserved.
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -71,7 +71,7 @@
#include "src/mca/rml/rml.h"
#include "src/mca/routed/routed.h"
#include "src/mca/rtc/rtc.h"
#include "src/mca/schizo/schizo.h"
#include "src/mca/schizo/base/base.h"
#include "src/mca/state/state.h"

#include "src/prted/pmix/pmix_server.h"
Expand Down Expand Up @@ -425,6 +425,7 @@ int prte_odls_base_default_construct_child_list(pmix_data_buffer_t *buffer, pmix
pmix_byte_object_t bo, pbo;
size_t m;
pmix_envar_t envt;
char *tmp;

PRTE_OUTPUT_VERBOSE((5, prte_odls_base_framework.framework_output,
"%s odls:constructing child list", PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)));
Expand Down Expand Up @@ -581,6 +582,24 @@ int prte_odls_base_default_construct_child_list(pmix_data_buffer_t *buffer, pmix
jdata->map = PRTE_NEW(prte_job_map_t);
}
}
/* get the associated schizo module */
if (NULL != jdata->personality) {
tmp = prte_argv_join(jdata->personality, ',');
} else {
tmp = NULL;
}
jdata->schizo = (struct prte_schizo_base_module_t*)prte_schizo_base_detect_proxy(tmp);
if (NULL == jdata->schizo) {
prte_show_help("help-schizo-base.txt", "no-proxy", true,
prte_tool_basename, (NULL == tmp) ? "NULL" : tmp);
if (NULL != tmp) {
free(tmp);
}
return 1;
}
if (NULL != tmp) {
free(tmp);
}

/* if the job is fully described, then mpirun will have computed
* and sent us the complete array of procs in the prte_job_t, so we
Expand Down Expand Up @@ -952,6 +971,7 @@ void prte_odls_base_spawn_proc(int fd, short sd, void *cbdata)
pmix_proc_t pproc;
pmix_status_t ret;
char *ptr;

PRTE_HIDE_UNUSED_PARAMS(fd, sd);

PRTE_ACQUIRE_OBJECT(cd);
Expand All @@ -967,7 +987,7 @@ void prte_odls_base_spawn_proc(int fd, short sd, void *cbdata)
char *tmp = strdup(app->env[i]);
ptr = strchr(tmp, '=');
if (NULL == ptr) {
PMIX_ERROR_LOG(PRTE_ERR_BAD_PARAM);
PRTE_ERROR_LOG(PRTE_ERR_BAD_PARAM);
rc = PRTE_ERR_BAD_PARAM;
state = PRTE_PROC_STATE_FAILED_TO_LAUNCH;
free(tmp);
Expand Down Expand Up @@ -1009,15 +1029,6 @@ void prte_odls_base_spawn_proc(int fd, short sd, void *cbdata)
child->rml_uri = NULL;
}

/* setup the rest of the environment with the proc-specific items - these
* will be overwritten for each child
*/
if (PRTE_SUCCESS != (rc = prte_schizo.setup_child(jobdat, child, app, &cd->env))) {
PRTE_ERROR_LOG(rc);
state = PRTE_PROC_STATE_FAILED_TO_LAUNCH;
goto errorout;
}

/* did the user request we display output in xterms? */
if (NULL != prte_xterm) {
prte_list_item_t *nmitem;
Expand Down Expand Up @@ -1130,6 +1141,8 @@ void prte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
char **argvptr;
char *pathenv = NULL, *mpiexec_pathenv = NULL;
char *full_search;
prte_schizo_base_module_t *schizo;

PRTE_HIDE_UNUSED_PARAMS(fd, sd);

PRTE_ACQUIRE_OBJECT(caddy);
Expand All @@ -1154,6 +1167,7 @@ void prte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
* and was removed. This isn't an error so just move along */
goto ERROR_OUT;
}
schizo = (prte_schizo_base_module_t*)jobdat->schizo;

/* do we have any local procs to launch? */
if (0 == jobdat->num_local_procs) {
Expand Down Expand Up @@ -1269,7 +1283,7 @@ void prte_odls_base_default_launch_local(int fd, short sd, void *cbdata)
}

/* setup the environment for this app */
if (PRTE_SUCCESS != (rc = prte_schizo.setup_fork(jobdat, app))) {
if (PRTE_SUCCESS != (rc = schizo->setup_fork(jobdat, app))) {

PRTE_OUTPUT_VERBOSE((10, prte_odls_base_framework.framework_output,
"%s odls:launch:setup_fork failed with error %s",
Expand Down Expand Up @@ -2078,12 +2092,6 @@ int prte_odls_base_default_restart_proc(prte_proc_t *child,
}
app = (prte_app_context_t *) prte_pointer_array_get_item(jobdat->apps, child->app_idx);

/* reset envars to match this child */
if (PRTE_SUCCESS != (rc = prte_schizo.setup_child(jobdat, child, app, &app->env))) {
PRTE_ERROR_LOG(rc);
goto CLEANUP;
}

/* setup the path */
if (PRTE_SUCCESS != (rc = setup_path(app, &wdir))) {
PRTE_ERROR_LOG(rc);
Expand Down
1 change: 1 addition & 0 deletions src/mca/plm/alps/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# Copyright (c) 2011-2016 Los Alamos National Security, LLC.
# All rights reserved.
# Copyright (c) 2019 Intel, Inc. All rights reserved.
# Copyright (c) 2022 Nanook Consulting. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand Down
1 change: 1 addition & 0 deletions src/mca/plm/lsf/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# Copyright (c) 2011-2013 Los Alamos National Security, LLC.
# All rights reserved.
# Copyright (c) 2019 Intel, Inc. All rights reserved.
# Copyright (c) 2022 Nanook Consulting. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand Down
1 change: 1 addition & 0 deletions src/mca/plm/slurm/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# Copyright (c) 2011-2016 Los Alamos National Security, LLC.
# All rights reserved.
# Copyright (c) 2017-2019 Intel, Inc. All rights reserved.
# Copyright (c) 2022 Nanook Consulting. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand Down
15 changes: 1 addition & 14 deletions src/mca/plm/ssh/plm_ssh_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* Copyright (c) 2014-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2015-2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -93,7 +93,6 @@
#include "src/mca/rml/rml.h"
#include "src/mca/rml/rml_types.h"
#include "src/mca/routed/routed.h"
#include "src/mca/schizo/schizo.h"
#include "src/mca/state/state.h"

#include "src/mca/plm/base/base.h"
Expand Down Expand Up @@ -628,18 +627,6 @@ static int setup_launch(int *argcptr, char ***argvptr, char *nodename, int *node
free(param);
}

/* unless told otherwise... */
if (prte_plm_ssh_component.pass_environ_mca_params) {
/* now check our local environment for MCA params - add them
* only if they aren't already present
*/
if (PRTE_SUCCESS != (rc = prte_schizo.parse_env(NULL, environ, &argv, true))) {
prte_argv_free(argv);
return rc;
}
argc = prte_argv_count(argv);
}

/* protect the params */
prte_plm_base_wrap_args(argv);

Expand Down
1 change: 1 addition & 0 deletions src/mca/plm/tm/configure.m4
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
# All rights reserved.
# Copyright (c) 2009-2020 Cisco Systems, Inc. All rights reserved
# Copyright (c) 2019 Intel, Inc. All rights reserved.
# Copyright (c) 2022 Nanook Consulting. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand Down
32 changes: 17 additions & 15 deletions src/mca/ras/base/ras_base_allocate.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -181,10 +181,10 @@ void prte_ras_base_allocate(int fd, short args, void *cbdata)
prte_job_t *jdata;
prte_list_t nodes;
prte_node_t *node;
int32_t i;
int32_t i, j;
prte_app_context_t *app;
prte_state_caddy_t *caddy = (prte_state_caddy_t *) cbdata;
char *hosts = NULL;
char *hosts = NULL, **hostlist;
char *ptr;
pmix_status_t ret;
PRTE_HIDE_UNUSED_PARAMS(fd, args);
Expand Down Expand Up @@ -373,8 +373,7 @@ void prte_ras_base_allocate(int fd, short args, void *cbdata)
if (NULL == (app = (prte_app_context_t *) prte_pointer_array_get_item(jdata->apps, i))) {
continue;
}
if (prte_get_attribute(&app->attributes, PRTE_APP_DASH_HOST, (void **) &hosts,
PMIX_STRING)) {
if (prte_get_attribute(&app->attributes, PRTE_APP_DASH_HOST, (void **) &hosts, PMIX_STRING)) {
PRTE_OUTPUT_VERBOSE((5, prte_ras_base_framework.framework_output,
"%s ras:base:allocate adding dash_hosts",
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME)));
Expand Down Expand Up @@ -426,22 +425,25 @@ void prte_ras_base_allocate(int fd, short args, void *cbdata)
if (NULL == (app = (prte_app_context_t *) prte_pointer_array_get_item(jdata->apps, i))) {
continue;
}
if (prte_get_attribute(&app->attributes, PRTE_APP_HOSTFILE, (void **) &hosts,
PMIX_STRING)) {
if (prte_get_attribute(&app->attributes, PRTE_APP_HOSTFILE, (void **) &hosts, PMIX_STRING)) {
PRTE_OUTPUT_VERBOSE((5, prte_ras_base_framework.framework_output,
"%s ras:base:allocate adding hostfile %s",
PRTE_NAME_PRINT(PRTE_PROC_MY_NAME), hosts));

/* hostfile was specified - parse it and add it to the list */
if (PRTE_SUCCESS != (rc = prte_util_add_hostfile_nodes(&nodes, hosts))) {
free(hosts);
PRTE_DESTRUCT(&nodes);
/* set an error event */
PRTE_ACTIVATE_JOB_STATE(jdata, PRTE_JOB_STATE_ALLOC_FAILED);
PRTE_RELEASE(caddy);
return;
}
hostlist = prte_argv_split(hosts, ',');
free(hosts);
for (j=0; NULL != hostlist[j]; j++) {
if (PRTE_SUCCESS != (rc = prte_util_add_hostfile_nodes(&nodes, hostlist[j]))) {
prte_argv_free(hostlist);
PRTE_DESTRUCT(&nodes);
/* set an error event */
PRTE_ACTIVATE_JOB_STATE(jdata, PRTE_JOB_STATE_ALLOC_FAILED);
PRTE_RELEASE(caddy);
return;
}
}
prte_argv_free(hostlist);
}
}

Expand Down
3 changes: 2 additions & 1 deletion src/mca/routed/routed.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
* Copyright (c) 2019 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -39,6 +39,7 @@

#include "src/mca/mca.h"
#include "types.h"
#include "src/pmix/pmix-internal.h"

#include "src/mca/routed/routed_types.h"

Expand Down
36 changes: 20 additions & 16 deletions src/mca/schizo/base/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* Copyright (c) 2015-2020 Intel, Inc. All rights reserved.
* Copyright (c) 2020 IBM Corporation. All rights reserved.
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2021 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -66,32 +66,28 @@ typedef struct {
} prte_schizo_base_active_module_t;
PRTE_CLASS_DECLARATION(prte_schizo_base_active_module_t);

/* Associates a string name with an array of strings ("conflicts").
 * A pointer to this type is accepted by prte_schizo_base_convert().
 * NOTE(review): presumably "name" is a command line option and
 * "conflicts" lists the options that may not be combined with it -
 * the users of this type are not visible here, so please confirm. */
typedef struct {
char *name; /* name this entry applies to */
char **conflicts; /* array of strings; NOTE(review): termination convention not visible here - confirm */
} prte_schizo_conflicts_t;

/* base support functions */
PRTE_EXPORT int prte_schizo_base_convert(char ***argv, int idx, int ntodelete, char *option,
char *directive, char *modifier, bool report);
PRTE_EXPORT int prte_schizo_base_convert(prte_cli_result_t *results,
char *deprecated,
char *key, char *option,
prte_schizo_conflicts_t *conflicts,
bool report);

/* the base stub functions */

PRTE_EXPORT int prte_schizo_base_parse_env(prte_cmd_line_t *cmd_line, char **srcenv, char ***dstenv,
bool cmdline);
PRTE_EXPORT prte_schizo_base_module_t *prte_schizo_base_detect_proxy(char *cmdpath);

PRTE_EXPORT int prte_schizo_base_setup_app(prte_app_context_t *app);
PRTE_EXPORT int prte_schizo_base_setup_fork(prte_job_t *jdata, prte_app_context_t *context);
PRTE_EXPORT int prte_schizo_base_setup_child(prte_job_t *jobdat, prte_proc_t *child,
prte_app_context_t *app, char ***env);
PRTE_EXPORT void prte_schizo_base_job_info(prte_cmd_line_t *cmdline, void *jobinfo);
PRTE_EXPORT int prte_schizo_base_check_sanity(prte_cmd_line_t *cmdline);
PRTE_EXPORT void prte_schizo_base_finalize(void);
PRTE_EXPORT void prte_schizo_base_root_error_msg(void);
PRTE_EXPORT char *prte_schizo_base_getline(FILE *fp);
PRTE_EXPORT char *prte_schizo_base_strip_quotes(char *p);
PRTE_EXPORT int prte_schizo_base_process_deprecated_cli(prte_cmd_line_t *cmdline, int *argc,
char ***argv, char **options, bool single_dash_okay,
prte_schizo_convertor_fn_t convert);
PRTE_EXPORT int prte_schizo_base_parse_prte(int argc, int start, char **argv, char ***target);
PRTE_EXPORT int prte_schizo_base_parse_pmix(int argc, int start, char **argv, char ***target);
PRTE_EXPORT int prte_schizo_base_sanity(prte_cmd_line_t *cmd_line);
PRTE_EXPORT int prte_schizo_base_sanity(prte_cli_result_t *cmd_line);
PRTE_EXPORT bool prte_schizo_base_check_directives(char *directive,
char **valid,
char **quals,
Expand All @@ -101,6 +97,14 @@ PRTE_EXPORT bool prte_schizo_base_check_qualifiers(char *directive,
char *qual);
PRTE_EXPORT bool prte_schizo_base_check_prte_param(char *param);
PRTE_EXPORT bool prte_schizo_base_check_pmix_param(char *param);
PRTE_EXPORT void prte_schizo_base_expose(char *param, char *prefix);
PRTE_EXPORT int prte_schizo_base_add_directive(prte_cli_result_t *results,
const char *deprecated, const char *target,
char *directive, bool report);
PRTE_EXPORT int prte_schizo_base_add_qualifier(prte_cli_result_t *results,
char *deprecated, char *target,
char *qualifier, bool report);


END_C_DECLS

Expand Down
31 changes: 30 additions & 1 deletion src/mca/schizo/base/help-schizo-base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# Copyright (c) 2020 Intel, Inc. All rights reserved.
# Copyright (c) 2020 IBM Corporation. All rights reserved.
# Copyright (c) 2021 Nanook Consulting. All rights reserved.
# Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
Expand Down Expand Up @@ -135,3 +135,32 @@ this parameter belongs to (e.g., "--prtemca" for a PRRTE param,
"--pmixmca" for a PMIx param, "--omca" for an OMPI param) or
ensure that the generic param is correct and known to the active
personality.
#
[too-many-instances]
The command line includes too many instances of the following option:

Command line: %s
Option: %s
Number of instances: %d
Allowed number: %d

Please correct the command line and try again.
#
[too-many-directives]
When resolving deprecated command line options, the result generated
too many directives for the target option:

Command line option: %s
Initial value: %s
Deprecated option: %s %s

Please correct the command line and try again.
#
[too-many-values]
When resolving deprecated command line options, the following
command line option has more than one value:

Option: %s

The resolution function cannot handle this scenario - please
notify the developers.
Loading