Provided by: libslurm-dev_20.11.7+really20.11.4-2_amd64 bug

NAME

       slurm_step_launch_params_t_init,      slurm_step_launch,     slurm_step_launch_fwd_signal,
       slurm_step_launch_wait_start,  slurm_step_launch_wait_finish,  slurm_step_launch_abort   -
       Slurm job step launch functions

SYNTAX

       #include <slurm/slurm.h>

       void slurm_step_launch_params_t_init (
            slurm_step_launch_params_t *launch_req
       );

       int slurm_step_launch (
            slurm_step_ctx ctx,
            const slurm_step_launch_params_t *launch_req,
            const slurm_step_launch_callbacks_t callbacks
       );

       void slurm_step_launch_fwd_signal (
            slurm_step_ctx ctx,
            int signo
       );

       int slurm_step_launch_wait_start (
            slurm_step_ctx ctx
       );

       void slurm_step_launch_wait_finish (
            slurm_step_ctx ctx
       );

       void slurm_step_launch_abort (
            slurm_step_ctx ctx
       );

ARGUMENTS

       callbacks
              Identify functions to be called when various events occur.

       ctx    Job  step  context.  Created  by slurm_step_ctx_create, used in subsequent function
              calls, and destroyed by slurm_step_ctx_destroy.

       launch_req
              Pointer to a structure allocated by the user containing specifications of  the  job
              step to be launched.

DESCRIPTION

       slurm_step_launch_params_t_init  Initialize  a  user-allocated  slurm_step_launch_params_t
       structure with default values.  This function will NOT allocate any new memory.

       slurm_step_launch Launch a parallel job step.

       slurm_step_launch_fwd_signal Forward a signal to all those nodes with running tasks.

       slurm_step_launch_wait_start Block until all tasks have started.

       slurm_step_launch_wait_finish  Block  until  all  tasks  have finished (or failed to start
       altogether).

       slurm_step_launch_abort Abort an in-progress launch, or terminate the fully  launched  job
       step. Can be called from a signal handler.

IO Redirection

       Use  the  local_fds entry in  slurm_step_launch_params_t to specify file descriptors to be
       used for standard input, output and error. Any local_fds not specified will result in  the
       launched  tasks  using  the  calling  process's standard input, output and error.  Threads
       created by slurm_step_launch will  completely  handle  copying  data  between  the  remote
       processes and the specified local file descriptors.

       Use  the  substructure  in  slurm_step_io_fds_t  to  restrict  the redirection of I/O to a
       specific node or task ID. For example, to redirect standard output only from task 0, set

       params.local_fds.out.taskid = 0;

       Use the remote_*_filename fields in slurm_step_launch_params_t to have launched tasks read
       and/or write directly to local files rather than transferring data over the network to the
       calling process.  These strings support many of  the  same  format  options  as  the  srun
       command.  Any  remote_*_filename  fields  set  will  supersede the corresponding local_fds
       entries. For example, the following code will direct each task to  write  standard  output
       and   standard   error   to   local   files  with  names  containing  the  task  ID  (e.g.
       "/home/bob/test_output/run1.out.0" and "/home/bob/test_output/run1.err.0" for task 0).

       params.remote_output_filename = "/home/bob/test_output/run1.out.%t";
       params.remote_error_filename  = "/home/bob/test_output/run1.err.%t";

RETURN VALUE

       slurm_step_launch and slurm_step_launch_wait_start  will  return  SLURM_SUCCESS  when  all
       tasks have successfully started, or SLURM_ERROR if the job step is aborted during launch.

ERRORS

       EINVAL Invalid argument

       SLURM_PROTOCOL_VERSION_ERROR Protocol version has changed, re-link your code.

       ESLURM_INVALID_JOB_ID The requested job id does not exist.

       ESLURM_ALREADY_DONE The specified job has already completed and cannot be modified.

       ESLURM_ACCESS_DENIED  The  requesting  user  lacks  authorization for the requested action
       (e.g. trying to delete or modify another user's job).

       ESLURM_INTERCONNECT_FAILURE failed to configure the node interconnect.

       ESLURM_BAD_DIST task distribution specification is invalid.

       SLURM_PROTOCOL_SOCKET_IMPL_TIMEOUT Timeout in communicating with Slurm controller.

EXAMPLE

       /*
        * To compile:
        * gcc test.c -o test -g -pthread -lslurm
        *
        * Or if Slurm is not in your default search paths:
        * gcc test.c -o test -g -pthread -I${SLURM_DIR}/include \
        *     -Wl,--rpath=${SLURM_DIR}/lib -L${SLURM_DIR}/lib -lslurm
        */
       #include <stdio.h>
       #include <stdlib.h>
       #include <string.h>
       #include <slurm/slurm.h>
       #include <slurm/slurm_errno.h>

       /*
        * Task-start callback (installed as callbacks.task_start in main).
        * Prints the number of started tasks and the node name carried in msg.
        */
       static void _task_start(launch_tasks_response_msg_t *msg)
       {
            const char *node = msg->node_name;

            printf("%d tasks started on node %s\n", msg->count_of_pids, node);
       }

       /*
        * Task-finish callback (installed as callbacks.task_finish in main).
        * Prints the number of tasks reported in msg.
        */
       static void _task_finish(task_exit_msg_t *msg)
       {
            printf("%d tasks finished\n",
                   msg->num_tasks);
       }

       /*
        * Create a job step context for 1 node / 4 tasks, launch the command
        * given on the command line in that step, then wait for all tasks to
        * start and to finish.
        *
        * argv[1..] is the command (and its arguments) to launch.
        *
        * Exits with status 1 if the step context cannot be created or if
        * slurm_step_launch fails; exits with status 0 otherwise (including
        * when slurm_step_launch_wait_start reports an aborted launch, which
        * is only reported on stderr).
        */
       int main (int argc, char *argv[])
       {
            slurm_step_ctx_params_t step_params;
            slurm_step_ctx step_ctx;
            slurm_step_launch_params_t params;
            slurm_step_launch_callbacks_t callbacks;
            uint32_t job_id, step_id;

            /* Start from the library defaults, then override what we need. */
            slurm_step_ctx_params_t_init(&step_params);
            step_params.node_count = 1;
            step_params.task_count = 4;
            step_params.overcommit = true;

            step_ctx = slurm_step_ctx_create(&step_params);
            if (step_ctx == NULL) {
                 slurm_perror("slurm_step_ctx_create");
                 exit(1);
            }
            slurm_step_ctx_get(step_ctx, SLURM_STEP_CTX_JOBID, &job_id);
            slurm_step_ctx_get(step_ctx, SLURM_STEP_CTX_STEPID, &step_id);
            printf("Ready to start job %u step %u\n", job_id, step_id);

            /* Everything after our own program name is the step's argv. */
            slurm_step_launch_params_t_init(&params);
            params.argc = argc - 1;
            params.argv = argv + 1;

            /*
             * callbacks lives on the stack: zero it first so that any
             * member not explicitly assigned below is NULL rather than
             * indeterminate when the library inspects the structure.
             */
            memset(&callbacks, 0, sizeof(callbacks));
            callbacks.task_start = _task_start;
            callbacks.task_finish = _task_finish;

            /* Three arguments, as declared in SYNTAX: ctx, launch_req, callbacks. */
            if (slurm_step_launch(step_ctx, &params, &callbacks)
                      != SLURM_SUCCESS) {
                 slurm_perror("slurm_step_launch");
                 exit(1);
            }
            printf("Sent step launch RPC\n");

            if (slurm_step_launch_wait_start(step_ctx) != SLURM_SUCCESS) {
                 fprintf(stderr, "job step was aborted during launch\n");
            } else {
                 printf("All tasks have started\n");
            }

            /* Blocks until all tasks have finished or failed to start. */
            slurm_step_launch_wait_finish(step_ctx);
            printf("All tasks have finished\n");

            slurm_step_ctx_destroy(step_ctx);
            exit(0);
       }

NOTE

       These functions are included in the libslurm library, which must be linked to your process
       for use (e.g. "cc myprog.c -lslurm").

COPYING

       Copyright  (C)  2006-2007 The Regents of the University of California.  Copyright (C) 2008
       Lawrence Livermore National Security.  Produced at Lawrence Livermore National  Laboratory
       (cf, DISCLAIMER).  CODE-OCEC-09-009. All rights reserved.

       This  file  is  part  of  Slurm,  a  resource  management  program.   For   details,   see
       <https://slurm.schedmd.com/>.

       Slurm is free software; you can redistribute it and/or modify it under the  terms  of  the
       GNU  General Public License as published by the Free Software Foundation; either version 2
       of the License, or (at your option) any later version.

       Slurm is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
       even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
       GNU General Public License for more details.

SEE ALSO

       slurm_step_ctx_create(3), slurm_step_ctx_destroy(3), slurm_get_errno(3),  slurm_perror(3),
       slurm_strerror(3), salloc(1), srun(1)