#!/bin/bash
# Copyright 2015 Princeton University Research Computing
#
# Mail wrapper for Slurm job notifications. Notifications that carry an
# ending status are mailed with the output of seff for that job as the
# message body; all other notifications are passed through to the mail
# program unchanged.
#
# Input should look like this:
#   $1: '-s' (the subject argument keyword)
#   $2: The subject itself
#   $3: The To: email address.
#
# The subject should look like this for a start record:
#   Slurm Job_id=323 Name=ddt_clone Began, Queued time 00:00:01
#
# The subject should look like this for an end record:
#   Slurm Job_id=327 Name=ddt_clone Ended, Run time 00:05:01, COMPLETED, ExitCode 0
#   Slurm Job_id=328 Name=ddt_clone Failed, Run time 00:05:01, FAILED, ExitCode 127
#   Slurm Job_id=342 Name=ddt_clone Ended, Run time 00:00:33, CANCELLED, ExitCode 0
# Not sure what to do about the PENDING state resulting from a requeue request.
# Doing a seff on it for now:
#   Slurm Job_id=326 Name=ddt_clone Failed, Run time 00:00:41, PENDING, ExitCode 0
#
# These end records are the only types of messages to process. They have 4
# (rather than 2) comma-delimited arguments, of which ending status is the 3rd.
# Just pass through notifications without an ending status.
#
# Exit status: that of the mail pipeline, or 2 when called with fewer than
# three arguments.

SEFF=/usr/bin/seff
MAIL=/bin/mail

#### JOB_COMPLETION_TIME ###
# The time needed for a job to complete and synchronize accounting data with
# slurmdbd. If you're running slurmctld under systemd control, executing
# `systemctl stop slurmctld` or `systemctl restart slurmctld` may hang for this
# time. While slurmctld is down, systemd will still wait for all descendant
# processes (in this case the sleep executed from smail) to complete.
# The default value is 5s (half of default MessageTimeout)
JOB_COMPLETION_TIME=5

#######################################
# Print the ending status of a notification subject: the 3rd comma-delimited
# field with all spaces removed. Prints nothing when there is no such field.
# If we decide later to seff based on specific status codes, this is the
# value to test against.
# Arguments: $1 - notification subject
# Outputs:   status on stdout, without a trailing newline
#######################################
parse_status() {
  local -a fields=()
  # read -a splits on IFS without pathname expansion and without touching
  # the global IFS.
  IFS=',' read -r -a fields <<<"$1"
  local status=${fields[2]:-}
  printf '%s' "${status// /}"
}

#######################################
# Print the job id named in a notification subject.
# Only the text before the first comma is examined. The "Job_id=<id>" token
# is the 2nd space-delimited word, or the 4th when the 2nd word is "Array".
# Arguments: $1 - notification subject
# Outputs:   job id on stdout (empty if none found), no trailing newline
#######################################
parse_job_id() {
  local -a words=() parts=()
  local token id

  IFS=' ' read -r -a words <<<"${1%%,*}"
  if [[ "${words[1]:-}" == "Array" ]]; then
    token=${words[3]:-}
  else
    token=${words[1]:-}
  fi

  # The id is the 2nd '='-delimited piece of the token. Splitting with read
  # keeps a literal "323_*" from being expanded against files in the
  # current directory.
  IFS='=' read -r -a parts <<<"$token"
  id=${parts[1]:-}

  # Remove the trailing "_*" until seff supports array jobs fully
  id=${id%"_*"}
  printf '%s' "$id"
}

#######################################
# Entry point: mail the notification, with seff output as the body for
# notifications that carry an ending status.
# Globals:   SEFF, MAIL, JOB_COMPLETION_TIME, SLURM_CLUSTER_NAME (all read)
# Arguments: $1 - '-s' (not inspected), $2 - subject, $3 - recipient
# Returns:   status of the mail pipeline; 2 on missing arguments
#######################################
main() {
  if (( $# < 3 )); then
    printf 'Usage: %s -s SUBJECT RECIPIENT\n' "${0##*/}" >&2
    return 2
  fi

  # Prefix the subject with the cluster name (empty if the variable is unset).
  local subject="${SLURM_CLUSTER_NAME:-} $2"
  local recipient=$3
  local status jobid

  status=$(parse_status "$2")
  jobid=$(parse_job_id "$2")

  if [[ -n "$status" && -n "$jobid" ]]; then
    # Give accounting data time to reach slurmdbd before querying it.
    sleep "$JOB_COMPLETION_TIME"
    "$SEFF" "$jobid" | "$MAIL" -s "$subject" "$recipient"
  else
    # No ending status (or no parsable job id): pass the notification
    # through; the mail program reads the body from our stdin.
    "$MAIL" -s "$subject" "$recipient"
  fi
}

main "$@"