#! /bin/bash

# Copyright 2015 Princeton University Research Computing

# Input should look like this for an end record:
# $1: '-s' (the subject argument keyword)
# $2: The subject itself
# $3: The To: email address.
#
# The subject should look like this for a start record:
# Slurm Job_id=323 Name=ddt_clone Began, Queued time 00:00:01
#
# The subject should look like this for an end record:
# Slurm Job_id=327 Name=ddt_clone Ended, Run time 00:05:01, COMPLETED, ExitCode 0
# Slurm Job_id=328 Name=ddt_clone Failed, Run time 00:05:01, FAILED, ExitCode 127
# Slurm Job_id=342 Name=ddt_clone Ended, Run time 00:00:33, CANCELLED, ExitCode 0
# Not sure what to do about PENDING state resulting from a requeue request.
# Doing a seff on it for now:
# Slurm Job_id=326 Name=ddt_clone Failed, Run time 00:00:41, PENDING, ExitCode 0
#
# These end records are the only types of messages to process. They have 4 (rather
# than 2) comma-delimited arguments, of which ending status is the 3rd.
# Just pass through notifications without an ending status.

SEFF=/usr/bin/seff
MAIL=/bin/mail

#### JOB_COMPLETION_TIME ###
# The time needed for a job to complete and synchronize accounting data with
# slurmdbd. If you're running slurmctld under systemd control, executing
# `systemctl stop slurmctld` or `systemctl restart slurmctld` may hang for this
# long: while slurmctld is down, systemd still waits for all descendant
# processes (in this case the sleep executed from smail) to complete.
# The default value is 5s (half of the default MessageTimeout).
JOB_COMPLETION_TIME=5

# Print the ending status found in a notification subject, or an empty line
# when the subject has none.
# Arguments: $1 - the subject
# The status is the third comma-delimited field with its spaces removed.
# `read -a` does the splitting so the fields are never glob-expanded and the
# IFS change stays scoped to that one command.
smail_end_status() {
    local -a fields
    IFS=',' read -r -a fields <<< "$1"
    printf '%s\n' "${fields[2]// /}"
}

# Print the job id named in a notification subject, or an empty line when
# none can be parsed.
# Arguments: $1 - the subject
# Only the space-separated words before the first comma are examined. The
# Job_id=<id> token is the 2nd word, or the 4th when the 2nd word is "Array".
smail_job_id() {
    local -a words pair
    local id
    IFS=' ' read -r -a words <<< "${1%%,*}"
    if [[ "${words[1]}" == "Array" ]]; then
        IFS='=' read -r -a pair <<< "${words[3]}"
    else
        IFS='=' read -r -a pair <<< "${words[1]}"
    fi
    id=${pair[1]}
    # Remove the trailing "_*" until seff supports array jobs fully
    id=${id%"_*"}
    printf '%s\n' "$id"
}

# Mail one notification; end records get the seff report as the mail body.
# Arguments: $1 - '-s' (the subject argument keyword, not inspected)
#            $2 - the subject
#            $3 - the To: email address
# Globals:   SEFF, MAIL, JOB_COMPLETION_TIME, SLURM_CLUSTER_NAME (read)
# Returns:   2 when no recipient was given, otherwise the exit status of
#            the mail command.
main() {
    local notice=$2
    local -a recipients
    local subject status jobid=""

    # Split the address argument on spaces, as the unquoted expansion used to,
    # but without letting the shell glob-expand the result.
    IFS=' ' read -r -a recipients <<< "$3"
    if (( ${#recipients[@]} == 0 )); then
        printf 'Usage: %s -s <subject> <recipient>\n' "${0##*/}" >&2
        return 2
    fi

    # Prefix the cluster name when SLURM_CLUSTER_NAME is set; when it is not,
    # the subject is used as-is instead of gaining a leading space.
    subject="${SLURM_CLUSTER_NAME:+${SLURM_CLUSTER_NAME} }${notice}"

    # If we decide later to seff based on specific status codes,
    # we can test against $status.
    status=$(smail_end_status "$notice")
    if [[ -n "$status" ]]; then
        jobid=$(smail_job_id "$notice")
    fi

    if [[ -n "$jobid" ]]; then
        # Wait for the accounting data before asking seff about the job.
        sleep "$JOB_COMPLETION_TIME"
        "$SEFF" "$jobid" | "$MAIL" -s "$subject" "${recipients[@]}"
    else
        # No ending status, or no job id could be parsed: just pass the
        # notification through (mail reads the body from our stdin).
        "$MAIL" -s "$subject" "${recipients[@]}"
    fi
}

main "$@"