#!/bin/bash
#
# sgather - a counterpart of "sbcast"
#
# Copyright (c) 2013, ZIH, TU Dresden, Federal Republic of Germany
#
# Author: Matthias Jurenz
# Version 1.0
#
# Change history:
# version  date        author  comment
# 1.0      2013-10-29  jurenz  released initial version
#          2013-10-28  jurenz  do not perform a remote copy if the destination
#                              file is node-global (i.e. /scratch/* or /home/*)
#
# Operation:
#   The script runs in two modes, selected by the _SGATHER_SPAWNED variable.
#   - Not spawned ("master"): parses the command line, exports the settings
#     as _SGATHER_* environment variables and launches this same script on
#     the nodes of the current Slurm job via srun.
#   - Spawned ("worker"): copies SOURCE to "DEST.<hostname>" with scp (to the
#     batch host unless DEST matches a node-global path) and removes SOURCE
#     afterwards unless --keep was given.
#
# Environment defaults read by the master (command line options override):
#   SGATHER_COMPRESS, SGATHER_FORCE, SGATHER_KEEP, SGATHER_PRESERVE,
#   SGATHER_RECURSIVE   - enable the respective option when set (any value)
#   SGATHER_FANOUT      - default fanout  (8 if unset)
#   SGATHER_TIMEOUT     - default timeout (60 if unset)
#

#
# "global" variables
#

SCONTROL="scontrol"
SRUN="srun"
SCP="scp"
RM="rm"

VERSION="1.0"

# Define node-global file systems for which a remote copy is not required
# Specify any number of file system paths as desired here
GLOBAL_FILE[0]="/home/*"
GLOBAL_FILE[1]="/scratch/*"

#
# show_help - display help message
#
function show_help {
    cat << EOF
Usage: sgather [OPTIONS] SOURCE DEST
  -C, --compress        compress the file being transmitted
  -f, --force           ignore nonexistent source file
  -F, --fanout=num      specify message fanout
  -k, --keep            do not remove source file after transmission
  -p, --preserve        preserve modes and times of source file
  -r, --recursive       copy directories recursively
  -t, --timeout=secs    specify message timeout (seconds)
  -v, --verbose         provide detailed event logging
  -V, --version         print version information and exit

Help options:
  --help                show this help message
  --usage               display brief usage message
EOF
}

#
# show_usage - display brief usage message
#
function show_usage {
    echo "Usage: sgather [-CfFkprtvV] SOURCE DEST"
}

#
# verbose - print a verbose message to stdout
#
# $1 - minimum verbosity level required for the message to be printed
# $2 - message text
#
# The message is prefixed with "sgather:" in the master and with
# "sgather(<hostname>):" in a spawned instance.
#
function verbose {
    local prefix

    if [ "${_SGATHER_VERBOSE:-0}" -ge "$1" ]; then
        if [ -z "$_SGATHER_SPAWNED" ]; then
            prefix="sgather:"
        else
            prefix="sgather($(hostname)):"
        fi
        printf '%s %s\n' "$prefix" "$2"
    fi
}

#
# is_positive_int - succeed if $1 is an integer greater than zero
#
# The self-comparison fails (with a suppressed diagnostic) for anything
# that is not an integer, including the empty string.
#
function is_positive_int {
    [ "$1" -eq "$1" ] 2>/dev/null && [ "$1" -gt 0 ]
}

#
# sgather_master - parse options and spawn this script on all job nodes
#
# Arguments: the script's command line ("$@")
# Exits non-zero on invalid options, when not running inside a Slurm job,
# or when scontrol/srun fail.
#
function sgather_master {
    local opts tmp pattern nodelist node sublist remaining
    local opt_error=0
    local -a nodes=()
    local -a batch=()

    # defaults, optionally preset through SGATHER_* environment variables
    #
    _SGATHER_COMPRESS=false
    if [ -n "${SGATHER_COMPRESS+x}" ]; then _SGATHER_COMPRESS=true; fi
    _SGATHER_FORCE=false
    if [ -n "${SGATHER_FORCE+x}" ]; then _SGATHER_FORCE=true; fi
    _SGATHER_FANOUT=8
    if [ -n "${SGATHER_FANOUT+x}" ]; then _SGATHER_FANOUT=$SGATHER_FANOUT; fi
    _SGATHER_KEEP=false
    if [ -n "${SGATHER_KEEP+x}" ]; then _SGATHER_KEEP=true; fi
    _SGATHER_PRESERVE=false
    if [ -n "${SGATHER_PRESERVE+x}" ]; then _SGATHER_PRESERVE=true; fi
    _SGATHER_RECURSIVE=false
    if [ -n "${SGATHER_RECURSIVE+x}" ]; then _SGATHER_RECURSIVE=true; fi
    _SGATHER_TIMEOUT=60
    if [ -n "${SGATHER_TIMEOUT+x}" ]; then _SGATHER_TIMEOUT=$SGATHER_TIMEOUT; fi
    _SGATHER_VERBOSE=0
    _SGATHER_SOURCE=
    _SGATHER_DEST=
    _SGATHER_REMOTE_COPY=true
    _SGATHER_BATCHHOST=

    # parse command line options
    #
    opts=$(getopt -n "$0" --options "CfF:hkprt:vV" \
        --long "compress,force,fanout:,help,keep,preserve,recursive,timeout:,usage,verbose,version" \
        -- "$@") || opt_error=$?

    # getopt emits a shell-quoted word list; eval is the documented way to
    # load it back into the positional parameters
    eval set -- "$opts"

    while [ "$opt_error" -eq 0 ] && [ $# -gt 0 ]; do
        case "$1" in
            -C|--compress)
                _SGATHER_COMPRESS=true
                shift
                ;;
            -f|--force)
                _SGATHER_FORCE=true
                shift
                ;;
            -F|--fanout)
                _SGATHER_FANOUT=$2
                shift 2
                ;;
            -h|--help)
                show_help
                exit 0
                ;;
            -k|--keep)
                _SGATHER_KEEP=true
                shift
                ;;
            -p|--preserve)
                _SGATHER_PRESERVE=true
                shift
                ;;
            -r|--recursive)
                _SGATHER_RECURSIVE=true
                shift
                ;;
            -t|--timeout)
                _SGATHER_TIMEOUT=$2
                shift 2
                ;;
            --usage)
                show_usage
                exit 0
                ;;
            -v|--verbose)
                _SGATHER_VERBOSE=$(( _SGATHER_VERBOSE + 1 ))
                shift
                ;;
            -V|--version)
                echo "sgather $VERSION ($("$SRUN" --version))"
                exit $?
                ;;
            --)
                shift
                if [ $# -ne 2 ]; then
                    echo "Need two file names, have $# names" >&2
                    opt_error=1
                else
                    _SGATHER_SOURCE="$1"
                    # convert relative to absolute destination path, if necessary
                    case "$2" in
                        /*) _SGATHER_DEST="$2" ;;
                        *)  _SGATHER_DEST="$PWD/$2" ;;
                    esac
                fi
                break
                ;;
            *)
                # not expected from getopt; bail out instead of looping forever
                echo "$0: unexpected argument -- '$1'" >&2
                opt_error=1
                break
                ;;
        esac
    done

    # verify given fanout (integer in the range 1..8)
    #
    if [ "$opt_error" -eq 0 ]; then
        if ! is_positive_int "$_SGATHER_FANOUT" || [ "$_SGATHER_FANOUT" -gt 8 ]; then
            echo "$0: invalid fanout -- '$_SGATHER_FANOUT'" >&2
            opt_error=1
        fi
    fi

    # verify given timeout (integer greater than zero)
    #
    if [ "$opt_error" -eq 0 ]; then
        if ! is_positive_int "$_SGATHER_TIMEOUT"; then
            echo "$0: invalid timeout -- '$_SGATHER_TIMEOUT'" >&2
            opt_error=1
        fi
    fi

    if [ "$opt_error" -ne 0 ]; then
        echo "Try \"sgather --help\" for more information" >&2
        exit "$opt_error"
    fi

    verbose 1 "-----------------------------"
    verbose 1 "compress = $_SGATHER_COMPRESS"
    verbose 1 "force = $_SGATHER_FORCE"
    verbose 1 "fanout = $_SGATHER_FANOUT"
    verbose 1 "keep source = $_SGATHER_KEEP"
    verbose 1 "preserve = $_SGATHER_PRESERVE"
    verbose 1 "recursive = $_SGATHER_RECURSIVE"
    verbose 1 "timeout = $_SGATHER_TIMEOUT"
    verbose 1 "verbose = $_SGATHER_VERBOSE"
    verbose 1 "source = $_SGATHER_SOURCE"
    verbose 1 "dest = $_SGATHER_DEST"
    verbose 1 "-----------------------------"

    # check whether we're within a Slurm job
    #
    if [ -z "$SLURM_JOBID" ]; then
        echo "$0: error: Command only valid from within Slurm job" >&2
        exit 1
    fi

    verbose 1 "jobid = $SLURM_JOBID"
    verbose 1 "node_cnt = $SLURM_NNODES"
    verbose 1 "node_list = $SLURM_NODELIST"

    # check whether the destination file is node-global
    # (->no remote copying is necessary)
    #
    for pattern in "${GLOBAL_FILE[@]}"; do
        # the pattern is deliberately unquoted so that it acts as a glob
        # shellcheck disable=SC2254
        case "$_SGATHER_DEST" in
            $pattern) _SGATHER_REMOTE_COPY=false ;;
        esac
    done

    verbose 1 "remote copy = $_SGATHER_REMOTE_COPY"

    # determine the batch host node via scontrol
    #
    tmp=$("$SCONTROL" show job "$SLURM_JOBID" | grep BatchHost) || exit $?
    # keep only the value that follows "BatchHost=", up to the next blank
    tmp=${tmp#*BatchHost=}
    _SGATHER_BATCHHOST=${tmp%%[[:space:]]*}
    if [ -z "$_SGATHER_BATCHHOST" ]; then
        echo "$0: error: Unable to determine batch host of job $SLURM_JOBID" >&2
        exit 1
    fi

    # export control environment variables for subsequent call to itself
    #
    export _SGATHER_COMPRESS _SGATHER_FORCE _SGATHER_FANOUT _SGATHER_KEEP
    export _SGATHER_PRESERVE _SGATHER_RECURSIVE _SGATHER_TIMEOUT
    export _SGATHER_VERBOSE _SGATHER_SOURCE _SGATHER_DEST
    export _SGATHER_REMOTE_COPY _SGATHER_BATCHHOST
    export _SGATHER_SPAWNED=1

    # spawn this script to all job nodes
    #
    # either in one step, if the destination file is node-global (remote
    # copying isn't necessary) ...
    #
    if [ "$_SGATHER_REMOTE_COPY" != true ]; then
        "$SRUN" --nodes="$SLURM_NNODES" --ntasks-per-node=1 "$0" || exit $?
        return 0
    fi

    # ... or in multiple steps with regard to $_SGATHER_FANOUT
    #
    # scontrol's status is checked separately; piping straight into sort
    # would hide a failure
    nodelist=$("$SCONTROL" show hostnames "$SLURM_NODELIST") || exit $?
    while IFS= read -r node; do
        if [ -n "$node" ]; then
            nodes+=("$node")
        fi
    done < <(sort <<< "$nodelist")

    if [ "${#nodes[@]}" -eq 0 ]; then
        echo "$0: error: Unable to expand node list '$SLURM_NODELIST'" >&2
        exit 1
    fi

    remaining=${#nodes[@]}
    for node in "${nodes[@]}"; do
        batch+=("$node")
        remaining=$(( remaining - 1 ))

        # launch a step once the batch is full or the node list is exhausted
        if [ "$remaining" -eq 0 ] || [ "${#batch[@]}" -eq "$_SGATHER_FANOUT" ]; then
            sublist=$(IFS=,; echo "${batch[*]}")
            "$SRUN" --nodelist="$sublist" --ntasks="${#batch[@]}" \
                --ntasks-per-node=1 "$0" || exit $?
            batch=()
        fi
    done
}

#
# sgather_worker - copy the local source file to its destination
#
# Reads all settings from the _SGATHER_* environment variables exported by
# the master. Exits non-zero if the source is missing (unless force is set),
# or if scp or rm fail.
#
function sgather_worker {
    local host dest
    local -a scp_cmd

    host=$(hostname)

    # check whether the source file exist
    #
    if [ ! -e "$_SGATHER_SOURCE" ]; then
        verbose 0 "error: Can't open $_SGATHER_SOURCE" >&2
        if [ "$_SGATHER_FORCE" = true ]; then
            verbose 0 "$_SGATHER_SOURCE ignored" >&2
            exit 0
        fi
        exit 1
    fi

    # compose scp command
    #

    # prepend destination node name to destination file, if remote copying
    # is necessary
    #
    dest=$_SGATHER_DEST
    if [ "$_SGATHER_REMOTE_COPY" = true ] && [ "$host" != "$_SGATHER_BATCHHOST" ]; then
        dest="$_SGATHER_BATCHHOST:$dest"
    fi

    # append source node name to destination file
    dest="$dest.$host"

    # the command is kept in an array so that paths containing whitespace
    # survive as single arguments
    scp_cmd=("$SCP")
    if [ "$_SGATHER_VERBOSE" -eq 0 ]; then
        scp_cmd+=(-q)
    elif [ "$_SGATHER_VERBOSE" -ge 2 ]; then
        scp_cmd+=(-v)
    fi
    if [ "$_SGATHER_RECURSIVE" = true ]; then
        scp_cmd+=(-r)
    fi
    if [ "$_SGATHER_COMPRESS" = true ]; then
        scp_cmd+=(-C)
    fi
    if [ "$_SGATHER_TIMEOUT" -ne 0 ]; then
        scp_cmd+=(-o "ConnectTimeout=$_SGATHER_TIMEOUT")
    fi
    if [ "$_SGATHER_PRESERVE" = true ]; then
        scp_cmd+=(-p)
    fi
    scp_cmd+=("$_SGATHER_SOURCE" "$dest")

    # run scp
    #
    verbose 1 "executing \"${scp_cmd[*]}\""
    "${scp_cmd[@]}" || exit $?

    # remove source file, if desired
    #
    if [ "$_SGATHER_KEEP" != true ]; then
        verbose 1 "removing $_SGATHER_SOURCE"
        "$RM" -r -- "$_SGATHER_SOURCE" || exit $?
    fi
}

#
# "main"
#

if [ -z "$_SGATHER_SPAWNED" ]; then
    sgather_master "$@"
else
    sgather_worker
fi

exit 0