#! /usr/bin/perl -w
###############################################################################
#
# qstat - queries slurm jobs in familar pbs format.
#
#
###############################################################################
#  Copyright (C) 2007 The Regents of the University of California.
#  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
#  Written by Danny Auble <auble1@llnl.gov>.
#  CODE-OCEC-09-009. All rights reserved.
#
#  This file is part of Slurm, a resource management program.
#  For details, see <https://slurm.schedmd.com/>.
#  Please also read the included file: DISCLAIMER.
#
#  Slurm is free software; you can redistribute it and/or modify it under
#  the terms of the GNU General Public License as published by the Free
#  Software Foundation; either version 2 of the License, or (at your option)
#  any later version.
#
#  In addition, as a special exception, the copyright holders give permission
#  to link the code of portions of this program with the OpenSSL library under
#  certain conditions as described in each individual source file, and
#  distribute linked combinations including the two. You must obey the GNU
#  General Public License in all respects for all of the code used other than
#  OpenSSL. If you modify file(s) with this exception, you may extend this
#  exception to your version of the file(s), but you are not obligated to do
#  so. If you do not wish to do so, delete this exception statement from your
#  version.  If you delete this exception statement from all source files in
#  the program, then also delete it here.
#
#  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
#  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
#  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
#  details.
#
#  You should have received a copy of the GNU General Public License along
#  with Slurm; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
#
#  Based off code with permission copyright 2006, 2007 Cluster Resources, Inc.
###############################################################################


use strict;
use FindBin;
use Getopt::Long 2.24 qw(:config no_ignore_case);
use lib qw(/usr/lib/x86_64-linux-gnu/perl/5.30.0);
use autouse 'Pod::Usage' => qw(pod2usage);
use Slurm ':all';
use Slurmdb ':all'; # needed for getting the correct cluster dims

# Parse Command Line Arguments
my (
    $all,              $diskReservation, $executable,   $full,
    $giga,             $help,            $idle,         $mega,
    $man,              $nodes,           $one,          $queueList,
    $queueStatus,      $running,         $serverStatus, $siteSpecific,
    $userList,         $hostname,        $rc,           $header_printed
);

GetOptions(
	'a'      => \$all,
	'B=s'    => \$serverStatus,
	'e'      => \$executable,
	'f'      => \$full,
	'help|?' => \$help,
	'i'      => \$idle,
	'G'      => \$giga,
	'man'    => \$man,
	'M'      => \$mega,
	'n'      => \$nodes,
	'1'      => \$one,
	'r'      => \$running,
	'R'      => \$diskReservation,
	'q'      => \$queueStatus,
	'Q:s'    => \$queueList,
	'u=s'    => \$userList,
	'W=s'    => \$siteSpecific,
	) or pod2usage(2);

$rc = 153; # if we don't find what we are looking for return this.

# Display usage if necessary
pod2usage(0) if $help;
if ($man)
{
	if ($< == 0)    # Cannot invoke perldoc as root
	{
		my $id = eval { getpwnam("nobody") };
		$id = eval { getpwnam("nouser") } unless defined $id;
		$id = -2                          unless defined $id;
		$<  = $id;
	}
	$> = $<;                         # Disengage setuid
	$ENV{PATH} = "/bin:/usr/bin";    # Untaint PATH
	delete @ENV{'IFS', 'CDPATH', 'ENV', 'BASH_ENV'};
	if ($0 =~ /^([-\/\w\.]+)$/) { $0 = $1; }    # Untaint $0
	else { die "Illegal characters were found in \$0 ($0)\n"; }
	pod2usage(-exitstatus => 0, -verbose => 2);
}

# Use sole remaining argument as jobIds

if($^O eq 'aix') {
	$hostname = `hostname`;
} else {
	$hostname = `hostname -f`;
}
chomp $hostname;

# Handle unsupported arguments
#unsupportedOption("-e", DIE) if $executable;
#unsupportedOption("-W", DIE) if $siteSpecific;
#unsupportedOption("-R", DIE) if $diskReservation;
#unsupportedOption("-q", DIE) if $queueStatus;
#unsupportedOption("-Q <queue_destination>", DIE) if $queueDestination;
#unsupportedOption("-B", DIE) if $serverStatus;

# Build command

# foreach my $job (@{$resp->{job_array}}) {
# 	while(my ($key, $value) = each(%$job)) {
# 		print "$key => $value\n";
# 	}
# }
my $now_time = time();
my $job_flags = SHOW_ALL | SHOW_DETAIL;

if(defined($queueList)) {
	my @queueIds = split(/,/, $queueList) if $queueList;

	my $resp = Slurm->load_partitions(0, SHOW_ALL);
	if(!$resp) {
		die "Problem loading jobs.\n";
	}

	my $line = 0;
	foreach my $part (@{$resp->{partition_array}}) {
		if (@queueIds) {
			next unless grep /^$part->{'name'}$/, @queueIds;
		}

		if ($full) { # Full
			print_part_full($part);
		} else { # Brief
			print_part_brief($part, $line);
			$line++;
		}
		$rc = 0;
	}
} elsif($queueStatus) {
	my $jresp = Slurm->load_jobs(0, $job_flags);
	die "Problem loading jobs.\n" if(!$jresp);
	my $resp = Slurm->load_partitions(0, SHOW_ALL);
	die "Problem loading partitions.\n" if(!$resp);
	my $total_running = 0;
	my $total_queued = 0;
	my $line = 0;
	foreach my $part (@{$resp->{partition_array}}) {
		$part->{'running_jobs'} = 0;
		$part->{'queued_jobs'} = 0;
		foreach my $job (@{$jresp->{job_array}}) {
			next if($job->{'partition'} ne $part->{'name'});
			$part->{'running_jobs'}++
				if($job->{'job_state'} == JOB_RUNNING);
			$part->{'queued_jobs'}++
				if($job->{'job_state'} == JOB_PENDING);
		}
		$total_running += $part->{'running_jobs'};
		$total_queued += $part->{'queued_jobs'};
		print_part_limits($part, $line);
		$line++;
        }
	printf("                                               ----- -----\n");
	printf("                                               %5d %5d\n",
	       $total_running, $total_queued);
} else {
	my @jobIds = @ARGV;
	my @userIds = split(/,/, $userList) if $userList;

	my $resp = Slurm->load_jobs(0, $job_flags);
	if(!$resp) {
		die "Problem loading jobs.\n";
	}

	my $line = 0;
	foreach my $job (@{$resp->{job_array}}) {
		my $use_time      = $now_time;
		my $state = $job->{'job_state'};
		if($job->{'end_time'} && $job->{'end_time'} < $now_time) {
			$use_time = $job->{'end_time'};
		}
		$job->{'statPSUtl'} = job_time_used($job);
		$job->{'aWDuration'} = $job->{'statPSUtl'};

		$job->{'allocNodeList'} = $job->{'nodes'} || "--";
		$job->{'stateCode'} = stateCode($job->{'job_state'});
		$job->{'user_name'} = getpwuid($job->{'user_id'}) || "nobody";
		$job->{'name'} = "Allocation" if !$job->{'name'};

		# Filter jobs according to options and arguments
		if (@jobIds) {
			next unless grep /^$job->{'job_id'}$/, @jobIds;
		} else {
			if ($running) {
				next unless ($job->{'stateCode'} eq 'R'
					     || $job->{'stateCode'} eq 'S');
			}
			if ($idle) {
				next unless ($job->{'stateCode'} eq 'Q');
			}
			if (@userIds) {
				next unless
					grep /^$job->{'user_name'}$/, @userIds;
			}
		}

		if ($all || $idle || $nodes || $running
		    || $giga || $mega || $userList) { # Normal
			print_job_select($job);
		} elsif ($full) { # Full
			print_job_full($job);
		} else { # Brief
			print_job_brief($job, $line);
			$line++;
		}
		$rc = 0;
	}

	# return 0 even if no records printed when using -u flag
	if (@userIds) {
		$rc = 0;
	}
}

# Exit with status code
exit $rc;

################################################################################
# $stateCode = stateCode($state)
# Converts a slurm state into a one-character pbs state code
################################################################################
sub stateCode
{
	my ($state) = @_;

	if(!defined($state)) {
		return 'U';
	}
	my $base_state = $state & JOB_STATE_BASE;
	if ($base_state == JOB_COMPLETE
		|| $base_state == JOB_CANCELLED
		|| $base_state == JOB_TIMEOUT
		|| $base_state == JOB_NODE_FAIL
		|| $base_state == JOB_PREEMPTED
		|| $base_state == JOB_BOOT_FAIL
		|| $base_state == JOB_FAILED)    { return 'C' }
	elsif ($base_state == JOB_RUNNING)   { return 'R' }
	elsif ($base_state == JOB_PENDING)   { return 'Q' }
	elsif ($base_state == JOB_SUSPENDED) { return 'S' }
	else                                 { return 'U' }   # Unknown
	return 'U';
}


################################################################################
# $hhmm = hhmm($hhmmss)
# Converts a duration in hhmmss to hhmm
################################################################################
sub hhmm
{
	my ($hhmmss) = @_;

	if ($hhmmss == INFINITE) {
		return "Infinite";
	}

	# Convert hhmmss to duration in minutes
	$hhmmss = 0 unless $hhmmss;
	$hhmmss =~ /(?:(\d+):)?(?:(\d+):)?([\d\.]+)/;
	my ($hh, $mm, $ss) = ($1 || 0, $2 || 0, $3 || 0);

	my $duration = int($ss / 60);
	$duration += $mm * 1;
	$duration += $hh * 60;

	# Convert duration in minutes to hhmm
	my $hours = int($duration / 60);
	$duration = $duration - $hours * 60;
	my $minutes = $duration;
	my $hhmm = sprintf('%02d:%02d', $hours, $minutes);
	return $hhmm;
}

################################################################################
# $hhmmss = hhmmss($hhmmss)
# Converts a duration in hhmmss to hhmmss
################################################################################
sub hhmmss
{
	my ($hhmmss) = @_;
	if ($hhmmss == INFINITE) {
		return "Infinite";
	}
	# Convert hhmmss to duration in seconds
	$hhmmss = 0 unless $hhmmss;
	$hhmmss =~ /(?:(\d+):)?(?:(\d+):)?([\d\.]+)/;
	my ($hh, $mm, $ss) = ($1 || 0, $2 || 0, $3 || 0);

	my $duration = int($ss);
	$duration += $mm * 60;
	$duration += $hh * 3600;

	# Convert duration in seconds to hhmmss
	my $hours = int($duration / 3600);
	$duration = $duration - $hours * 3600;
	my $minutes = int($duration / 60);
	$duration = $duration - $minutes * 60;
	my $seconds = $duration;
	my $hhmmss2 = sprintf('%02d:%02d:%02d', $hours, $minutes, $seconds);
	return $hhmmss2;
}

################################################################################
# $ddhhmm = ddhhmm($hhmmss)
# Converts a duration in hhmmss to ddhhmm
################################################################################
sub ddhhmm
{
	my ($hhmmss) = @_;

	if ($hhmmss && $hhmmss == INFINITE) {
		return "Infinite";
	}
	# Convert hhmmss to duration in minutes
	$hhmmss = 0 unless $hhmmss;
	$hhmmss =~ /(?:(\d+):)?(?:(\d+):)?([\d\.]+)/;
	my ($hh, $mm, $ss) = ($1 || 0, $2 || 0, $3 || 0);

	my $duration = int($ss / 60);
	$duration += $mm * 1;
	$duration += $hh * 60;

	# Convert duration in minutes to ddhhmm
	my $days = int($duration / 1440);
	$duration = $duration - $days * 1440;
	my $hours = int($duration / 60);
	$duration = $duration - $hours * 60;
	my $minutes = $duration;
	my $ddhhmm = sprintf('%02d:%02d:%02d', $days, $hours, $minutes);
	return $ddhhmm;
}

################################################################################
# $humanReadableTime = hRTime($epochTime)
# Converts an epoch time into a human readable time
################################################################################
sub hRTime
{
	my ($epochTime) = @_;

	if ($epochTime == INFINITE) {
		return "Infinite";
	}

	return scalar localtime $epochTime;
}

sub job_time_used
{
	my ($job) = @_;

	my $end_time;

	return 0 if ($job->{'start_time'} == 0)
		|| ($job->{'job_state'} == JOB_PENDING);

	return $job->{'pre_sus_time'} if $job->{'job_state'} == JOB_SUSPENDED;

	if (($job->{'job_state'} == JOB_RUNNING)
	    ||  ($job->{'end_time'} == 0)) {
		$end_time = time;
	} else {
		$end_time = $job->{'end_time'};
	}

	return ($end_time - $job->{'suspend_time'} + $job->{'pre_sus_time'})
		if $job->{'suspend_time'};
	return ($end_time - $job->{'start_time'});
}

sub yes_no
{
	my ($query) = @_;
	return "yes" if $query;
	return "no";
}

sub en_dis
{
	my ($query) = @_;
	return "E" if $query;
	return "D";
}

sub running_stopped
{
	my ($query) = @_;
	return "R" if $query;
	return "S";
}

sub get_exec_host
{
	my ($job) = @_;

	my $execHost = "--";
	if ($job->{'nodes'} && $job->{'job_resrcs'}) {
		my @allocNodes = ();
		my $hl = Slurm::Hostlist::create($job->{'nodes'});
		my $inx = 0;
		my $cpu_cnt = 0;
		while((my $host = Slurm::Hostlist::shift($hl))) {
			push(@allocNodes, "$host/" .
			     Slurm->job_cpus_allocated_on_node_id(
				     $job->{'job_resrcs'}, $inx++));
		}
		$execHost = join '+', @allocNodes;
	}
	return $execHost;
}

###############################################################################
# Job Functions
###############################################################################
sub print_job_brief
{
	my ($job, $line_num) = @_;

	if(!$line_num) {
		printf("%-19s %-16s %-15s %-8s %-1s %-15s\n",
		       "Job id", "Name", "Username", "Time Use",  "S", "Queue");
		printf("%-19s %-16s %-15s %-8s %-1s %-15s\n",
		       '-' x 19, '-' x 16, '-' x 15, '-' x 8, '-' x 1, '-' x 15);
	}
	printf("%-19.19s %-16.16s %-15.15s %-8.8s %-1.1s %-15.15s\n",
	       $job->{'job_id'}, $job->{'name'}, $job->{'user_name'},
	       ddhhmm($job->{'statPSUtl'}), $job->{'stateCode'},
	       $job->{'partition'});
}

sub print_job_select
{
	my ($job) = @_;

	my $sessID = "--";
	my $execHost;

	if (!defined $header_printed) {
		print "\n${hostname}:\n";

		printf("%-20s %-8s %-8s %-20s %-6s %-5s %-5s %-6s %-5s %-1s %-5s\n",
		       "", "", "", "", "", "", "", "Req'd", "Req'd", "", "Elap");
		printf(
			"%-20s %-8s %-8s %-20s %-6s %-5s %-5s %-6s %-5s %-1s %-5s\n",
			"Job id", "Username", "Queue", "Name", "SessID", "NDS",
			"TSK",    "Memory",   "Time Use",  "S",       "Time"
			);
		printf(
			"%-20s %-8s %-8s %-20s %-6s %-5s %-5s %-6s %-5s %-1s %-5s\n",
			'-' x 20, '-' x 8, '-' x 8, '-' x 20, '-' x 6, '-' x 5,
			'-' x 5,  '-' x 6, '-' x 5, '-',      '-' x 5
			);
		$header_printed = 1;
	}
	$execHost = get_exec_host($job) if $nodes;

	printf("%-20.20s %-8.8s %-8.8s %-20.20s " .
	       "%-6.6s %5.5s %5.5s %6.6s %-5.5s %-1s %-5.5s",
	       $job->{'job_id'},
	       $job->{'user_name'},
	       $job->{'partition'},
	       $job->{'name'},
	       $sessID,
	       $job->{'num_nodes'} || "--",
	       $job->{'num_cpus'} || "--",
	       $job->{'job_min_memory'} || "--",
	       hhmm($job->{'time_limit'} * 60),
	       $job->{'stateCode'},
	       hhmm($job->{'aWDuration'}));

	if($execHost) {
		if(!$one) {
			printf("\n");
		}
		printf("   %s\n", $execHost);
	} else {
		printf("\n");
	}
}

sub print_job_full
{
	my ($job) = @_;
	# Print the job attributes
	printf("Job Id:\t%s\n", $job->{'job_id'});
	printf("\tJob_Name = %s\n", $job->{'name'}) if $job->{'name'};
	printf("\tJob_Owner = %s@%s\n",
	       $job->{'user_name'}, $job->{'alloc_node'});

	printf "\tinteractive = True\n" if !$job->{'batch_flag'};

	printf("\tjob_state = %s\n", $job->{'stateCode'});
	printf("\tqueue = %s\n", $job->{'partition'});

	printf("\tqtime = %s\n", hRTime($job->{'submit_time'}));
	printf("\tmtime = %s\n", hRTime($job->{'start_time'}))
		if $job->{'start_time'};
	printf("\tctime = %s\n", hRTime($job->{'end_time'}))
		if $job->{'end_time'};

	printf("\tAccount_Name = %s\n", $job->{'account'}) if $job->{'account'};

	my $execHost = get_exec_host($job);
	printf("\texec_host = %s\n", $execHost) if $execHost ne '--';

	printf("\tPriority = %u\n", $job->{'priority'});
	printf("\teuser = %s(%d)\n", $job->{'user_name'}, $job->{'user_id'});

	# can't run getgrgid inside printf it appears the input gets set to
	# x if ran there.
	my $user_group = getgrgid($job->{'group_id'});
	printf("\tegroup = %s(%d)\n", $user_group, $job->{'group_id'});

	printf("\tResource_List.walltime = %s\n", hhmmss($job->{'time_limit'} * 60));
	printf("\tResource_List.nodect = %d\n", $job->{'num_nodes'})
		if $job->{'num_nodes'};
	printf("\tResource_List.ncpus = %s\n", $job->{'num_cpus'})
		if $job->{'num_cpus'};

	if ($job->{'reqNodes'}) {
		my $nodeExpr = $job->{'reqNodes'};
		$nodeExpr .= ":ppn=" . $job->{'ntasks_per_node'}
		        if $job->{'ntasks_per_node'};

		printf("\tResource_List.nodes = %s\n", $nodeExpr);
	}

	print "\n";
}

###############################################################################
# Partition Functions
###############################################################################
sub print_part_brief
{
	my ($part, $line_num) = @_;

	if(!$line_num) {
		printf("%-16s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %1s\n",
		       "Queue", "Max", "Tot", "Ena",  "Str", "Que", "Run",
		       "Hld", "Wat", "Trn", "Ext", "T");
		printf("%-16s %5s %5s %5s %5s %5s %5s %5s %5s %5s %5s %1s\n",
		       "----------------", "---", "---", "---",  "---", "---",
		       "---", "---", "---", "---", "---", "-");
	}
	printf("%-16.16s %5.5s %5.5s %5.5s %5.5s %5.5s %5.5s %5.5s " .
	       "%5.5s %5.5s %5.5s %1.1s\n",
	       $part->{'name'}, '?', '?', yes_no($part->{'state_up'}),
	       yes_no($part->{'state_up'}), '?', '?', '?', '?', '?', '?',
	       en_dis($part->{'state_up'}));
}

sub print_part_full
{
	my ($part) = @_;
	# Print the part attributes
	printf("Queue:\t%s\n", $part->{'name'});
	printf("\tqueue_type = Execution\n");
	printf("\tresources_default.nodes = %d\n", $part->{'total_nodes'});

	printf("\tenabled = %s\n", yes_no($part->{'state_up'}));

	printf("\tstarted = %s\n", yes_no($part->{'state_up'}));

	print "\n";
}

sub print_part_limits
{
	my ($part, $line_num) = @_;

	if(!$line_num) {
		printf("%-16s %6s %8s %8s %4s  %3s %3s %2s %5s\n",
		       "Queue", "Memory", "CPU Time", "Walltime",
		       "Node", "Run", "Que", "Lm", "State");
		printf("%-16s %6s %8s %8s %4s  %3s %3s %2s %5s\n",
		       "----------------", "------", "--------", "--------",
		       "----", "---", "---", "--", "-----");
	}


	printf("%-16.16s   --      --    ", $part->{'name'});
	if($part->{'max_time'} != INFINITE) {
		print(hhmmss($part->{'max_time'}*60));
	} else {
		printf("   --    ");

	}

	if($part->{'max_nodes'} != INFINITE) {
		printf("%4u  ", $part->{'max_nodes'});
	} else {
		printf("  --  ");
	}

	printf("%3u %3u --  %1.1s %1.1s \n", $part->{'running_jobs'},
	       $part->{'queued_jobs'}, en_dis($part->{'state_up'}),
	       running_stopped($part->{'state_up'}));
}


##############################################################################

__END__

=head1 NAME

B<qstat> - display job/partition information in a familiar pbs format

=head1 SYNOPSIS

B<qstat> [B<-f>] [B<-a>|B<-i>|B<-r>] [B<-n> [B<-1>]] [B<-G>|B<-M>] [B<-u> I<user_list>] [B<-? | --help>] [B<--man>] [I<job_id>...]

B<qstat> -Q [-f]

B<qstat> -q

=head1 DESCRIPTION

The B<qstat> command displays information about jobs.

=head1 OPTIONS

=over 4

=item B<-a>

Displays all jobs in a single-line format. See the STANDARD OUTPUT section for format details.

=item B<-i>

Displays information about idle jobs. This includes jobs which are queued or held.

=item B<-f>

Displays the full information for each selected job in a multi-line format. See the STANDARD OUTPUT section for format details.

=item B<-G>

Display size information in gigabytes.

=item B<-M>

Show size information, disk or memory in mega-words.  A word is considered to be 8 bytes.

=item B<-n>

Displays nodes allocated to a job in addition to the basic information.

=item B<-1>

In combination with -n, the -1 option puts all of the nodes on the same line as the job id.

=item B<-r>

Displays information about running jobs. This includes jobs which are running or suspended.

=item B<-u> I<user_list>

Display job information for all jobs owned by the specified user(s). The format of I<user_list> is: I<user_name>[,I<user_name>...].

=item B<-? | --help>

brief help message

=item B<--man>

full documentation

=back

=head1 STANDARD OUTPUT

Displaying Job Status

If the -a, -i, -f, -r, -u, -n, -G, and -M options are not specified, the brief single-line display format is used. The following items are displayed on a single line, in the specified order, separated by white space:

=over 4

=item the job id

=item the job name

=item the job owner

=item the cpu time used

=item the job state

C -  Job is completed after having run
E -  Job is exiting after having run.
H -  Job is held.
Q -  job is queued, eligible to run or routed.
R -  job is running.
T -  job is being moved to new location.
W -  job is waiting for its execution time
(-a option) to be reached.
S -  job is suspended.

=item the queue that the job is in

=back

If the -f option is specified, the multi-line display format is used. The output for each job consists of the header line:
B<Job Id>:  job identifier
followed by one line per job attribute of the form:
B<attribute_name = value>

If any of the options -a, -i, -r, -u, -n, -G or -M are specified, the normal single-line display format is used. The following items are displayed on a single line, in the specified order, separated by white space:

=over 4

=item the job id

=item the job owner

=item the queue the job is in

=item the job name

=item the session id (if the job is running)

=item the number of nodes requested by the job

=item the number of cpus or tasks requested by the job

=item the amount of memory requested by the job

=item either the cpu time, if specified, or wall time requested  by the job, (in hh:mm)

=item the job state

=item The amount of cpu time or wall time used by the job (in hh:mm)

=back

=head1 EXIT STATUS

On success, B<qstat> will exit with a value of zero. On failure, B<qstat> will exit with a value greater than zero.

=cut