[padb-devel] [padb] r124 committed - Tidy up the slurm interface code, query host count for the job...
codesite-noreply at google.com
codesite-noreply at google.com
Wed Aug 19 21:16:07 BST 2009
Revision: 124
Author: apittman
Date: Wed Aug 19 13:15:00 2009
Log: Tidy up the slurm interface code. Query the host count for the job
rather than the job step, which is probably correct as shadow jobs
only take a job. Make the code run cleanly with warnings enabled.
http://code.google.com/p/padb/source/detail?r=124
Modified:
/branches/full-duplex/src/padb
=======================================
--- /branches/full-duplex/src/padb Wed Aug 19 07:28:24 2009
+++ /branches/full-duplex/src/padb Wed Aug 19 13:15:00 2009
@@ -2193,15 +2193,20 @@
return @res;
}
+# Query the process count for the "step" as that's how many
+# processes we are going to be looking for.
sub slurm_job_to_ncpus {
my $job = shift;
- my @steps = `squeue -s -o "%i %A" 2>/dev/null`;
+ my $s = "$job." . $conf{"slurm-job-step"};
+ my @steps = `squeue -s $s -o "%i %A" 2>/dev/null`;
return undef if ( $? != 0 );
-# The %A option is new so ensure we have the TASKS output before we believe what we see here...
+ # The %A option is new so ensure we have the TASKS output
+ # before we believe what we see here...
+ # Mind you %A is several years old now so if it's not there
+ # we probably can't do anything anyway.
my $tasks;
my $have_tasks = 0;
- my $s = "$job." . $conf{"slurm-job-step"};
foreach my $step (@steps) {
my ( $step, $cpus ) = split( " ", $step );
$tasks = $cpus if ( $step eq $s );
@@ -2211,19 +2216,35 @@
return undef;
}
+# Query the nodecount for the "job" as that is what we shall be running on.
+sub slurm_job_to_nodecount {
+ my $job = shift;
+ my @jobs = `squeue -o "%i %D" 2>/dev/null`;
+ return undef if ( $? != 0 );
+
+ foreach my $step (@jobs) {
+ my ( $left, $right ) = split( " ", $step );
+ return $right if ( $left eq $job );
+ }
+ return undef;
+}
+
+# Query the node list for the "step" which isn't the same as the node list
+# for the job, care should be taken if using this function to ensure this
+# is correct.
+# This function isn't used currently.
sub slurm_job_to_nodelist {
my $job = shift;
- my @steps = `squeue -s -o "%i %N" 2>/dev/null`;
+ my $s = "$job." . $conf{"slurm-job-step"};
+ my @steps = `squeue -s $s -o "%i %N" 2>/dev/null`;
return undef if ( $? != 0 );
- my $hosts;
- my $s = "$job." . $conf{"slurm-job-step"};
foreach my $step (@steps) {
my ( $left, $right ) = split( " ", $step );
- $hosts = $right if ( $left eq $s );
+ return $right if ( $left eq $s );
}
- return $hosts;
+ return undef;
}
sub slurm_job_is_running {
@@ -2234,10 +2255,9 @@
}
sub slurm_setup_pcmd {
- my $job = shift;
- my $cpus = slurm_job_to_ncpus($job);
- my @nodes = slurm_job_to_nodelist($job);
- my $nc = $#nodes + 1;
+ my $job = shift;
+ my $cpus = slurm_job_to_ncpus($job);
+ my $nc = slurm_job_to_nodecount($job);
return ( "srun --jobid=$job", $cpus, $nc );
}
@@ -6318,7 +6338,7 @@
foreach my $proc (@procs) {
my ( $pid, $job, $step, $local, $global ) = split( " ", $proc );
next if ( $global eq "-" );
- next unless ( $job == $jobid );
+ next unless ( $job eq $jobid );
next unless ( $step == $confInner{"slurm-job-step"} );
$gids{$pid} = $global;
}
@@ -6380,16 +6400,19 @@
if (%gids) {
$vp = vp_from_pid( \%gids, \%resmgr, $pid );
- debug $vp, "Found $vp from $pid using scontrol listpids";
+
+ # debug $vp, "Found $vp from $pid using scontrol listpids";
}
if ( not defined $vp ) {
my %env = get_remote_env($pid);
- debug undef,
+ if (0) {
+ debug undef,
"Checking slurm pid: $pid, job $env{SLURM_JOBID}, step $env{SLURM_STEPID},
proc $env{SLURM_PROCID}, script $script";
- debug undef,
+ debug undef,
"Checking rms pid: $pid, job $env{RMS_JOBID}, proc $env{RMS_PROCID},
script $script";
+ }
if ( $env{SLURM_JOBID} eq $jobid ) {
if ( $env{SLURM_STEPID} eq $confInner{"slurm-job-step"} ) {
@@ -6938,6 +6961,7 @@
$confInner{"edb"} = find_edb();
$confInner{"minfo"} = find_minfo();
$confInner{"hostname"} = hostname();
+ $confInner{"scripts"} = "bash,sh,dash,ash,perl,xterm";
# Local vars to help with command line parsing
my $outerloc;
More information about the padb-devel
mailing list