[padb] r345 committed - Slurm: Try to pick a sensible (valid) default value for...
padb at googlecode.com
padb at googlecode.com
Wed Dec 2 16:34:58 GMT 2009
Revision: 345
Author: apittman
Date: Wed Dec 2 08:34:27 2009
Log: Slurm: Try to pick a sensible (valid) default value for
slurm_job_step rather than just using a value of zero.
Revert back to using zero if we can't find any trace of any
active steps.
Also convert slurm_setup_pcmd to slurm_setup_job.
http://code.google.com/p/padb/source/detail?r=345
Modified:
/trunk/src/padb
=======================================
--- /trunk/src/padb Wed Dec 2 06:18:09 2009
+++ /trunk/src/padb Wed Dec 2 08:34:27 2009
@@ -441,7 +441,7 @@
is_installed => \&slurm_is_installed,
get_active_jobs => \&slurm_get_jobs,
job_is_running => \&slurm_job_is_running,
- setup_pcmd => \&slurm_setup_pcmd,
+ setup_job => \&slurm_setup_job,
find_pids => \&slurm_find_pids,
require_inner_callback => 1,
};
@@ -519,7 +519,7 @@
$conf{prun_exittimeout} = '2m';
$conf{rmgr} = undef;
-$conf{slurm_job_step} = 0;
+$conf{slurm_job_step} = undef;
$conf{pbs_server} = undef;
@@ -552,6 +552,7 @@
my $EQUALS = qr{=}x;
my $SPACE = qr{\s+}x;
my $COLON = qr{:}x;
+my $PERIOD = qr{\.}x;
my $EMPTY_STRING = q{};
@@ -2472,11 +2473,41 @@
return ( $status eq 'running' );
}
-sub slurm_setup_pcmd {
- my $job = shift;
+sub slurm_setup_job {
+ my $job = shift;
+
+ # After we have selected a job id and decided to target it make a
+ # best-attempt effort to pick a sensible step_id. List all the
+ # step ids slurm thinks are running and pick the first one.
+ # Previously this value just defaulted to zero.
+ if ( not defined $conf{slurm_job_step} ) {
+ my @all_steps = slurp_cmd("squeue -s -o %i");
+ my @valid_steps;
+ foreach my $step (@all_steps) {
+ chomp $step;
+ next if $step eq "STEPID";
+ my ( $job_id, $job_step ) = split $PERIOD, $step;
+ next unless $job_id == $job;
+ push @valid_steps, $job_step;
+ }
+ if (@valid_steps) {
+ config_set_internal( 'slurm_job_step', $valid_steps[0] );
+ } else {
+ print
+ "Unable to determine any valid job steps, assuming step id 0\n";
+ config_set_internal( 'slurm_job_step', 0 );
+ }
+ }
+
my $cpus = slurm_job_to_ncpus($job);
my $nc = slurm_job_to_nodecount($job);
- return ( "srun --jobid=$job", $cpus, $nc );
+
+ my %pcmd;
+ $pcmd{nprocesses} = $cpus;
+ $pcmd{nhosts} = $nc;
+ $pcmd{command} = "srun --jobid=$job";
+ return %pcmd;
+
}
###############################################################################
@@ -5085,7 +5116,13 @@
}
foreach my $co (@conf_int) {
- check_int( $conf{$co} );
+
+ # Only check for defined values here, for some options only
+ # integers are valid but the default value is undef which means
+ # padb should attempt to do the right thing.
+ if ( defined $conf{$co} ) {
+ check_int( $conf{$co} );
+ }
}
# Now go through all the config options and both verify they are
More information about the padb-devel
mailing list