[padb-devel] [padb commit] r40 - Make --full-report work better on non Quadrics systems by
codesite-noreply at google.com
codesite-noreply at google.com
Tue Jun 9 14:08:48 BST 2009
Author: apittman
Date: Tue Jun 9 06:08:33 2009
New Revision: 40
Modified:
trunk/src/padb
Log:
Make --full-report work better on non Quadrics systems by
removing a lot of harmless error messages along the way.
Modified: trunk/src/padb
==============================================================================
--- trunk/src/padb (original)
+++ trunk/src/padb Tue Jun 9 06:08:33 2009
@@ -35,6 +35,7 @@
# * Don't enable local-qsnet on non-qsnet systems.
# * inner_main() now uses callbacks for resource manager support.
# * --signal now takes names rather than numbers.
+# * Check job is valid when using the --full-report option.
# Version 2.2
# * Add a --core-stack option along with --core and --exe to extract stack
@@ -458,8 +459,8 @@
XXXX
--full-report=JOBID All of the above.
- --nostrip-below-main Don't strip stack traces below main.
- --nostrip-above-wait Don't strip stack traces about elan_waitWord.
+ --nostrip-below-main Don\'t strip stack traces below main.
+ --nostrip-above-wait Don\'t strip stack traces about elan_waitWord.
--proc-format Specify information to show about processes.
@@ -1430,7 +1431,7 @@
# vp's only, if it's not set then display a total for everyone.
if ( not $d ) {
- print("Statistics not valid\n");
+ print("QsNet Statistics not valid\n");
return;
}
@@ -2366,6 +2367,11 @@
sub open_get_data {
my ($filename) = @_;
+ # Simply return if called more than once.
+ if ( keys(%open_jobs) != 0 ) {
+ return;
+
+ }
my $hostname = hostname();
my $job;
my @out;
@@ -2379,6 +2385,11 @@
close OPEN;
}
+ # Handle being called multiple times, zero the hash every
+ # time we are called. Of course we could just return the
+ # existing hash which might be quicker.
+ %open_jobs = ();
+
foreach my $l (@out) {
chomp $l;
next if ( $l eq "" );
@@ -2389,26 +2400,19 @@
} else {
my @elems = split( /\|/, $l );
- # print "$#elems\X@elems\Y\n";
- if ( $#elems == 4 ) {
+ if ( $#elems == 6 ) {
- #print "@elems\n";
- } elsif ( $#elems == 6 ) {
-
- #print "@elems\n";
my $host = $elems[4];
$host =~ s/ //g;
$host =~ s/\t//g;
next if $host eq "Node";
$open_jobs{$job}{hosts}{$host}++;
- #print "Host is $host\n";
if ( $host eq $hostname ) {
my $name = $elems[1];
$name =~ /\[\[(\d+)\,(\d+)\]\,(\d+)\]/;
my $rank = $3;
- # my $rank = $elems[2];
my $pid = $elems[3];
$rank =~ s/ //g;
$pid =~ s/ //g;
@@ -2417,14 +2421,11 @@
}
}
- # print "$_";
}
if ( $conf{"verbose"} ) {
print Dumper \%open_jobs;
}
-
- # print keys %jobs;
}
sub open_get_jobs {
@@ -2950,6 +2951,10 @@
my $errors = 0;
+ my $report_errors = 1;
+
+ $report_errors = 0 if ($full_report);
+
my $pcmd = {
pid => -1,
in => "",
@@ -2998,7 +3003,9 @@
my $handle = $pcmd->{err};
while (<$handle>) {
my $line = $_;
- print( STDERR "Error ($jobid,$mode): $line" );
+ if ($report_errors) {
+ print( STDERR "Error ($jobid,$mode): $line" );
+ }
$errors++;
}
@@ -3015,7 +3022,10 @@
if ( $res != 0 ) {
my %status = rc_status($res);
if ( job_is_running($jobid) ) {
- printf("Failed to run parallel command (rc = $status{rc})\n");
+ if ($report_errors) {
+ printf(
+ "Failed to run parallel command (rc = $status{rc})\n");
+ }
} else {
printf("Job $jobid is no longer active\n");
return 1;
@@ -3333,6 +3343,14 @@
}
if ($full_report) {
+
+ if ( not job_is_running($full_report) ) {
+ printf( STDERR
+"Job $full_report is not active, use --show-jobs to see active jobs\n"
+ );
+ exit(1);
+ }
+
printf("padb version $version\n");
printf("full job report for job $full_report\n\n");
@@ -3342,7 +3360,7 @@
my $res;
$stats_total = 1;
$group = 1;
- $res = go_job( $full_report, undef );
+ $res = go_job( $full_report, "full-report" );
undef $stats_total;
undef $group;
More information about the padb-devel
mailing list