[padb-devel] [padb commit] r40 - Make --full-report work better on non Quadrics systems by removing a lot of harmless error messages along the way.

codesite-noreply at google.com codesite-noreply at google.com
Tue Jun 9 14:08:48 BST 2009


Author: apittman
Date: Tue Jun  9 06:08:33 2009
New Revision: 40

Modified:
    trunk/src/padb

Log:
Make --full-report work better on non Quadrics systems by
removing a lot of harmless error messages along the way.


Modified: trunk/src/padb
==============================================================================
--- trunk/src/padb	(original)
+++ trunk/src/padb	Tue Jun  9 06:08:33 2009
@@ -35,6 +35,7 @@
  #  * Don't enable local-qsnet on non-qsnet systems.
  #  * inner_main() now uses callbacks for resource manager support.
  #  * --signal now takes names rather than numbers.
+#  * Check job is valid when using the --full-report option.

  # Version 2.2
  #  * Add a --core-stack option along with --core and --exe to extract stack
@@ -458,8 +459,8 @@
  XXXX
     --full-report=JOBID All of the above.

-   --nostrip-below-main Don't strip stack traces below main.
-   --nostrip-above-wait Don't strip stack traces about elan_waitWord.
+   --nostrip-below-main Don\'t strip stack traces below main.
+   --nostrip-above-wait Don\'t strip stack traces about elan_waitWord.

     --proc-format       Specify information to show about processes.

@@ -1430,7 +1431,7 @@
      # vp's only, if it's not set then display a total for everyone.

      if ( not $d ) {
-        print("Statistics not valid\n");
+        print("QsNet Statistics not valid\n");
          return;
      }

@@ -2366,6 +2367,11 @@
  sub open_get_data {
      my ($filename) = @_;

+    # Simply return if called more than once.
+    if ( keys(%open_jobs) != 0 ) {
+        return;
+
+    }
      my $hostname = hostname();
      my $job;
      my @out;
@@ -2379,6 +2385,11 @@
          close OPEN;
      }

+    # Handle being called multiple times, zero the hash every
+    # time we are called.  Of course we could just return the
+    # existing hash which might be quicker.
+    %open_jobs = ();
+
      foreach my $l (@out) {
          chomp $l;
          next if ( $l eq "" );
@@ -2389,26 +2400,19 @@
          } else {
              my @elems = split( /\|/, $l );

-            # print "$#elems\X@elems\Y\n";
-            if ( $#elems == 4 ) {
+            if ( $#elems == 6 ) {

-                #print "@elems\n";
-            } elsif ( $#elems == 6 ) {
-
-                #print "@elems\n";
                  my $host = $elems[4];
                  $host =~ s/ //g;
                  $host =~ s/\t//g;
                  next if $host eq "Node";
                  $open_jobs{$job}{hosts}{$host}++;

-                #print "Host is $host\n";
                  if ( $host eq $hostname ) {
                      my $name = $elems[1];
                      $name =~ /\[\[(\d+)\,(\d+)\]\,(\d+)\]/;
                      my $rank = $3;

-                    #	    my $rank = $elems[2];
                      my $pid = $elems[3];
                      $rank =~ s/ //g;
                      $pid  =~ s/ //g;
@@ -2417,14 +2421,11 @@
              }
          }

-        # print "$_";
      }

      if ( $conf{"verbose"} ) {
          print Dumper \%open_jobs;
      }
-
-    #    print keys %jobs;
  }

  sub open_get_jobs {
@@ -2950,6 +2951,10 @@

      my $errors = 0;

+    my $report_errors = 1;
+
+    $report_errors = 0 if ($full_report);
+
      my $pcmd = {
          pid => -1,
          in  => "",
@@ -2998,7 +3003,9 @@
          my $handle = $pcmd->{err};
          while (<$handle>) {
              my $line = $_;
-            print( STDERR "Error ($jobid,$mode): $line" );
+            if ($report_errors) {
+                print( STDERR "Error ($jobid,$mode): $line" );
+            }
              $errors++;
          }

@@ -3015,7 +3022,10 @@
          if ( $res != 0 ) {
              my %status = rc_status($res);
              if ( job_is_running($jobid) ) {
-                printf("Failed to run parallel command (rc = $status{rc})\n");
+                if ($report_errors) {
+                    printf(
+                        "Failed to run parallel command (rc = $status{rc})\n");
+                }
              } else {
                  printf("Job $jobid is no longer active\n");
                  return 1;
@@ -3333,6 +3343,14 @@
      }

      if ($full_report) {
+
+        if ( not job_is_running($full_report) ) {
+            printf( STDERR
+"Job $full_report is not active, use --show-jobs to see active jobs\n"
+            );
+            exit(1);
+        }
+
          printf("padb version $version\n");
          printf("full job report for job $full_report\n\n");

@@ -3342,7 +3360,7 @@
          my $res;
          $stats_total = 1;
          $group       = 1;
-        $res         = go_job( $full_report, undef );
+        $res         = go_job( $full_report, "full-report" );
          undef $stats_total;
          undef $group;





More information about the padb-devel mailing list