[padb-devel] [padb] r143 committed - Add a mechanism for getting error strings back from the rank targettin...

codesite-noreply at google.com codesite-noreply at google.com
Mon Aug 24 16:24:11 BST 2009


Revision: 143
Author: apittman
Date: Mon Aug 24 08:23:48 2009
Log: Add a mechanism for getting error strings back from the rank targeting
code to the outer process and on to the user.  Each rank can report an
error via a call to target_error($vp,$errstr), which then causes the errstr
to be passed back up the tree and eventually on to the user.
The data for this is reduced in the network properly using some new
merge_ranges() functions for merging node ranges.  Each rank can report
more than one error; errors are reported in alphabetical order, with each
error message also stating which ranks received that error.

http://code.google.com/p/padb/source/detail?r=143

Modified:
  /trunk/src/padb

=======================================
--- /trunk/src/padb	Mon Aug 24 01:57:58 2009
+++ /trunk/src/padb	Mon Aug 24 08:23:48 2009
@@ -3470,6 +3470,14 @@
      # may not be the same thing as the request we are currently
      # sending.
      my $mode = $comm_data->{current_req}->{mode};
+
+    if ( defined $d->{target_error} ) {
+        printf("Warning: errors reported by some ranks\n========\n");
+        foreach my $error ( sort( keys( %{ $d->{target_error} } ) ) ) {
+            printf("$d->{target_error}{$error}: $error\n");
+        }
+        printf("========\n");
+    }
      if ( defined( $allfns{$mode}{out_handler} ) ) {
          $allfns{$mode}{out_handler}( $conf{mode_options}{$mode}, $d );
      } else {
@@ -3579,6 +3587,130 @@
      $cdata{event_cb}            = \&handle_event_from_socket;
      $comm_data->{sockets}{$new} = \%cdata;
  }
+
+# Render one contiguous sub-range as text: a lone number when both ends are
+# numerically equal, otherwise "lower-upper".
+sub sub_range_assemble {
+    my ( $lower, $upper ) = @_;
+
+    # A sub-range covering a single value is written as just that value.
+    return $lower if $lower == $upper;
+
+    return "$lower-$upper";
+}
+
+# Add a single numeric value to a range spec and return the updated spec.
+#
+# $range is a bracketed, comma separated list of numbers and "lower-upper"
+# sub-ranges, for example "[0-3,7]"; $value is the number to add.  The value
+# is folded into any sub-range it touches, so adding 4 to "[0-3,5]" gives
+# "[0-5]".  A value the range already covers leaves the range unchanged
+# rather than being appended a second time.
+#
+# Returns the new spec, brackets included, or undef after a carp() if $range
+# is not in the expected format.
+sub add_value_to_range {
+    my ( $range, $value ) = @_;
+
+    # Strip the enclosing brackets, rejecting anything but digits, dashes
+    # and commas between them.
+    my ($spec) = $range =~ m/^\[([\d\-\,]+)\]$/;
+    if ( not defined $spec ) {
+        carp("Bad range $range");
+        return undef;
+    }
+
+    # Break the spec into [ lower, upper ] pairs, one per sub-range.
+    my @spans;
+    foreach my $part ( split( ",", $spec ) ) {
+        if ( $part =~ m/^(\d+)$/ ) {
+            push( @spans, [ $1, $1 ] );
+        } elsif ( $part =~ m/^(\d+)\-(\d+)$/ ) {
+            push( @spans, [ $1, $2 ] );
+        } else {
+            carp("Bad range $range");
+            return undef;
+        }
+    }
+
+    # Treat the new value as a one element sub-range and let the merge
+    # below decide whether it extends a neighbour, joins two neighbours
+    # together, is already covered, or stands alone.
+    push( @spans, [ $value, $value ] );
+
+    # Walk the sub-ranges in ascending order, folding each one into its
+    # predecessor whenever the two overlap or are directly adjacent.
+    my @merged;
+    foreach my $span ( sort { $a->[0] <=> $b->[0] } @spans ) {
+        my $last = $merged[-1];
+
+        if ( defined $last and $span->[0] <= $last->[1] + 1 ) {
+            if ( $span->[1] > $last->[1] ) {
+                $last->[1] = $span->[1];
+            }
+        } else {
+
+            # Copy the pair so that extending it later cannot alias @spans.
+            push( @merged, [ @{$span} ] );
+        }
+    }
+
+    my @parts = map { sub_range_assemble( $_->[0], $_->[1] ) } @merged;
+
+    return "[" . join( ",", @parts ) . "]";
+}
+
+# Merge two bracketed range specs, for example "[0-3]" and "[2,8]", and
+# return the combined spec.  For now just do the simple thing and add the
+# values from $right to $left one at a time; this really needs to be
+# re-visited from a scalability aspect however.
+#
+# Returns the merged spec, or undef if add_value_to_range() rejects $left.
+sub merge_ranges {
+    my ( $left, $right ) = @_;
+
+    # shift_from_range() is used here to hand back the first value of the
+    # spec along with the remainder, with an undefined value marking the end.
+    my ( $val, $range ) = shift_from_range($right);
+
+    while ( defined $val ) {
+        $left = add_value_to_range( $left, $val );
+
+        # add_value_to_range() has already warned about the malformed range
+        # and there is nothing left to merge into, so stop here rather than
+        # feeding undef back in for every remaining value.
+        if ( not defined $left ) {
+            return undef;
+        }
+
+        ( $val, $range ) = shift_from_range($range);
+    }
+
+    # $left is already enclosed in brackets, both on entry and as returned
+    # by add_value_to_range(), so it must not be wrapped a second time.
+    return $left;
+}

  # "shift" a rank or port number from the standard spec format, returns the  
id
  # and the range with the first entry removed.  Returns both the first  
entry and
@@ -4263,6 +4395,7 @@
  }

  my %inner_output;
+my %inner_error;

  sub output {
      my ( $vp, $str ) = @_;
@@ -4270,6 +4403,18 @@
      push( @{ $inner_output{$vp} }, $str );

  }
+
+# Record that a target rank hit the given error.  Errors are keyed by their
+# text in %inner_error, and each entry holds the bracketed range of ranks
+# that have reported that text so far.
+sub target_error {
+    my ( $rank, $error ) = @_;
+
+    my $ranks_so_far = $inner_error{$error};
+
+    # The first report of an error starts a new single-rank range; later
+    # reports extend the range already stored for it.
+    $inner_error{$error} =
+      defined $ranks_so_far
+      ? add_value_to_range( $ranks_so_far, $rank )
+      : "[$rank]";
+}

  sub p_die {
      my ( $vp, $str ) = @_;
@@ -5833,9 +5978,9 @@
              push( @all, $proc );
          } else {
              if ( defined $gdb->{error} ) {
-                output $vp, $gdb->{error};
+                target_error( $vp, $gdb->{error} );
              } else {
-                output $vp, "Failed to attach to process";
+                target_error( $vp, "Failed to attach to process" );
              }
          }

@@ -6400,8 +6545,6 @@
      # Merge this reply into the local one.
      $handle->{child_replys}++;

-    # $handle->{all_replys}{raw}{ $sd->{hostname} } = $r;
-
      # Combine the host responces.
      foreach my $status ( keys( %{ $r->{host_responce} } ) ) {
          foreach my $host ( keys( %{ $r->{host_responce}{$status} } ) ) {
@@ -6418,7 +6561,7 @@
          }
      }

-    # Combine the target process responces.
+    # Combine the target process responces from child.
      if ( exists $r->{target_output} ) {
          foreach my $tp ( keys( %{ $r->{target_output} } ) ) {
              $handle->{all_replys}->{target_output}{$tp} =
@@ -6426,12 +6569,29 @@
          }
      }

+    # Copy the target local responces.
      if ( exists $handle->{target_responce} ) {
          foreach my $tp ( keys( %{ $handle->{target_responce} } ) ) {
              $handle->{all_replys}->{target_responce}{$tp} =
                $handle->{target_responce}{$tp};
          }
      }
+
+    # Copy the network target errors into responce.
+    if ( exists $r->{target_error} ) {
+        $handle->{all_replys}->{target_error} = $r->{target_error};
+    }
+
+    # Merge in local target responces.
+    foreach my $key ( keys(%inner_error) ) {
+        if ( defined $handle->{all_replys}->{target_error}{$key} ) {
+            $handle->{all_replys}->{target_error}{$key} =
+              merge_ranges( $handle->{all_replys}->{target_error}{$key},
+                $inner_error{$key} );
+        } else {
+            $handle->{all_replys}->{target_error}{$key} =  
$inner_error{$key};
+        }
+    }

      # Save any output we've got from this node.
      foreach my $key ( keys(%inner_output) ) {
@@ -6439,6 +6599,7 @@
      }

      %inner_output = ();
+    %inner_error  = ();

      # If this isn't the last child to signon don't reply up-stream yet.
      if ( $handle->{child_replys} != $handle->{children} ) {
@@ -6457,6 +6618,7 @@
      $handle->{all_replys}      = undef;
      $handle->{child_replys}    = 0;
      $handle->{target_responce} = undef;
+    $handle->{target_error}    = undef;
  }

  # Convert from a pid to a command name and do it in a safe manner to avoid
@@ -6673,7 +6835,6 @@
      }
      if ($res) {
          $netdata->{target_responce} = $res;
-        $netdata->{all_replys}{target_responce} = $res;
      }

      return;
@@ -6698,7 +6859,7 @@
      $cmd->{host_responce}{ok}{ $confInner{hostname} } = 1;

      my $reply = my_encode($cmd);
-    $netdata->{parent}->{socket}->printf("$reply\n");
+    $netdata->{parent}->{socket}->print("$reply\n");
  }

  # Process a single line of input onto a socket we are
@@ -6743,9 +6904,15 @@
              $res->{target_output}{$key} = $inner_output{$key};
          }

-        %inner_output = ();
+        if (%inner_error) {
+            $res->{target_error} = \%inner_error;
+        }

          reply_to_parent( $netdata, $res );
+
+        # Clear down the local inputs.
+        %inner_error                = ();
+        %inner_output               = ();
          $netdata->{target_responce} = undef;

          if ( $netdata->{shutdown} ) {




More information about the padb-devel mailing list