[padb-devel] [padb] r117 committed - Add some more debugging logs, implement an inner timeout where...

codesite-noreply at google.com codesite-noreply at google.com
Tue Aug 4 19:21:10 BST 2009


Revision: 117
Author: apittman
Date: Tue Aug  4 11:20:27 2009
Log: Add some more debugging logs, implement an inner timeout where
if it isn't contacted by the outer in thirty seconds it exits
to avoid leaving around stale processes (and hence jobs).

http://code.google.com/p/padb/source/detail?r=117

Modified:
  /branches/full-duplex/src/padb

=======================================
--- /branches/full-duplex/src/padb	Sun Jul 19 13:53:38 2009
+++ /branches/full-duplex/src/padb	Tue Aug  4 11:20:27 2009
@@ -584,7 +584,7 @@

  sub debug_log {
      my ( $type, $handle, $str, @params ) = @_;
-    if ( not defined $debugModes{$type} ) {
+    if ( not exists $debugModes{$type} ) {
          printf("Unknown debug mode: $type\n");
          exit(1);
      }
@@ -598,10 +598,12 @@

  # Valid debug modes, a full list is maintained here so using unexpected
  # ones can generate warnings.
-$debugModes{"full-duplex"} = 0;
-$debugModes{"show-cmd"}    = 0;
-$debugModes{"all"}         = 0;
-$debugModes{"tree"}        = 0;
+$debugModes{"full-duplex"} = undef;
+$debugModes{"show-cmd"}    = undef;
+$debugModes{"all"}         = undef;
+$debugModes{"tree"}        = undef;
+$debugModes{"verbose"}     = undef;
+$debugModes{"signon"}      = undef;

  sub parse_args_outer {

@@ -657,7 +659,7 @@
      if ( defined $debugflag ) {
          foreach my $f ( split( ",", $debugflag ) ) {
              my ( $name, $v ) = split( "=", $f );
-            if ( defined $debugModes{$name} ) {
+            if ( exists $debugModes{$name} ) {
                  $debugModes{$name} = defined($v) ? $v : "basic";
              } else {
                  printf("Attempt to set unknown debug flag \"$name\".\n");
@@ -3216,6 +3218,9 @@
  # Called once when we have the socket details of the last child.
  sub connect_to_children {
      my $comm_data = shift;
+
+    debug_log( "signon", undef, "Received last signon, connecting to  
inner" );
+
      @{ $comm_data->{host_ids} } = sort( keys( %{ $comm_data->{remote} } )  
);
      $comm_data->{connection_tree} =
        generate_comm_tree( $comm_data->{host_ids} );
@@ -3729,6 +3734,9 @@
      $conf{"verbose"} && defined $ncpus && print "Job has $ncpus  
processe(s)\n";
      $conf{"verbose"} && defined $hosts && print "Job spans $hosts  
host(s)\n";

+    debug_log( "verbose", undef,
+        "There are $ncpus processes over $hosts hosts" );
+
    # Maybe do it this way, edb works best when run with the same  
LD_LIBRARY_PATH
    # as the application.  It's very important when running the message queue
    # extraction code but less so here.  You may find you get linker errors  
though
@@ -3759,6 +3767,9 @@
          return 1;
      }
      my $errors = go_parallel( $jobid, $cmd, $ncpus, $hosts );
+
+    debug_log( "verbose", undef, "Completed command" );
+
      cleanup_pcmd();
      return $errors;
  }
@@ -6824,52 +6835,63 @@

      my $sel = $netdata->{sel};

-    while ( my @data = $sel->can_read() ) {
-        foreach my $s (@data) {
-            if ( $s == $server ) {
-                my $new = $server->accept() or die("Failed accept");
-                $sel->add($new);
-                my $peer = getpeername($new);
-                my ( $port, $addr ) = unpack_sockaddr_in($peer);
-                my $ip = inet_ntoa($addr);
-                my $hostname = gethostbyaddr( $addr, AF_INET );
-
-                #printf "New connection from $hostname ($ip) $port\n";
-                my %sinfo;
-                $sinfo{hostname}              = $hostname;
-                $sinfo{trusted}               = 0;
-                $sinfo{port}                  = $port;
-                $sinfo{desc}                  = "$hostname:$port";
-                $sinfo{socket}                = $new;
-                $sinfo{line_cb}               = \&command_from_outer;
-                $netdata->{connections}{$new} = \%sinfo;
-
-                # $new->printf("Hello from padb\n");
-                #$new->autoflush();
-                next;
-            }
-
-            my $sinfo = $netdata->{connections}{$s};
-            my $d;
-            my $count = sysread( $s, $d, 65536 );
-
-            # Dead connection.
-            if ( not defined $d or $count eq 0 ) {
-
-                # printf("null read from $sinfo->{desc}\n");
-                if ( eof($s) ) {
-                    $sel->remove($s);
-                    $s->close();
-                    $sinfo->{trusted} = 0;
-                    $sinfo->{dead}    = 1;
-                    my $scount = $sel->count();
-                }
-                next;
-            }
-
-            $sinfo->{str} .= $d;
-            extract_line( $netdata, $sinfo );
-
+    my $stime = time();
+
+    while ( $sel->count() > 0 ) {
+        while ( my @data = $sel->can_read(5) ) {
+            foreach my $s (@data) {
+                if ( $s == $server ) {
+                    my $new = $server->accept() or die("Failed accept");
+                    $sel->add($new);
+                    my $peer = getpeername($new);
+                    my ( $port, $addr ) = unpack_sockaddr_in($peer);
+                    my $ip = inet_ntoa($addr);
+                    my $hostname = gethostbyaddr( $addr, AF_INET );
+
+                    #printf "New connection from $hostname ($ip) $port\n";
+                    my %sinfo;
+                    $sinfo{hostname}              = $hostname;
+                    $sinfo{trusted}               = 0;
+                    $sinfo{port}                  = $port;
+                    $sinfo{desc}                  = "$hostname:$port";
+                    $sinfo{socket}                = $new;
+                    $sinfo{line_cb}               = \&command_from_outer;
+                    $netdata->{connections}{$new} = \%sinfo;
+
+                    # $new->printf("Hello from padb\n");
+                    #$new->autoflush();
+                    next;
+                }
+
+                my $sinfo = $netdata->{connections}{$s};
+                my $d;
+                my $count = sysread( $s, $d, 65536 );
+
+                # Dead connection.
+                if ( not defined $d or $count eq 0 ) {
+
+                    # printf("null read from $sinfo->{desc}\n");
+                    if ( eof($s) ) {
+                        $sel->remove($s);
+                        $s->close();
+                        $sinfo->{trusted} = 0;
+                        $sinfo->{dead}    = 1;
+                        my $scount = $sel->count();
+                    }
+                    next;
+                }
+
+                $sinfo->{str} .= $d;
+                extract_line( $netdata, $sinfo );
+
+            }
+        }
+        my $time = time();
+
+       # Should probably handle this better, if the outer or tree never  
signons
+       # for whatever reason silently die as it's probably the best thing  
do to.
+        if ( ( $sel->count() == 1 ) and ( ( $time - $stime ) > 30 ) ) {
+            exit(0);
          }
      }
      my $count = $sel->count();




More information about the padb-devel mailing list