[padb-devel] [padb] r117 committed - Add some more debugging logs, implement an inner timeout where...
codesite-noreply at google.com
codesite-noreply at google.com
Tue Aug 4 19:21:10 BST 2009
Revision: 117
Author: apittman
Date: Tue Aug 4 11:20:27 2009
Log: Add some more debugging logs and implement an inner timeout: if the
inner process isn't contacted by the outer within thirty seconds it exits,
to avoid leaving stale processes (and hence jobs) around.
http://code.google.com/p/padb/source/detail?r=117
Modified:
/branches/full-duplex/src/padb
=======================================
--- /branches/full-duplex/src/padb Sun Jul 19 13:53:38 2009
+++ /branches/full-duplex/src/padb Tue Aug 4 11:20:27 2009
@@ -584,7 +584,7 @@
sub debug_log {
my ( $type, $handle, $str, @params ) = @_;
- if ( not defined $debugModes{$type} ) {
+ if ( not exists $debugModes{$type} ) {
printf("Unknown debug mode: $type\n");
exit(1);
}
@@ -598,10 +598,12 @@
# Valid debug modes, a full list is maintained here so using unexpected
# ones can generate warnings.
-$debugModes{"full-duplex"} = 0;
-$debugModes{"show-cmd"} = 0;
-$debugModes{"all"} = 0;
-$debugModes{"tree"} = 0;
+$debugModes{"full-duplex"} = undef;
+$debugModes{"show-cmd"} = undef;
+$debugModes{"all"} = undef;
+$debugModes{"tree"} = undef;
+$debugModes{"verbose"} = undef;
+$debugModes{"signon"} = undef;
sub parse_args_outer {
@@ -657,7 +659,7 @@
if ( defined $debugflag ) {
foreach my $f ( split( ",", $debugflag ) ) {
my ( $name, $v ) = split( "=", $f );
- if ( defined $debugModes{$name} ) {
+ if ( exists $debugModes{$name} ) {
$debugModes{$name} = defined($v) ? $v : "basic";
} else {
printf("Attempt to set unknown debug flag \"$name\".\n");
@@ -3216,6 +3218,9 @@
# Called once when we have the socket details of the last child.
sub connect_to_children {
my $comm_data = shift;
+
+ debug_log( "signon", undef, "Received last signon, connecting to
inner" );
+
@{ $comm_data->{host_ids} } = sort( keys( %{ $comm_data->{remote} } )
);
$comm_data->{connection_tree} =
generate_comm_tree( $comm_data->{host_ids} );
@@ -3729,6 +3734,9 @@
$conf{"verbose"} && defined $ncpus && print "Job has $ncpus
processe(s)\n";
$conf{"verbose"} && defined $hosts && print "Job spans $hosts
host(s)\n";
+ debug_log( "verbose", undef,
+ "There are $ncpus processes over $hosts hosts" );
+
# Maybe do it this way, edb works best when run with the same LD_LIBRARY_PATH
# as the application. It's very important when running the message queue
# extraction code but less so here. You may find you get linker errors though
@@ -3759,6 +3767,9 @@
return 1;
}
my $errors = go_parallel( $jobid, $cmd, $ncpus, $hosts );
+
+ debug_log( "verbose", undef, "Completed command" );
+
cleanup_pcmd();
return $errors;
}
@@ -6824,52 +6835,63 @@
my $sel = $netdata->{sel};
- while ( my @data = $sel->can_read() ) {
- foreach my $s (@data) {
- if ( $s == $server ) {
- my $new = $server->accept() or die("Failed accept");
- $sel->add($new);
- my $peer = getpeername($new);
- my ( $port, $addr ) = unpack_sockaddr_in($peer);
- my $ip = inet_ntoa($addr);
- my $hostname = gethostbyaddr( $addr, AF_INET );
-
- #printf "New connection from $hostname ($ip) $port\n";
- my %sinfo;
- $sinfo{hostname} = $hostname;
- $sinfo{trusted} = 0;
- $sinfo{port} = $port;
- $sinfo{desc} = "$hostname:$port";
- $sinfo{socket} = $new;
- $sinfo{line_cb} = \&command_from_outer;
- $netdata->{connections}{$new} = \%sinfo;
-
- # $new->printf("Hello from padb\n");
- #$new->autoflush();
- next;
- }
-
- my $sinfo = $netdata->{connections}{$s};
- my $d;
- my $count = sysread( $s, $d, 65536 );
-
- # Dead connection.
- if ( not defined $d or $count eq 0 ) {
-
- # printf("null read from $sinfo->{desc}\n");
- if ( eof($s) ) {
- $sel->remove($s);
- $s->close();
- $sinfo->{trusted} = 0;
- $sinfo->{dead} = 1;
- my $scount = $sel->count();
- }
- next;
- }
-
- $sinfo->{str} .= $d;
- extract_line( $netdata, $sinfo );
-
+ my $stime = time();
+
+ while ( $sel->count() > 0 ) {
+ while ( my @data = $sel->can_read(5) ) {
+ foreach my $s (@data) {
+ if ( $s == $server ) {
+ my $new = $server->accept() or die("Failed accept");
+ $sel->add($new);
+ my $peer = getpeername($new);
+ my ( $port, $addr ) = unpack_sockaddr_in($peer);
+ my $ip = inet_ntoa($addr);
+ my $hostname = gethostbyaddr( $addr, AF_INET );
+
+ #printf "New connection from $hostname ($ip) $port\n";
+ my %sinfo;
+ $sinfo{hostname} = $hostname;
+ $sinfo{trusted} = 0;
+ $sinfo{port} = $port;
+ $sinfo{desc} = "$hostname:$port";
+ $sinfo{socket} = $new;
+ $sinfo{line_cb} = \&command_from_outer;
+ $netdata->{connections}{$new} = \%sinfo;
+
+ # $new->printf("Hello from padb\n");
+ #$new->autoflush();
+ next;
+ }
+
+ my $sinfo = $netdata->{connections}{$s};
+ my $d;
+ my $count = sysread( $s, $d, 65536 );
+
+ # Dead connection.
+ if ( not defined $d or $count eq 0 ) {
+
+ # printf("null read from $sinfo->{desc}\n");
+ if ( eof($s) ) {
+ $sel->remove($s);
+ $s->close();
+ $sinfo->{trusted} = 0;
+ $sinfo->{dead} = 1;
+ my $scount = $sel->count();
+ }
+ next;
+ }
+
+ $sinfo->{str} .= $d;
+ extract_line( $netdata, $sinfo );
+
+ }
+ }
+ my $time = time();
+
+ # Should probably handle this better: if the outer or tree never signs on
+ # for whatever reason, silently die as it's probably the best thing to do.
+ if ( ( $sel->count() == 1 ) and ( ( $time - $stime ) > 30 ) ) {
+ exit(0);
}
}
my $count = $sel->count();
More information about the padb-devel mailing list