[padb-devel] [padb commit] r61 - Find processes as part of the startup phase as report basic meta-data back
codesite-noreply at google.com
codesite-noreply at google.com
Sat Jun 20 22:33:25 BST 2009
Author: apittman
Date: Sat Jun 20 11:47:51 2009
New Revision: 61
Modified:
branches/full-duplex/src/padb
Log:
Find processes as part of the startup phase as report basic meta-data back
to the outer process (state,pid,exe) although don't do anything with this
information yet.
Cleanup the proc-summary code so it passes datatypes back from the innter to
the outer and clean up a lot of the outer code accordingly.
Add some debugging options to full-duplex.
On mpd pass the process mapping from the outer to the inner via a datatype
so as not to require files.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Sat Jun 20 11:47:51 2009
@@ -2392,7 +2392,11 @@
} else {
my @elems = split( /\|/, $l );
- if ( $#elems == 6 ) {
+ if ( $#elems == 4 ) {
+ my $nprocs = $elems[3];
+ $nprocs =~ s/ //g;
+ $open_jobs{$job}{nprocs} = $nprocs;
+ } elsif ( $#elems == 6 ) {
my $host = $elems[4];
$host =~ s/ //g;
@@ -2416,7 +2420,7 @@
}
#if ( $conf{"verbose"} ) {
- # print Dumper \%open_jobs;
+ #print Dumper \%open_jobs;
#}
}
@@ -2462,7 +2466,7 @@
my $cmd = "orterun -machinefile $fn -np $i $prefix";
my $hosts = $#hosts + 1;
- return ( $cmd, undef, $hosts );
+ return ( $cmd, $open_jobs{$job}{nprocs}, $hosts );
}
sub open_cleanup_pcmd {
@@ -2795,11 +2799,11 @@
# Nicely format process information.
# XXX: proc-sort-key should probably sort on column headers as
# well as keys.
+# Idealy we'd know what format we wanted and only ask the nodes
+# to report relevent info, for now they still report everything.
sub show_proc_format {
my ( $handle, $nlines ) = @_;
- my $lines = $nlines->{lines};
-
my @proc_format_array;
my %proc_format_header;
my $show_fields = 0;
@@ -2826,21 +2830,21 @@
}
my @all;
+ my $lines = $nlines->{target_responce};
foreach my $tag ( sort ( keys %$lines ) ) {
my %hash;
$hash{vp} = $tag;
- foreach my $data ( @{ $lines->{$tag} } ) {
- if ( $data =~ /([\w\.]+)\:[ \t]*(.+)/ ) {
- my $key = lc($1);
-
- next unless defined $proc_format_lengths{$key} or
$show_fields;
+ foreach my $key ( keys( %{ $lines->{$tag} } ) ) {
- if ( length($2) > $proc_format_lengths{$key} ) {
- $proc_format_lengths{$key} = length($2);
- }
+ my $value = $lines->{$tag}{$key};
+ next unless defined $proc_format_lengths{$key} or $show_fields;
- $hash{$key} = $2;
+ if ( length($value) > $proc_format_lengths{$key} ) {
+ $proc_format_lengths{$key} = length($value);
}
+
+ $hash{$key} = $value;
+
}
if ($show_fields) {
my @fields = sort ( keys(%hash) );
@@ -2861,8 +2865,6 @@
}
my $line = join( $separator, @res );
print "$line\n";
-
- #print "@proc_format_array\n";
}
foreach my $hash (@all) {
my @res;
@@ -2876,7 +2878,6 @@
my $line = join( $separator, @res );
print "$line\n";
}
-
}
sub show_results {
@@ -2965,6 +2966,11 @@
}
}
+sub default_output_handler {
+ my $d = shift;
+ print Dumper $d;
+}
+
sub go_file {
my $file = shift;
my $mode = shift;
@@ -3115,17 +3121,33 @@
sub issue_command_to_inner {
my ( $cdata, $cmd ) = @_;
my $str = my_encode($cmd);
+ if ( $conf{"full-duplex"} eq "debug" ) {
+ printf( "Sending command to inner, %d bytes\n", length($str) );
+ print( Dumper($cmd) );
+
+ }
$cdata->{socket}->print("$str\n");
}
sub command_from_inner {
- my ( $handle, $cdata, $line ) = @_;
+ my ( $comm_data, $cdata, $line ) = @_;
if ( $line eq "Welcome" ) {
my $req;
$req->{mode} = "signon";
- $req->{connection_tree} = $handle->{connection_tree};
- $req->{remote} = $handle->{remote};
+ $req->{connection_tree} = $comm_data->{connection_tree};
+ $req->{remote} = $comm_data->{remote};
+
+ # Also send over some of the per-run (as opposed to per-mode)
+ # configuration options.
+ # XXX: Need to send over scripts and other stuff here as well.
+ $req->{jobconfig}{jobid} = $comm_data->{jobid};
+ $req->{jobconfig}{rmgr} = $conf{rmgr};
+
+ if ( $conf{rmgr} eq "orte" ) {
+ $req->{jobconfig}{"orte-data"} = $open_jobs{
$comm_data->{jobid} };
+ }
+
issue_command_to_inner( $cdata, $req );
return;
}
@@ -3133,33 +3155,46 @@
# A reply from inner.
my $d = my_decode($line);
- if ( $handle->{state} eq "connecting" ) {
- $handle->{state} = "live";
+ if ( $conf{"full-duplex"} eq "debug" ) {
+ printf( "Reply from inner, %d bytes\n", length($line) );
+ print( Dumper($d) );
+ }
+
+ if ( $comm_data->{state} eq "connecting" ) {
+
+ #XXX: Check all target_processes are here.
+ print Dumper $d;
+
+ $comm_data->{state} = "live";
my $req;
- $req->{mode} = $handle->{mode};
- $req->{jobid} = $handle->{jobid};
+ $req->{mode} = $comm_data->{mode};
+ $req->{jobid} = $comm_data->{jobid};
$req->{cinner} = \%cinner;
issue_command_to_inner( $cdata, $req );
return;
}
- if ( $handle->{state} eq "live" ) {
- $handle->{state} = "shutdown";
+ if ( $comm_data->{state} eq "live" ) {
+ $comm_data->{state} = "shutdown";
my $req;
$req->{mode} = "exit";
issue_command_to_inner( $cdata, $req );
- $allfns{ $handle->{mode} }{out_handler}( undef, $d );
+ if ( defined($allfns{ $comm_data->{mode} }{out_handler})) {
+ $allfns{ $comm_data->{mode} }{out_handler}( undef, $d );
+ } else {
+ default_output_handler($d);
+ }
return;
}
- if ( $handle->{state} eq "shutdown" ) {
+ if ( $comm_data->{state} eq "shutdown" ) {
# Nothing to do here.
return;
}
- print("Hmm, unknown state! $handle->{state}\n");
+ print("Hmm, unknown state! $comm_data->{state}\n");
return;
}
@@ -5292,14 +5327,24 @@
return $jiffies;
}
+my $proc_info;
+
sub proc_output {
my ( $vp, $key, $value ) = @_;
- output( $vp, "$key: $value" );
+ if ( $confInner{mode} eq "proc-summary" ) {
+ $proc_info->{$vp}{ lc($key) } = $value;
+ } else {
+ output( $vp, "$key: $value" );
+ }
}
sub show_proc_all {
my ($list) = @_;
+ if ( $confInner{mode} eq "proc-summary" ) {
+ $proc_info = undef;
+ }
+
my @all;
foreach my $proc ( @{$list} ) {
@@ -5373,6 +5418,10 @@
proc_output( $vp, "pcpu", $used_str );
}
+
+ if ( $confInner{mode} eq "proc-summary" ) {
+ return $proc_info;
+ }
}
sub show_proc {
@@ -5823,16 +5872,18 @@
debug( $vp, "Looking at $vp, pid: $pid" );
- if ( defined $allfns{ $confInner{mode} }{handler} ) {
- my $res = $allfns{ $confInner{mode} }{handler}( $vp, $pid );
- if ( defined $allfns{ $confInner{mode} }{out_handler} ) {
- output_dtype( $vp, $res );
- }
- } else {
+ if ( $confInner{pre}
+ or not defined( $allfns{ $confInner{mode} }{handler} ) )
+ {
my %d;
$d{pid} = $pid;
$d{vp} = $vp;
push( @{ $confInner{"all-pids"} }, \%d );
+ } else {
+ my $res = $allfns{ $confInner{mode} }{handler}( $vp, $pid );
+ if ( defined $allfns{ $confInner{mode} }{out_handler} ) {
+ output_dtype( $vp, $res );
+ }
}
}
@@ -6084,7 +6135,15 @@
sub open_find_pids {
my $job = shift;
- open_get_data( $confInner{"open-ps"} );
+
+ # Be careful here, we are the inner process then load data from
+ # the outer.
+ if ( defined $confInner{"orte-data"} ) {
+ %open_jobs = ();
+ $open_jobs{$job} = $confInner{"orte-data"};
+ } else {
+ open_get_data( $confInner{"open-ps"} );
+ }
my $hostname = hostname();
foreach my $rank ( keys( %{ $open_jobs{$job}{ranks}{$hostname} } ) ) {
@@ -6224,6 +6283,34 @@
$handle->{target_responce} = undef;
}
+# Find and report pids as part of the signon protocol, we should
+# also report name
+sub inner_find_pids {
+ my ( $netdata, $cmd ) = @_;
+
+ # Cache config data and search for pids, storing
+ # data in $netdata->{target_responce} and $netdata->??
+
+ if ( $cmd->{jobconfig}{rmgr} eq "orte" ) {
+ $confInner{"orte-data"}{ $cmd->{jobconfig}{jobid} } =
+ $cmd->{jobconfig}{"orte-data"};
+ $confInner{"orte-data"} = $cmd->{jobconfig}{"orte-data"};
+ }
+ $confInner{pre} = 1;
+
+ $rmgr{ $cmd->{jobconfig}{rmgr} }{find_pids}( $cmd->{jobconfig}{jobid}
);
+
+ foreach my $proc ( @{ $confInner{"all-pids"} } ) {
+ my $pid = $proc->{pid};
+ my $vp = $proc->{vp};
+ my $name = readlink("/proc/$pid/exe");
+ my $state = find_from_status( $pid, "State" );
+ $netdata->{target_responce}{$vp}->{pid} = $pid;
+ $netdata->{target_responce}{$vp}->{name} = $name;
+ $netdata->{target_responce}{$vp}->{state} = $state;
+ }
+}
+
# Receive a command (perl reference) from our parent.
#
# When we receive a command:
@@ -6241,6 +6328,7 @@
exists $cmd->{connection_tree}{ $confInner{hostname}
}{children} )
{
$netdata->{children} = 0;
+ inner_find_pids( $netdata, $cmd );
return;
}
@@ -6264,6 +6352,7 @@
$netdata->{connections}{$socket} = \%cdata;
push @{ $netdata->{child_sockets} }, $socket;
}
+ inner_find_pids( $netdata, $cmd );
return;
}
@@ -6288,14 +6377,11 @@
$confInner{mode} = $cmd->{mode};
- # Find the pids and register them all.
- $rmgr{ $confInner{rmgr} }{find_pids}( $cmd->{jobid} );
-
# Now do the work.
- my $z = $allfns{ $cmd->{mode} }{handler_all}( $confInner{"all-pids"} );
+ my $res = $allfns{ $cmd->{mode} }{handler_all}( $confInner{"all-pids"}
);
- $netdata->{target_responce} = $z;
- $netdata->{all_replys}{target_responce} = $z;
+ $netdata->{target_responce} = $res;
+ $netdata->{all_replys}{target_responce} = $res;
return;
}
More information about the padb-devel
mailing list