[padb-devel] [padb commit] r65 - Big cleanup of the full-duplex code, it's now the
codesite-noreply at google.com
codesite-noreply at google.com
Wed Jun 24 13:13:27 BST 2009
Author: apittman
Date: Wed Jun 24 01:10:44 2009
New Revision: 65
Modified:
branches/full-duplex/src/padb
Log:
Big cleanup of the full-duplex code, it's now the
default and works for all modes. Started removing the old
code. Still to do:
Re-instate the QsNet stats modes.
Implement full-report to use a single inner instance.
There is still some more code which can now be removed.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Wed Jun 24 01:10:44 2009
@@ -349,9 +349,6 @@
my %allfns;
-my $line_formatted = 0;
-my $countoutput = 0;
-
my %cinner; # Config options to be passed to inner.
my $rem_jobid;
@@ -2884,6 +2881,7 @@
}
}
+# XXX: Now only called when loading things from file.
sub show_results {
my ( $nlines, $mode, $handle ) = @_;
@@ -3045,10 +3043,6 @@
return;
}
- if ( not $line_formatted ) {
- die("input file specified but no formatting selected\n");
- }
-
open( PCMD, "$file" ) or die "$prog: cant open file $file: $!\n";
my @data = <PCMD>;
close(PCMD);
@@ -3199,6 +3193,10 @@
$req->{jobconfig}{jobid} = $comm_data->{jobid};
$req->{jobconfig}{rmgr} = $conf{rmgr};
+ if ( $#ranks != -1 ) {
+ @{ $req->{ranks} } = @ranks;
+ }
+
if ( $conf{rmgr} eq "orte" ) {
$req->{jobconfig}{"orte-data"} = $open_jobs{
$comm_data->{jobid} };
}
@@ -3322,7 +3320,6 @@
my $jobid = shift;
my $cmd = shift;
my $ncpus = shift;
- my $raw = shift;
my $stats = shift;
my $mode = shift;
my $h = shift;
@@ -3478,127 +3475,7 @@
}
}
- cleanup_pcmd();
-
- exit(0);
-}
-
-sub go_job_once {
- my $jobid = shift;
- my $cmd = shift;
- my $ncpus = shift;
- my $raw = shift;
- my $stats = shift;
- my $mode = shift;
- my $h = shift;
-
- my $errors = 0;
-
- my $report_errors = 1;
-
- $report_errors = 0 if ($full_report);
-
- my $pcmd = {
- pid => -1,
- in => "",
- out => *OUT,
- err => *ERR,
- };
-
- # According to the docs there is potential for deadlock here
- # if the amount of data coming in is enough to fill the buffers
- # We should really use IO::select it's not clear to me how
- # you detect EOF in that case and this works for now.
-
- $pcmd->{pid} = open3( $pcmd->{in}, *OUT, *ERR, $cmd )
- or die "Unable to open3() pcmd: $!\n";
-
- close $pcmd->{in};
-
- {
- my %lines;
- my @data;
-
- if ($raw) {
- my $handle = $pcmd->{out};
- while (<$handle>) {
- my $line = $_;
- print("$line");
- }
- } else {
- if ($stats) {
- local $/ = "\n\n";
- my $handle = $pcmd->{out};
- while (<$handle>) {
- s/\n//g;
- push @data, $_;
- }
- } elsif ($line_formatted) {
- my $handle = $pcmd->{out};
- while (<$handle>) {
- my $line = $_;
- process_line( $line, \%lines );
- }
- post_process_lines( \%lines );
- }
- }
-
- my $handle = $pcmd->{err};
- while (<$handle>) {
- my $line = $_;
- if ($report_errors) {
- print( STDERR "Error ($jobid,$mode): $line" );
- }
- $errors++;
- }
-
- close $pcmd->{in};
- close $pcmd->{out};
- close $pcmd->{err};
-
- waitpid( $pcmd->{pid}, 0 );
- my $res = $?;
-
- printf("result from parallel command was $res\n")
- if ( $conf{"verbose"} );
-
- if ( $res != 0 ) {
- my %status = rc_status($res);
- if ( job_is_running($jobid) ) {
- if ($report_errors) {
- printf(
- "Failed to run parallel command (rc =
$status{rc})\n");
- }
- } else {
- printf("Job $jobid is no longer active\n");
- return 1;
- }
- }
-
- if ($stats) {
- if ( $conf{"stats-raw"} ) {
- local $, = "\n\n";
- print @data;
- print "\n";
- } else {
- my $s = read_stats(@data);
- show_stats($s);
- }
- } elsif ($line_formatted) {
- if ( defined $ncpus ) {
- for ( my $vp = 0 ; $vp < $ncpus ; $vp++ ) {
- push(
- @{ $lines{lines}{$vp} },
- "no output for this process"
- ) if ( not defined $lines{lines}{$vp}[0] );
- }
- }
- show_results( \%lines, $mode, $h );
- }
- }
-
- return $errors;
-
+ return 0;
}
sub find_padb_secret {
@@ -3632,19 +3509,6 @@
sub go_job {
my $jobid = shift;
my $mode = shift;
- my $rops = "";
-
- if ( defined $mode ) {
- $rops .= " --$allfns{$mode}{arg_long}";
-
- if ( defined $allfns{$mode}{secondary} ) {
- foreach my $sec ( @{ $allfns{$mode}{secondary} } ) {
- $rops .= " --$sec->{arg_long}=$sec->{value}";
- }
- }
- }
-
- my $key = job_to_key($jobid);
my $stats;
@@ -3664,10 +3528,6 @@
}
- foreach my $rank (@ranks) {
- $rops .= " --rank=$rank";
- }
-
$conf{"verbose"} && print "Attaching to job $jobid\n";
$rem_jobid = $jobid;
@@ -3687,69 +3547,41 @@
$conf{"verbose"} && defined $ncpus && print "Job has $ncpus cpus\n";
$conf{"verbose"} && defined $hosts && print "Job has $hosts hosts\n";
- # Some versions of perl like to have a space after the O and report
that
- # -ormgr isn't a valid option if it's not there, perhaps this is a bug
- # in GetOptions but for now just work around it.
- foreach my $opt ( keys %cinner ) {
- $rops .= " -O $opt=\"$cinner{$opt}\"";
- }
-
# Maybe do it this way, edb works best when run with the same
LD_LIBRARY_PATH
# as the application. It's very important when running the message queue
# extraction code but less so here. You may find you get linker errors
though
# although they shouldn't be to hard to work around.
- # Another problem, if using slurm then the key isn't valid, you need to
- # convert from jobId to key locally on the node, hence you need to use
- # a padb-helper process
- if ( $stats_total or $group ) {
- $stats = 1;
- if ( defined $key ) {
- $cmd .=
- " $conf{edb} --stats-raw --parallel --key=$key
$conf{edbopt}";
- } else {
- $cmd .=
- " $0 --inner --jobid=$rem_jobid $rops --stats-full
$conf{edbopt}";
- }
- } else {
- $rops .= " --line-formatted" if ( $line_formatted or $#ranks != 0
);
- $cmd .= " $0 --inner --jobid=$rem_jobid" . $rops;
- }
+ # Another problem, if using slurm then the key isn't valid, you need to
+ # convert from jobId to key locally on the node, hence you need to use
+ # a padb-helper process
+ #if ( $stats_total or $group ) {
+ # $stats = 1;
+ # if ( defined $key ) {
+ # $cmd .=
+ # " $conf{edb} --stats-raw --parallel --key=$key
$conf{edbopt}";
+ # } else {
+ # $cmd .=
+ # " $0 --inner --jobid=$rem_jobid $rops --stats-full
$conf{edbopt}";
+ # }
+ #} else {
+ $cmd .= " $0 --inner --full-duplex";
- ( $conf{"verbose"} > 1 or $conf{"showcmd"} ) && print "$cmd\n";
+ #}
- my $raw = ( ( not $stats ) and ( not $line_formatted ) );
+ ( $conf{"verbose"} > 1 or $conf{"showcmd"} ) && print "$cmd\n";
my $h;
if ( defined $allfns{$mode}{pre_out_handler} ) {
$h = $allfns{$mode}{pre_out_handler}($ncpus);
}
- # This makes thing easier...
- #if ($watch) {
- ## while (1) {
- # maybe_clear_screen();
- # my $errors =
- # go_job_once( $jobid, $cmd, $ncpus, $raw, $stats, $mode, $h
);
- # if ( $errors != 0 ) {
- # cleanup_pcmd();
- # return $errors;
- # }
- # sleep( $conf{"interval"} );
- # }
- #}
- my $errors;
- if ( $conf{"full-duplex"} ) {
- if ( not defined $hosts ) {
- printf("Full duplex mode needs to know the host count\n");
- printf("Which is doesn't for this resource manager:
$conf{rmgr}\n");
- return 1;
- }
- $errors = go_parallel( $jobid, "$cmd --full-duplex",
- $ncpus, $raw, $stats, $mode, $h, $hosts );
- } else {
- $errors = go_job_once( $jobid, $cmd, $ncpus, $raw, $stats, $mode,
$h );
+ if ( not defined $hosts ) {
+ printf("Full duplex mode needs to know the host count\n");
+ printf("Which is doesn't for this resource manager:
$conf{rmgr}\n");
+ return 1;
}
+ my $errors = go_parallel( $jobid, $cmd, $ncpus, $stats, $mode, $h,
$hosts );
cleanup_pcmd();
return $errors;
}
@@ -3956,12 +3788,12 @@
# Bit of a cheat here, do two things at once...
# This should probably me modified to work better on
# non Quadrics systems.
- my $res;
- $stats_total = 1;
- $group = 1;
- $res = go_job( $full_report, "full-report" );
- undef $stats_total;
- undef $group;
+ #my $res;
+ #$stats_total = 1;
+ #$group = 1;
+ #$res = go_job( $full_report, "stats" );
+ #undef $stats_total;
+ #undef $group;
# Don't exit on failure here.
#if ( $res != 0 ) {
@@ -3970,8 +3802,6 @@
printf("\n");
- $line_formatted = 1;
-
$compress = 1;
go_job( $full_report, "queue" );
undef $compress;
@@ -3979,7 +3809,6 @@
printf("\n");
$strip_above_wait = 0;
- $countoutput = 1;
$tree = 1;
go_job( $full_report, "stack" );
undef $tree;
@@ -4026,21 +3855,13 @@
);
}
- $line_formatted = ( grep { $_ } ( $compress, $compress_C, $tree ) );
- if ( $line_formatted > 1 ) {
+ my $style_count = ( grep { $_ } ( $compress, $compress_C, $tree ) );
+ if ( $style_count > 1 ) {
cmdline_error(
"$prog: Error: only specify one of --compress, --compress-long or --tree\n"
);
}
- if ( defined $mode && $mode eq "proc-summary" ) {
- $line_formatted = 1;
- }
-
- if ( defined $mode && defined $allfns{$mode}{out_handler} ) {
- $line_formatted = 1;
- }
-
if ( not $input_file
and
( ( grep { $_ } ( $stats_total, $group, $have_allfns_option ) ) !=
1 )
@@ -4066,17 +3887,6 @@
cmdline_error("$prog: Error: --tree only works with
--stack-trace\n");
}
- if ( ( ( grep { $_ } ($stats_total) ) == 1 )
- and $line_formatted )
- {
- cmdline_error(
-"$prog: Error: requested output not compatible with requested formatting\n"
- );
- }
-
- $countoutput = 1
- if ( ( defined $mode and $mode eq "stack" ) or $conf{"verbose"} );
-
if ( defined($input_file) ) {
my $m = "input";
if ( defined $mode ) {
@@ -4583,7 +4393,7 @@
my $offset = 0;
my $str = "";
my @s = gdb_read_raw( $gdb, $strp, 128 );
- return undef if ( $s[0] eq undef );
+ return undef if ( not defined( $s[0] ) );
foreach my $d (@s) {
my $v = hex($d);
return $str if ( $v == 0 );
@@ -4620,7 +4430,7 @@
$stats->{symbol}++;
} elsif ( $cmd eq "data" ) {
my @r = gdb_read_raw( $gdb, $params[0], $params[1] );
- if ( $r[0] ne undef ) {
+ if ( defined( $r[0] ) ) {
$res = "@r";
$stats->{datareads}++;
$stats->{databytes} += $params[1];
@@ -4765,6 +4575,7 @@
sub show_mpi_queue_for_deadlock_all {
my ( $carg, $list ) = @_;
+ my $ret;
my @all;
foreach my $proc ( @{$list} ) {
@@ -4793,7 +4604,9 @@
my $gdb = $proc->{gdb};
my @mq = fetch_mpi_queue_gdb( $vp, $pid, $gdb );
- output_dtype( $vp, \@mq );
+ $ret->{$vp} = \@mq;
+
+ #output_dtype( $vp, \@mq );
}
foreach my $proc (@all) {
@@ -4801,6 +4614,7 @@
gdb_detach($gdb);
gdb_quit($gdb);
}
+ return $ret;
}
sub go_deadlock_detect {
@@ -4845,10 +4659,10 @@
}
}
- #print Dumper \%ad;
my $ret = "";
my $i_count = 0; # Interesting groups.
- foreach my $gid ( sort { $a <=> $b } keys %ad ) {
+ #foreach my $gid ( sort { $a <=> $b } keys %ad ) {
+ foreach my $gid ( sort keys %ad ) {
if ( $#target_groups != -1 ) {
next unless defined $tg[$gid];
@@ -4946,8 +4760,8 @@
# code work with input files, the whole thing is due
# a tidy-up on the full-duplex branch where this should
# be solved properly.
- if ( defined $lines->{raw} ) {
- $data = $lines->{raw};
+ if ( defined $lines->{target_responce} ) {
+ $data = $lines->{target_responce};
} else {
$data = $lines->{lines};
}
@@ -6044,19 +5858,10 @@
debug( $vp, "Looking at $vp, pid: $pid" );
- if ( $confInner{pre}
- or not defined( $allfns{ $confInner{mode} }{handler} ) )
- {
- my %d;
- $d{pid} = $pid;
- $d{vp} = $vp;
- push( @{ $confInner{"all-pids"} }, \%d );
- } else {
- my $res = $allfns{ $confInner{mode} }{handler}( $vp, $pid );
- if ( defined $allfns{ $confInner{mode} }{out_handler} ) {
- output_dtype( $vp, $res );
- }
- }
+ my %d;
+ $d{pid} = $pid;
+ $d{vp} = $vp;
+ push( @{ $confInner{"all-pids"} }, \%d );
}
sub maybe_show_pid {
@@ -6487,8 +6292,17 @@
$cmd->{jobconfig}{"orte-data"};
$confInner{"orte-data"} = $cmd->{jobconfig}{"orte-data"};
}
- $confInner{pre} = 1;
+# Save the rank list if supplied, if there is no list then assume all,
should probably
+# be sent over as part of the request rather than the header ready for
when padb
+# can handle multiple commands over one run.
+# XXX: We also need to handle the case where modes don't accept partial
input, for example
+# deadlock detect where this shouldn't be passed.
+ if ( exists( $cmd->{ranks} ) ) {
+ @ranks = @{ $cmd->{ranks} };
+ }
+
+# Query the resource manager to find the pids, they'll be added to
the "all-pids" field.
$rmgr{ $cmd->{jobconfig}{rmgr} }{find_pids}( $cmd->{jobconfig}{jobid}
);
foreach my $proc ( @{ $confInner{"all-pids"} } ) {
@@ -6500,6 +6314,7 @@
$netdata->{target_responce}{$vp}->{name} = $name;
$netdata->{target_responce}{$vp}->{state} = $state;
}
+
}
# Receive a command (perl reference) from our parent.
@@ -6770,7 +6585,6 @@
# Local vars to help with command line parsing
my @config_options;
- my $line_formatted;
my $jobid;
my $full_duplex;
my $outerloc;
@@ -6778,7 +6592,6 @@
my %optionhash = (
"config-option|O=s" => \@config_options,
"jobid=i" => \$jobid,
- "line-formatted" => \$line_formatted,
"rank=i" => \@ranks,
"stats-full" => \$stats,
"verbose|v+" => \$confInner{"verbose"},
@@ -6806,47 +6619,14 @@
GetOptions(%optionhash) or die("could not parse options\n");
+ $confInner{"myld"} = $ENV{"LD_LIBRARY_PATH"};
+
# If this works then nothing below here is needed as all
# requests can be sent over the socket.
- if ($full_duplex) {
- inner_loop_for_comms($outerloc);
- }
+ inner_loop_for_comms($outerloc);
+ exit(0);
my $mode;
-
- foreach my $arg ( keys %config_hash ) {
- next unless defined $config_hash{$arg};
- $mode = $arg;
- }
-
- $confInner{"mode"} = $mode;
-
- # Put the args in a hash so that they can be referenced by name.
- if ( defined $allfns{$mode}{secondary} ) {
- foreach my $sec ( @{ $allfns{$mode}{secondary} } ) {
- $confInner{"args"}{ $sec->{arg_long} } = $sec->{value};
- }
- }
-
- # Load all config options from the command line, unlike the outer
- # code we don't check them to be valid here, any set on the outer
- # command line are automatically passed on and they might not mean
- # anything to us so silently ignore them.
- foreach my $config_option (@config_options) {
- my @pairs = split( ",", $config_option );
- foreach my $pair (@pairs) {
- my ( $name, $val ) = split( "=", $pair );
- if ( not defined $confInner{$name} ) {
- debug undef, "Unknown option $name";
- }
- $confInner{$name} = $val;
- }
- }
-
- # Load some non user-modifiable data into conf now
- $confInner{"lineformatted"} = $line_formatted;
-
- $confInner{"myld"} = $ENV{"LD_LIBRARY_PATH"};
# $rjobid is used for accessing the stats on slurm
# systems, on rms it's just the jobId but on combined
More information about the padb-devel
mailing list