From codesite-noreply at google.com Mon Jun 8 17:47:03 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Mon, 08 Jun 2009 16:47:03 +0000
Subject: [padb-devel] [padb commit] r32 - Update the slurm link to the https
version of their website.
Message-ID: <001636ed657ae467d7046bd8fd91@google.com>
Author: apittman
Date: Mon Jun 8 09:46:18 2009
New Revision: 32
Modified:
trunk/doc/index.html
Log:
Update the slurm link to the https version of their website.
Modified: trunk/doc/index.html
==============================================================================
--- trunk/doc/index.html (original)
+++ trunk/doc/index.html Mon Jun 8 09:46:18 2009
@@ -50,7 +50,7 @@
Modified: trunk/doc/index.html
==============================================================================
--- trunk/doc/index.html (original)
+++ trunk/doc/index.html Mon Jun 8 14:14:56 2009
@@ -1,13 +1,12 @@
-
);
+ close FDI;
+ foreach my $fdi (@fdi) {
+ chomp($fdi);
+ my ( $key, $value ) = split( ":", $fdi );
+ $value =~ s/\t//g;
+ $fdhash{$key} = $value;
+ }
+ }
+ }
+ push( @all_fddata, \%fdhash );
+ }
+ foreach my $fd (@all_fddata) {
+ if ( defined $fd->{pos} ) {
+ output( $vp,
+ "fd$fd->{fd}: $fd->{target} \($fd->{pos}
$fd->{flags}\)" );
+ } else {
+ output( $vp, "fd$fd->{fd}: $fd->{target}" );
+ }
}
}
if ( $confInner{"proc-shows-maps"} ) {
From codesite-noreply at google.com Tue Jun 9 11:57:00 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Tue, 09 Jun 2009 10:57:00 +0000
Subject: [padb-devel] [padb commit] r38 - Use symbolic names rather than
numbers when delivering
Message-ID: <000e0cd28d52e03a3e046be8372e@google.com>
Author: apittman
Date: Tue Jun 9 03:55:34 2009
New Revision: 38
Modified:
trunk/src/padb
Log:
Use symbolic names rather than numbers when delivering
a signal. Fixes issue #1.
Modified: trunk/src/padb
==============================================================================
--- trunk/src/padb (original)
+++ trunk/src/padb Tue Jun 9 03:55:34 2009
@@ -201,6 +201,7 @@
use Sys::Hostname;
use File::Temp qw(tempfile);
use MIME::Base64;
+use Config;
###############################################################################
#
@@ -566,6 +567,10 @@
my @config_options;
my %ic_names;
+# Populated in the outer args section so that outer code
+# can access secondary command line arguments by name.
+my %secondary_args;
+
sub parse_args_outer {
Getopt::Long::Configure("bundling");
@@ -627,6 +632,13 @@
$have_allfns_option++;
}
+ # Put the args in a hash so that they can be referenced by name.
+ if ( defined $allfns{$mode}{secondary} ) {
+ foreach my $sec ( @{ $allfns{$mode}{secondary} } ) {
+ $secondary_args{ $sec->{arg_long} } = $sec->{value};
+ }
+ }
+
return $mode;
}
@@ -3414,6 +3426,17 @@
);
}
+ # If delivering a signal check that it's valid.
+ if ( defined($mode) and ( $mode eq "kill" ) ) {
+ my $signal = $secondary_args{signal};
+ my %sig_names;
+ map { $sig_names{$_} = 1 } split( " ", $Config{"sig_name"} );
+
+ if ( not defined $sig_names{$signal} ) {
+ cmdline_error("$prog: Error: signal $signal is invalid\n");
+ }
+ }
+
if ( $tree and !( ( defined $mode && $mode eq "stack" ) or $input_file
) ) {
cmdline_error("$prog: Error: --tree only works with
--stack-trace\n");
}
@@ -5652,6 +5675,7 @@
$confInner{"mode"} = $mode;
+ # Put the args in a hash so that they can be referenced by name.
if ( defined $allfns{$mode}{secondary} ) {
foreach my $sec ( @{ $allfns{$mode}{secondary} } ) {
$confInner{"args"}{ $sec->{arg_long} } = $sec->{value};
@@ -5785,8 +5809,8 @@
'secondary' => [
{
'arg_long' => 'signal',
- 'type' => 'i',
- 'default' => '15'
+ 'type' => 's',
+ 'default' => 'TERM'
}
]
};
From codesite-noreply at google.com Tue Jun 9 12:43:21 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Tue, 09 Jun 2009 11:43:21 +0000
Subject: [padb-devel] [padb commit] r39 - Setup a find_pids callback for the
inner to use when
Message-ID: <000e0cd2e2a4a40652046be8dda1@google.com>
Author: apittman
Date: Tue Jun 9 04:42:53 2009
New Revision: 39
Modified:
trunk/src/padb
Log:
Setup a find_pids callback for the inner to use when
accessing the resource manager. Simplifies the code
and should make it easier to add more resource managers
in future.
Modified: trunk/src/padb
==============================================================================
--- trunk/src/padb (original)
+++ trunk/src/padb Tue Jun 9 04:42:53 2009
@@ -33,7 +33,9 @@
# maintainer.
# * Added "orte" to the list of resource managers supported
# * Don't enable local-qsnet on non-qsnet systems.
-#
+# * inner_main() now uses callbacks for resource manager support.
+# * --signal now takes names rather than numbers.
+
# Version 2.2
# * Add a --core-stack option along with --core and --exe to extract stack
# traces from core files.
@@ -165,12 +167,10 @@
# * Multi-pass argument handling, --kill also accepts --signal for example,
# this should really be done at the getopt layer. Also proper usage
# info for these secondary args.
-# * inner_main() should possibly use callbacks for resource manager
support,
-# it's starting to look a bit messy.
-# * find_slurm_pids() has some good code in it for finding parallel
processes
+# * slurm_find_pids() has some good code in it for finding parallel
processes
# this should be extrapolated out and so it can be used in the mpd case,
# ideally on non-rms systems (RMS rocks in this regard) the rmgr callback
-# should return a list of spawned pids and the code in find_slurm_pids()
should
+# should return a list of spawned pids and the code in slurm_find_pids()
should
# pass this tree to find the most interesting one.
# * The mode {handler} functions should only be called once per node, it
could then
# correctly handle $confInner{gdb_file} and also attach to every process
per node
@@ -259,6 +259,7 @@
# job_to_key job key no Convert from jobId to shm key.
# setup_pcmd job cmd|ncpus yes Command needed to launch shadow
jobs.
# cleanup_pcmd - - no Cleans up and temporary files.
+# find_pids job - maybe Called on the inner to locate
pids.
# inner_rmgr var n/a no Resource manager to masquerade
as.
@@ -270,6 +271,7 @@
'job_is_running' => \&rms_job_is_running,
'job_to_key' => \&rms_job_to_key,
'setup_pcmd' => \&rms_setup_pcmd,
+ 'find_pids' => \&rms_find_pids,
};
$rmgr{"mpd"} = {
@@ -277,6 +279,7 @@
'get_active_jobs' => \&mpd_get_jobs,
'setup_pcmd' => \&mpd_setup_pcmd,
'cleanup_pcmd' => \&mpd_cleanup_pcmd,
+ 'find_pids' => \&mpd_find_pids,
};
$rmgr{"orte"} = {
@@ -284,6 +287,7 @@
'get_active_jobs' => \&open_get_jobs,
'setup_pcmd' => \&open_setup_pcmd,
'cleanup_pcmd' => \&open_cleanup_pcmd,
+ 'find_pids' => \&open_find_pids,
};
$rmgr{"lsf-rms"} = {
@@ -298,12 +302,14 @@
'get_active_jobs' => \&slurm_get_jobs,
'job_is_running' => \&slurm_job_is_running,
'setup_pcmd' => \&slurm_setup_pcmd,
+ 'find_pids' => \&slurm_find_pids,
};
$rmgr{"local"} = {
'get_active_jobs' => \&local_get_jobs,
'job_is_running' => \&local_job_is_running,
'setup_pcmd' => \&local_setup_pcmd,
+ 'find_pids' => \&local_find_pids,
};
$rmgr{"local-qsnet"} = {
@@ -5414,7 +5420,7 @@
}
# Do the right thing with slurm...
-sub find_slurm_pids {
+sub slurm_find_pids {
my $jobid = shift;
# Slurm has the concept of a "job" and a "job step" which are
@@ -5509,7 +5515,7 @@
}
# Local processes per node, i.e. no resource manager support.
-sub find_local_pids {
+sub local_find_pids {
my $pid = shift;
# Hard-wire this to vp 0, probably not true but without the resource
manager it's difficult
@@ -5522,7 +5528,7 @@
maybe_show_pid( $vp, $pid );
}
-sub find_mpd_pids {
+sub mpd_find_pids {
my $job = shift;
my $d = mpd_get_data();
@@ -5533,7 +5539,7 @@
}
}
-sub find_open_pids {
+sub open_find_pids {
my $job = shift;
open_get_data( $confInner{"open-ps"} );
my $hostname = hostname();
@@ -5543,7 +5549,7 @@
}
}
-sub show_all_pids {
+sub rms_find_pids {
my $jobid = shift;
my %vps;
@@ -5703,6 +5709,10 @@
$confInner{"myld"} = $ENV{"LD_LIBRARY_PATH"};
+ # $rjobid is used for accessing the stats on slurm
+ # systems, on rms it's just the jobId but on combined
+ # slurm/rms systems it's modified to be the rms id
+ # and the jobid is left as the slurm job id.
my $rjobid = $jobid;
if ( exists $ENV{"SLURM_PROCID"} ) {
$rjobid = get_rms_jobid($jobid);
@@ -5719,25 +5729,23 @@
exit(0);
}
- if ( $confInner{"rmgr"} eq "local" ) {
-
- # Takes a pid.
- find_local_pids($jobid);
-
- } elsif ( $confInner{"rmgr"} eq "mpd" ) {
- find_mpd_pids($jobid);
+ # Handle resource managers better, simply call a callback
+ # as the outer does.
+ # As usual there is a special case, on Slurm systems
+ # running QsNet you can have the RMS kernel module loaded
+ # and these need to be handled differently so deal with
+ # them first and then go to the standard callback.
- } elsif ( $confInner{"rmgr"} eq "orte" ) {
- find_open_pids($jobid);
-
- } elsif ( -d "/proc/rms" ) {
+ if ( ( $confInner{rmgr} eq "slurm" ) and ( -d "/proc/rms" ) ) {
# Takes a RMS job id.
- show_all_pids($rjobid);
+ rms_find_pids($rjobid);
} else {
-
- # Takes a native job id.
- find_slurm_pids($jobid);
+ if ( not defined $rmgr{ $confInner{rmgr} }{find_pids} ) {
+ printf("Error, rmgr $confInner{rmgr} has no find_pids
callback\n");
+ exit(1);
+ }
+ $rmgr{ $confInner{rmgr} }{find_pids}($jobid);
}
if ( defined $allfns{$mode}{handler_all} ) {
From codesite-noreply at google.com Tue Jun 9 14:08:48 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Tue, 09 Jun 2009 13:08:48 +0000
Subject: [padb-devel] [padb commit] r40 - Make --full-report work better on
non Quadrics systems by
Message-ID: <001636e0a8262fbfe5046bea0f60@google.com>
Author: apittman
Date: Tue Jun 9 06:08:33 2009
New Revision: 40
Modified:
trunk/src/padb
Log:
Make --full-report work better on non Quadrics systems by
removing a lot of harmless error messages along the way.
Modified: trunk/src/padb
==============================================================================
--- trunk/src/padb (original)
+++ trunk/src/padb Tue Jun 9 06:08:33 2009
@@ -35,6 +35,7 @@
# * Don't enable local-qsnet on non-qsnet systems.
# * inner_main() now uses callbacks for resource manager support.
# * --signal now takes names rather than numbers.
+# * Check job is valid when using the --full-report option.
# Version 2.2
# * Add a --core-stack option along with --core and --exe to extract stack
@@ -458,8 +459,8 @@
XXXX
--full-report=JOBID All of the above.
- --nostrip-below-main Don't strip stack traces below main.
- --nostrip-above-wait Don't strip stack traces about elan_waitWord.
+ --nostrip-below-main Don\'t strip stack traces below main.
+ --nostrip-above-wait Don\'t strip stack traces about elan_waitWord.
--proc-format Specify information to show about processes.
@@ -1430,7 +1431,7 @@
# vp's only, if it's not set then display a total for everyone.
if ( not $d ) {
- print("Statistics not valid\n");
+ print("QsNet Statistics not valid\n");
return;
}
@@ -2366,6 +2367,11 @@
sub open_get_data {
my ($filename) = @_;
+ # Simply return if called more than once.
+ if ( keys(%open_jobs) != 0 ) {
+ return;
+
+ }
my $hostname = hostname();
my $job;
my @out;
@@ -2379,6 +2385,11 @@
close OPEN;
}
+ # Handle being called multiple times, zero the hash every
+ # time we are called. Of course we could just return the
+ # existing hash which might be quicker.
+ %open_jobs = ();
+
foreach my $l (@out) {
chomp $l;
next if ( $l eq "" );
@@ -2389,26 +2400,19 @@
} else {
my @elems = split( /\|/, $l );
- # print "$#elems\X at elems\Y\n";
- if ( $#elems == 4 ) {
+ if ( $#elems == 6 ) {
- #print "@elems\n";
- } elsif ( $#elems == 6 ) {
-
- #print "@elems\n";
my $host = $elems[4];
$host =~ s/ //g;
$host =~ s/\t//g;
next if $host eq "Node";
$open_jobs{$job}{hosts}{$host}++;
- #print "Host is $host\n";
if ( $host eq $hostname ) {
my $name = $elems[1];
$name =~ /\[\[(\d+)\,(\d+)\]\,(\d+)\]/;
my $rank = $3;
- # my $rank = $elems[2];
my $pid = $elems[3];
$rank =~ s/ //g;
$pid =~ s/ //g;
@@ -2417,14 +2421,11 @@
}
}
- # print "$_";
}
if ( $conf{"verbose"} ) {
print Dumper \%open_jobs;
}
-
- # print keys %jobs;
}
sub open_get_jobs {
@@ -2950,6 +2951,10 @@
my $errors = 0;
+ my $report_errors = 1;
+
+ $report_errors = 0 if ($full_report);
+
my $pcmd = {
pid => -1,
in => "",
@@ -2998,7 +3003,9 @@
my $handle = $pcmd->{err};
while (<$handle>) {
my $line = $_;
- print( STDERR "Error ($jobid,$mode): $line" );
+ if ($report_errors) {
+ print( STDERR "Error ($jobid,$mode): $line" );
+ }
$errors++;
}
@@ -3015,7 +3022,10 @@
if ( $res != 0 ) {
my %status = rc_status($res);
if ( job_is_running($jobid) ) {
- printf("Failed to run parallel command (rc =
$status{rc})\n");
+ if ($report_errors) {
+ printf(
+ "Failed to run parallel command (rc =
$status{rc})\n");
+ }
} else {
printf("Job $jobid is no longer active\n");
return 1;
@@ -3333,6 +3343,14 @@
}
if ($full_report) {
+
+ if ( not job_is_running($full_report) ) {
+ printf( STDERR
+"Job $full_report is not active, use --show-jobs to see active jobs\n"
+ );
+ exit(1);
+ }
+
printf("padb version $version\n");
printf("full job report for job $full_report\n\n");
@@ -3342,7 +3360,7 @@
my $res;
$stats_total = 1;
$group = 1;
- $res = go_job( $full_report, undef );
+ $res = go_job( $full_report, "full-report" );
undef $stats_total;
undef $group;
From codesite-noreply at google.com Tue Jun 9 14:23:51 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Tue, 09 Jun 2009 13:23:51 +0000
Subject: [padb-devel] [padb commit] r41 - Remove a very old function which
is no longer called.
Message-ID: <000e0cd179e20e15c4046bea4544@google.com>
Author: apittman
Date: Tue Jun 9 06:22:39 2009
New Revision: 41
Modified:
trunk/src/padb
Log:
Remove a very old function which is no longer called.
Modified: trunk/src/padb
==============================================================================
--- trunk/src/padb (original)
+++ trunk/src/padb Tue Jun 9 06:22:39 2009
@@ -2693,31 +2693,6 @@
}
}
-sub stack_to_hash {
- my $line = shift;
- if ( $line =~ /([\w\?]*)\(\) at ([\?\w\/\.]*):(\d*)/ ) {
- my %data;
- $data{func} = $1;
- $data{file} = $2;
- $data{line} = $3;
- return %data;
- }
- return undef;
-}
-
-sub munge_stack_traces {
- my $lines = shift;
-
- foreach my $tag ( keys %$lines ) {
- for ( my $l = 0 ; $l <= $#{ $lines->{$tag} } ; $l++ ) {
- my %data = stack_to_hash( $lines->{$tag}->[$l] );
- if ( %data and $data{func} ) {
- $lines->{$tag}->[$l] = "$data{func}()";
- }
- }
- }
-}
-
sub sort_proc_hashes {
my $key = shift;
my @all = @_;
@@ -2768,10 +2743,6 @@
if ( $strip_below_main or $strip_above_wait ) {
strip_stack_traces($lines);
}
-
- #if ( defined $conf{stack_format} ) {
- # munge_stack_traces($lines);
- #}
}
if ($tree) {
From codesite-noreply at google.com Wed Jun 10 14:44:04 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Wed, 10 Jun 2009 13:44:04 +0000
Subject: [padb-devel] [padb commit] r42 - Write the extensions page and add
my email to the front
Message-ID: <000e0cd2dda43037b3046bfeabcc@google.com>
Author: apittman
Date: Wed Jun 10 06:43:10 2009
New Revision: 42
Modified:
trunk/doc/extensions.html
trunk/doc/header.html
trunk/doc/index.html
Log:
Write the extensions page and add my email to the front
page.
Modified: trunk/doc/extensions.html
==============================================================================
--- trunk/doc/extensions.html (original)
+++ trunk/doc/extensions.html Wed Jun 10 06:43:10 2009
@@ -1,10 +1,109 @@
-Patches
-The advanced Group Deadlock Detection algorithm within padb
-requires modifications to the MPI library to function properly.
-At this time patches are available for Open MPI svn trunk
-only.
-Contact the developer mailing
list
-for more information.
+
+MPI collective debugger extension proposal
+
+Overview
+The current
+MPI
debugger interface
+is used to export information from a running application to a debugger.
The current
+interface allows the debugger to look at an MPI process, to iterate over
communicators
+within that process and to view message queues associated with a
communicator.
+
+
+I propose an extension to this to export information about individual
communicators
+within a process, in particular information about collective operations
(MPI_Bcast,
+MPI_Reduce et. al.)
+
+
Implementation
+The specific information I propose is to add a communicator specific
counter for
+each possible collective where the counter simply records the number of
times the
+collective has been called on this communicator. Along with this is
keeping a second piece
+of data, that of if the process is still performing the collective
operation.
+
+
+
+A new enum is added to the interface, mqs_comm_class with values
for each collective
+call.
+
+
+
+A single extra callback function mqs_get_comm_coll_state is added
to the
+interface and queries the current communicator in the same way as
mqs_next_operation.
+This function takes the standard process parameter, a
mqs_comm_class enum as input
+for which collective to query and two int *, the first of these is
a pointer to a
+int set which should be set to the count of the number of calls to the
collective, the second
+is a pointer to an int which should be set to zero or one depending on whether the
collective operation is still
+active.
+
+
+
+A successful call to the mqs_get_comm_coll_state should return
mqs_ok with
+mqs_no_information being used in the case where information isn't
available. This allows
+further enum values to be added in the future should the mpi-forum approve
new collective
+functions without needing to change the debugger function interface.
+
+
Performance Impact
+Maintaining this data does add code to the "critical path" of the MPI
stack, in
+its simplest form all it requires is a pair of counter increments per
collective call,
+one on function entry and one on function exit so whilst there is a
non-zero run-time cost
+associated with maintaining this information it's a minimal one.
+
+mpi_interface.h
+The additions required to mpi_interface.h are shown below.
+
+
+typedef enum
+{
+ mqs_comm_barrier,
+ mqs_comm_broadcast,
+ mqs_comm_allgather,
+ mqs_comm_allgatherv,
+ mqs_comm_allreduce,
+ mqs_comm_alltoall,
+ mqs_comm_alltoallv,
+ mqs_comm_reduce_scatter,
+ mqs_comm_reduce,
+ mqs_comm_gather,
+ mqs_comm_gatherv,
+ mqs_comm_scan,
+ mqs_comm_scatter,
+ mqs_comm_scatterv
+} mqs_comm_class;
+
+/***********************************************************************
+ * Collective extension
+ *
+ * This extension should be considered optional and the debugger should
+ * correctly handle the case where it doesn't exist.
+ *
+ */
+
+/*
+ * Return the state of collective operations for the currently active
+ * communicator, that is the number of times the collective has been
+ * called and if the operation is still in progress.
+ *
+ * The first int is *really* mqs_comm_class.
+ */
+extern int mqs_get_comm_coll_state (mqs_process *, int, int *, int *);
+
+
+Benefits
+The extension allows a debugger or external program to know the state of
collective
+calls with the parallel program. In the typical scenario of debugging a
hung
+application this knowledge allows the debugger and programmer to know
instantly
+which processes are stuck in collective calls and which aren't, either
because they
+have successfully made the collective call and returned or because they
haven't
+made the calls other ranks in a communicator have. This information
allows swift
+identification of problem areas within the job where further investigation
may be
+required.
+
+
+This extension was originally developed in early 2007 whilst I was working
at
+Quadrics and has proved its value numerous times in real-life cases.
+
+
Sample Implementation
+At this time a sample implementation is available for OpenMPI only
although work
+is being done on an MPICH2 version.
Patch for OpenMPI trunk.
Modified: trunk/doc/header.html
==============================================================================
--- trunk/doc/header.html (original)
+++ trunk/doc/header.html Wed Jun 10 06:43:10 2009
@@ -1,7 +1,7 @@
Padb: A parallel debugging tool
-Parallel Application Discovery Browser
+Parallel Application Debugger
Padb
usage
download
Modified: trunk/doc/index.html
==============================================================================
--- trunk/doc/index.html (original)
+++ trunk/doc/index.html Wed Jun 10 06:43:10 2009
@@ -9,6 +9,9 @@
open source, non-interactive, command line, script-able tool intended
for use by programmers and system administrators alike.
+
+Padb is currently maintained outside of Quadrics by Ashley Pittman
+
Features
- Stack trace generation
@@ -40,7 +43,7 @@
kind of problems facing them at the time. It's been a part of the
Quadrics software stack for a number of years and has recently been
made available to a wider audience. It has been commercially supported
-for a number of years and is known to work at a scale of thousands of
+for a number of years and is known to work at a scale of tens of thousands
of
processes.
Parallel Environments
From codesite-noreply at google.com Thu Jun 11 10:58:14 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Thu, 11 Jun 2009 09:58:14 +0000
Subject: [padb-devel] [padb commit] r43 - Work on proc-info and proc-format,
measure percent
Message-ID: <00163630f5c9666b62046c0fa173@google.com>
Author: apittman
Date: Thu Jun 11 02:57:52 2009
New Revision: 43
Modified:
trunk/src/padb
Log:
Work on proc-info and proc-format, measure percent
cpu usage ourself based on jiffies rather than using
the figure presented by ps. Overhaul the proc-format
option so it aligns columns nicely with a properly
readable format.
Modified: trunk/src/padb
==============================================================================
--- trunk/src/padb (original)
+++ trunk/src/padb Thu Jun 11 02:57:52 2009
@@ -2727,6 +2727,79 @@
print("$s\n");
}
+sub show_proc_format {
+ my ( $nlines, $mode, $handle ) = @_;
+
+ my $lines = $nlines->{lines};
+
+ my @proc_format_array;
+ my $show_fields = 0;
+
+ my %proc_format_lengths;
+
+ # Split the command line options into a array.
+ foreach my $format (@proc_format) {
+ my @i = split( ",", $format );
+ foreach my $j (@i) {
+ my $key = lc($j);
+ push @proc_format_array, $key;
+ $proc_format_lengths{$key} = length($key);
+ $show_fields = 1 if ( $key eq "fields" );
+ }
+ }
+
+ my @all;
+ foreach my $tag ( sort ( keys %$lines ) ) {
+ my %hash;
+ $hash{vp} = $tag;
+ foreach my $data ( @{ $lines->{$tag} } ) {
+ if ( $data =~ /([\w\.]+)\:[ \t]*(.+)/ ) {
+ my $key = lc($1);
+
+ next unless defined $proc_format_lengths{$key};
+
+ if ( length($2) > $proc_format_lengths{$key} ) {
+ $proc_format_lengths{$key} = length($2);
+ }
+
+ $hash{$key} = $2;
+ }
+ }
+ if ($show_fields) {
+ my @fields = sort ( keys(%hash) );
+ print "@fields\n";
+ exit(0);
+ }
+ push @all, \%hash;
+ }
+ @all = sort_proc_hashes( $conf{"proc-sort-key"}, @all );
+
+ if ( $conf{"proc-show-header"} ) {
+ my @res;
+ foreach my $key (@proc_format_array) {
+ my $l .= sprintf( "%$proc_format_lengths{$key}s", $key );
+ push @res, $l;
+ }
+ print "@res\n";
+
+ #print "@proc_format_array\n";
+ }
+ foreach my $hash (@all) {
+ my @res;
+ my @res;
+ foreach my $key (@proc_format_array) {
+ my $value = "?";
+ if ( defined $hash->{$key} ) {
+ $value = $hash->{$key};
+
+ }
+ push @res, sprintf( "%$proc_format_lengths{$key}s", $value );
+ }
+ print "@res\n";
+ }
+
+}
+
sub show_results {
my ( $nlines, $mode, $handle ) = @_;
@@ -2776,48 +2849,7 @@
}
}
} elsif ( $mode eq "pinfo" and $#proc_format != -1 ) {
- my @proc_format_array;
- my $show_fields = 0;
-
- foreach my $format (@proc_format) {
- my @i = split( ",", $format );
- foreach my $j (@i) {
- my $key = lc($j);
- push @proc_format_array, $key;
- $show_fields = 1 if ( $key eq "fields" );
- }
- }
-
- my @all;
- foreach my $tag ( sort ( keys %$lines ) ) {
- my %hash;
- $hash{vp} = $tag;
- foreach my $data ( @{ $lines->{$tag} } ) {
- if ( $data =~ /(\w+)\:[ \t]*(.+)/ ) {
- $hash{ lc($1) } = $2;
- }
- }
- if ($show_fields) {
- my @fields = sort ( keys(%hash) );
- print "@fields\n";
- exit(0);
- }
- push @all, \%hash;
- }
- @all = sort_proc_hashes( $conf{"proc-sort-key"}, @all );
-
- if ( $conf{"proc-show-header"} ) {
- print "@proc_format_array\n";
- }
- foreach my $hash (@all) {
- my @res;
- foreach my $key (@proc_format_array) {
- if ( defined $hash->{$key} ) {
- push @res, $hash->{$key};
- }
- }
- print "@res\n";
- }
+ show_proc_format( $nlines, $mode, $handle );
}
}
@@ -4742,17 +4774,16 @@
output $vp, "exe:$exe";
}
- # It should be possible to calculate this from info
- # in /proc but I've not discovered a way so do it
- # so for now just call ps.
- # Turns out this isn't great either, ps reports time
+ # pcpu is calculated from /proc elsewhere.
+ # This isn't either, ps reports time
# as a percentage since the program started so
# isn't live as the top-reported figure is.
- my $pcpu = `ps --pid $pid -o pcpu= 2>/dev/null`;
- chomp($pcpu);
- if ( $pcpu != "" ) {
- output( $vp, "pcpu:$pcpu%" );
- }
+
+ #my $pcpu = `ps --pid $pid -o pcpu= 2>/dev/null`;
+ #chomp($pcpu);
+ #if ( $pcpu != "" ) {
+ # output( $vp, "pcpu:$pcpu%" );
+ #}
show_task_file( $vp, "$dir/status" );
show_task_file( $vp, "$dir/wchan", "wchan" );
@@ -4833,6 +4864,120 @@
}
}
+# Convert the first line of /proc/stat to elapsed jiffies.
+sub string_to_jiffies {
+ my ($ps) = @_;
+
+ my @usecc = split( " ", $ps );
+
+ my $jiffies = 0;
+
+ # Remove the "cpu" prefix.
+ shift(@usecc);
+ foreach my $usecv (@usecc) {
+ $jiffies += $usecv;
+ }
+ return $jiffies;
+}
+
+sub add_and_divide_jiffies {
+ my ( $pre, $post ) = @_;
+
+ my $jiffies;
+
+ my @pre = split( " ", $pre );
+
+ return ( ( string_to_jiffies($pre) + string_to_jiffies($post) ) / 2 );
+}
+
+# Convert /proc/self/stat into used jiffies.
+sub stat_to_jiffies {
+ my $stat = shift;
+ my @values = split( " ", $stat );
+ my $jiffies = 0;
+ $jiffies += $values[13]; # utime
+ $jiffies += $values[14]; # stime
+ return $jiffies;
+}
+
+sub show_proc_all {
+ my ($list) = @_;
+
+ my @all;
+
+ foreach my $proc ( @{$list} ) {
+ my $pid = $proc->{pid};
+ open( $proc->{handle}, "/proc/$pid/stat" );
+ }
+
+ open( SFD, "/proc/stat\n" );
+
+ # Begin critical path.
+ my $stat = <SFD>;
+
+ foreach my $proc ( @{$list} ) {
+ my $pid = $proc->{pid};
+ my $h = $proc->{handle};
+ $proc->{stat_start} = <$h>;
+ seek( $proc->{handle}, 0, 0 );
+ }
+
+ seek( SFD, 0, 0 );
+ my $stat2 = <SFD>;
+
+ # End critical path.
+
+ my $jiffies_start = add_and_divide_jiffies( $stat, $stat2 );
+
+ foreach my $proc ( @{$list} ) {
+ my $vp = $proc->{vp};
+ my $pid = $proc->{pid};
+ show_proc( $vp, $pid );
+ }
+
+ sleep(1);
+
+ seek( SFD, 0, 0 );
+
+ # Begin critical path.
+ $stat = <SFD>;
+
+ foreach my $proc ( @{$list} ) {
+ my $pid = $proc->{pid};
+ my $h = $proc->{handle};
+ $proc->{stat_end} = <$h>;
+ close( $proc->{handle} );
+ }
+
+ seek( SFD, 0, 0 );
+ $stat2 = <SFD>;
+
+ # End critical path.
+
+ my $cpucount = 0;
+ while (<SFD>) {
+ if ( $_ =~ /^cpu\d/ ) {
+ $cpucount++;
+ }
+ }
+ close(SFD);
+
+ my $jiffies_end = add_and_divide_jiffies( $stat, $stat2 );
+
+ my $elapsed = $jiffies_end - $jiffies_start;
+
+ foreach my $proc ( @{$list} ) {
+ my $vp = $proc->{vp};
+ my $jpre = stat_to_jiffies( $proc->{stat_start} );
+ my $jpost = stat_to_jiffies( $proc->{stat_end} );
+ my $jused = $jpost - $jpre;
+ my $used = ( $jused / $elapsed ) * $cpucount * 100;
+ my $used_str = sprintf( "%d", $used );
+
+ output( $vp, "pcpu: $used_str" );
+ }
+}
+
sub show_proc {
my ( $vp, $pid ) = @_;
@@ -5832,10 +5977,10 @@
};
$allfns{pinfo} = {
- 'handler' => \&show_proc,
- 'arg_long' => 'proc-info',
- 'help' => "Show process information",
- 'options_i' => {
+ 'handler_all' => \&show_proc_all,
+ 'arg_long' => 'proc-info',
+ 'help' => "Show process information",
+ 'options_i' => {
"proc-shows-proc" => 1,
"proc-shows-fds" => 1,
"proc-shows-maps" => 0,
From codesite-noreply at google.com Thu Jun 11 11:39:50 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Thu, 11 Jun 2009 10:39:50 +0000
Subject: [padb-devel] [padb commit] r44 - Accept signal names in lower or
mixed case.
Message-ID: <001636163cc724a9d9046c10364b@google.com>
Author: apittman
Date: Thu Jun 11 03:38:40 2009
New Revision: 44
Modified:
trunk/src/padb
Log:
Accept signal names in lower or mixed case.
Modified: trunk/src/padb
==============================================================================
--- trunk/src/padb (original)
+++ trunk/src/padb Thu Jun 11 03:38:40 2009
@@ -3455,7 +3455,7 @@
# If delivering a signal check that it's valid.
if ( defined($mode) and ( $mode eq "kill" ) ) {
- my $signal = $secondary_args{signal};
+ my $signal = uc( $secondary_args{signal} );
my %sig_names;
map { $sig_names{$_} = 1 } split( " ", $Config{"sig_name"} );
@@ -5157,7 +5157,8 @@
sub kill_proc {
my ( $vp, $pid ) = @_;
- kill( $confInner{args}{signal}, $pid );
+ my $signal = uc( $confInner{args}{signal} );
+ kill( $signal, $pid );
}
sub show_queue {
From codesite-noreply at google.com Thu Jun 11 16:34:03 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Thu, 11 Jun 2009 15:34:03 +0000
Subject: [padb-devel] [padb commit] r45 - Be more intelligent when -a is
given and more
Message-ID: <0016e644cef651e2e2046c14524d@google.com>
Author: apittman
Date: Thu Jun 11 08:33:17 2009
New Revision: 45
Modified:
trunk/src/padb
Log:
Be more intelligent when -a is given and more
than one resource manager is detected. If only
one resource manager has active jobs then target
that resource manager.
Modified: trunk/src/padb
==============================================================================
--- trunk/src/padb (original)
+++ trunk/src/padb Thu Jun 11 08:33:17 2009
@@ -2552,18 +2552,24 @@
#
###############################################################################
+sub setup_rmgr {
+ $conf{"rmgr"} = shift;
+
+ # Now setup the variable for the rest of the program.
+ if ( defined $rmgr{ $conf{"rmgr"} }{inner_rmgr} ) {
+ $cinner{rmgr} = $rmgr{ $conf{"rmgr"} }{inner_rmgr};
+ } else {
+ $cinner{rmgr} = $conf{"rmgr"};
+ }
+}
+
sub find_rmgr {
# If it's been set on the command line and it's valid then just use what
we are given.
# Don't do any sanity checks here to cope with non-default installs.
- my $r = $conf{"rmgr"};
-
- if ( defined $rmgr{$r} ) {
- if ( defined $rmgr{ $conf{"rmgr"} }{inner_rmgr} ) {
- $cinner{rmgr} = $rmgr{ $conf{"rmgr"} }{inner_rmgr};
- }
-
+ if ( defined $rmgr{ $conf{"rmgr"} } ) {
+ setup_rmgr( $conf{"rmgr"} );
return;
}
@@ -2589,13 +2595,78 @@
exit(1);
}
- # Now setup the variable for the test of the program.
- $conf{"rmgr"} = $ok[0];
- if ( defined $rmgr{ $conf{"rmgr"} }{inner_rmgr} ) {
- $cinner{rmgr} = $rmgr{ $conf{"rmgr"} }{inner_rmgr};
- } else {
- $cinner{rmgr} = $conf{"rmgr"};
+ setup_rmgr( $ok[0] );
+}
+
+# Find any active resource manager, that is --any or --all
+# have been passed on the command line so look for any resource
+# manager that have active jobs, if there is one active resource
+# manager use that one, if there are zero or many exit with an
+# error.
+sub find_any_rmgr {
+
+# If it's been set on the command line and it's valid then just use what
we are given.
+# Don't do any sanity checks here to cope with non-default installs.
+
+ if ( defined $rmgr{ $conf{"rmgr"} } ) {
+ setup_rmgr( $conf{"rmgr"} );
+ return;
+ }
+
+ if ( $conf{"rmgr"} ne "auto" ) {
+ printf("Error, resource manager \"$conf{rmgr}\" not supported\n");
+ exit(1);
+ }
+
+ my @installed;
+ foreach my $res ( sort( keys %rmgr ) ) {
+ next unless defined $rmgr{$res}{is_installed};
+ if ( $rmgr{$res}{is_installed}() ) {
+ push @installed, $res;
+
+ }
+ }
+
+ # One resource manager is installed, good.
+ if ( $#installed == 0 ) {
+ setup_rmgr( $installed[0] );
+ return;
+ }
+
+ # No resource managers are installed, bad.
+ if ( $#installed == -1 ) {
+ printf(
+"Error, multiple resource managers detected, use -Ormgr=\n"
+ );
+ push @installed, "local-fd";
+ push @installed, "local";
+ printf("@installed\n");
+ exit(1);
+ }
+
+ my @active;
+ foreach my $res (@installed) {
+ my @jobs = $rmgr{$res}{get_active_jobs}($user);
+ if ( $#jobs != -1 ) {
+ push @active, $res;
+ }
+ }
+
+ # Only one resource manager has active jobs, let's use it.
+ if ( $#active == 0 ) {
+ setup_rmgr( $active[0] );
+ return;
}
+
+ # Multiple resource managers are installed and have jobs,
+ # bounce back to the user to specify which one they want.
+ printf(
+"Error, multiple active resource managers detected, use -Ormgr=\n"
+ );
+ push @installed, "local-fd";
+ push @installed, "local";
+ printf("@installed\n");
+ exit(1);
}
sub get_all_jobids {
@@ -3325,8 +3396,6 @@
exit(0);
}
- find_rmgr();
-
if ($core_stack) {
if ( not defined $core_name or not defined $exe_name ) {
printf(
@@ -3347,6 +3416,8 @@
if ($full_report) {
+ find_rmgr();
+
if ( not job_is_running($full_report) ) {
printf( STDERR
"Job $full_report is not active, use --show-jobs to see active jobs\n"
@@ -3392,12 +3463,14 @@
}
if ($show_jobs) {
+ find_rmgr();
my @jobids = get_all_jobids($user);
print("@jobids\n");
exit(0);
}
if ($local_stats) {
+
if ($watch) {
while (1) {
maybe_clear_screen();
@@ -3488,6 +3561,8 @@
if ( $any or $all ) {
+ find_any_rmgr();
+
@jobids = get_all_jobids($user);
printf( "Active jobs (%d) are @jobids\n", $#jobids + 1 )
if $conf{"verbose"};
@@ -3500,6 +3575,8 @@
exit 1;
}
} else {
+ find_rmgr();
+
foreach my $jobid (@ARGV) {
if ( job_is_running($jobid) ) {
push @jobids, $jobid;
From codesite-noreply at google.com Sun Jun 14 20:04:44 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Sun, 14 Jun 2009 19:04:44 +0000
Subject: [padb-devel] [padb commit] r46 - Add install targets to the
Makefile and a very basic
Message-ID: <0016e644de6852b992046c539dd9@google.com>
Author: apittman
Date: Sun Jun 14 12:03:53 2009
New Revision: 46
Added:
trunk/README
trunk/gpl-2.0.txt
trunk/lgpl-2.1.txt
trunk/src/padb.conf
Modified:
trunk/src/Makefile
Log:
Add install targets to the Makefile and a very basic
README about how to build and install the software.
Also add copies of the LGPL and GPL to satisfy the legal
requirements. At this stage simply acknowledge the licence
that Quadrics release this software under rather than
update to a newer version.
Added: trunk/README
==============================================================================
--- (empty file)
+++ trunk/README Sun Jun 14 12:03:53 2009
@@ -0,0 +1,39 @@
+
+###
+This file is part of Padb.
+
+Padb is free software: you can redistribute it and/or modify
+it under the terms of the GNU Lesser General Public License as published by
+the Free Software Foundation, either version 2.1 of the License, or
+(at your option) any later version.
+
+Padb is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public License
+along with Padb. If not, see <http://www.gnu.org/licenses/>.
+###
+
+Building:
+
+Padb itself is a Perl script and does not require any building;
+however, some functionality uses helper programs which do. To build
+these programs enter the following commands:
+
+cd src
+make
+
+
+Installing:
+
+Padb needs to be available on all nodes where it is expected to run.
+This can be achieved either by installing to a shared directory
+or by installing to every node in your cluster. In addition, an optional
+configuration file can be installed to any node where the padb command
+might be executed. To install on a cluster run the following commands:
+
+cd src
+sudo make install
+sudo make config_install
Added: trunk/gpl-2.0.txt
==============================================================================
--- (empty file)
+++ trunk/gpl-2.0.txt Sun Jun 14 12:03:53 2009
@@ -0,0 +1,339 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it. (Some other Free Software Foundation software is covered by
+the GNU Lesser General Public License instead.) You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must show them these terms so they know their
+rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ Finally, any free program is threatened constantly by software
+patents. We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary. To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License. The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language. (Hereinafter, translation is included without limitation in
+the term "modification".) Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+ 1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+ 2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) You must cause the modified files to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ b) You must cause any work that you distribute or publish, that in
+ whole or in part contains or is derived from the Program or any
+ part thereof, to be licensed as a whole at no charge to all third
+ parties under the terms of this License.
+
+ c) If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the most ordinary way, to print or display an
+ announcement including an appropriate copyright notice and a
+ notice that there is no warranty (or else, saying that you provide
+ a warranty) and that users may redistribute the program under
+ these conditions, and telling the user how to view a copy of this
+ License. (Exception: if the Program itself is interactive but
+ does not normally print such an announcement, your work based on
+ the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+ a) Accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of Sections
+ 1 and 2 above on a medium customarily used for software interchange; or,
+
+ b) Accompany it with a written offer, valid for at least three
+ years, to give any third party, for a charge no more than your
+ cost of physically performing source distribution, a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Sections 1 and 2 above on a medium
+ customarily used for software interchange; or,
+
+ c) Accompany it with the information you received as to the offer
+ to distribute corresponding source code. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form with such
+ an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it. For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable. However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License. Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+ 5. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Program or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+ 7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all. For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded. In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+ 9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+ 10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year>  <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) year name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+ `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs. If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.
Added: trunk/lgpl-2.1.txt
==============================================================================
--- (empty file)
+++ trunk/lgpl-2.1.txt Sun Jun 14 12:03:53 2009
@@ -0,0 +1,504 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL. It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+ Preamble
+
+ The licenses for most software are designed to take away your
+freedom to share and change it. By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+ This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it. You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations below.
+
+ When we speak of free software, we are referring to freedom of use,
+not price. Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+ To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights. These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+ For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you. You must make sure that they, too, receive or can get the source
+code. If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it. And you must show them these terms so they know their rights.
+
+ We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+ To protect each distributor, we want to make it very clear that
+there is no warranty for the free library. Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+
+ Finally, software patents pose a constant threat to the existence of
+any free program. We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder. Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+ Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License. This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License. We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+ When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library. The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom. The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+ We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License. It also provides other free software developers Less
+of an advantage over competing non-free programs. These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries. However, the Lesser license provides advantages in certain
+special circumstances.
+
+ For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it becomes
+a de-facto standard. To achieve this, non-free programs must be
+allowed to use the library. A more frequent case is that a free
+library does the same job as widely used non-free libraries. In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+ In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software. For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+ Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+ The precise terms and conditions for copying, distribution and
+modification follow. Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library". The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+
+ GNU LESSER GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+ A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+ The "Library", below, refers to any such software library or work
+which has been distributed under these terms. A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language. (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+ "Source code" for a work means the preferred form of the work for
+making modifications to it. For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control compilation
+and installation of the library.
+
+ Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope. The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it). Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+ 1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+ You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+ 2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+ a) The modified work must itself be a software library.
+
+ b) You must cause the files modified to carry prominent notices
+ stating that you changed the files and the date of any change.
+
+ c) You must cause the whole of the work to be licensed at no
+ charge to all third parties under the terms of this License.
+
+ d) If a facility in the modified Library refers to a function or a
+ table of data to be supplied by an application program that uses
+ the facility, other than as an argument passed when the facility
+ is invoked, then you must make a good faith effort to ensure that,
+ in the event an application does not supply such function or
+ table, the facility still operates, and performs whatever part of
+ its purpose remains meaningful.
+
+ (For example, a function in a library to compute square roots has
+ a purpose that is entirely well-defined independent of the
+ application. Therefore, Subsection 2d requires that any
+ application-supplied function or table used by this function must
+ be optional: if the application does not supply it, the square
+ root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole. If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works. But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+ 3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library. To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License. (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.) Do not make any other change in
+these notices.
+
+ Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+ This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+ 4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+ If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+ 5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library". Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+ However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library". The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+ When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library. The
+threshold for this to be true is not precisely defined by law.
+
+ If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work. (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+ Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+
+ 6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+ You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License. You must supply a copy of this License. If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License. Also, you must do one
+of these things:
+
+ a) Accompany the work with the complete corresponding
+ machine-readable source code for the Library including whatever
+ changes were used in the work (which must be distributed under
+ Sections 1 and 2 above); and, if the work is an executable linked
+ with the Library, with the complete machine-readable "work that
+ uses the Library", as object code and/or source code, so that the
+ user can modify the Library and then relink to produce a modified
+ executable containing the modified Library. (It is understood
+ that the user who changes the contents of definitions files in the
+ Library will not necessarily be able to recompile the application
+ to use the modified definitions.)
+
+ b) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (1) uses at run time a
+ copy of the library already present on the user's computer system,
+ rather than copying library functions into the executable, and (2)
+ will operate properly with a modified version of the library, if
+ the user installs one, as long as the modified version is
+ interface-compatible with the version that the work was made with.
+
+ c) Accompany the work with a written offer, valid for at
+ least three years, to give the same user the materials
+ specified in Subsection 6a, above, for a charge no more
+ than the cost of performing this distribution.
+
+ d) If distribution of the work is made by offering access to copy
+ from a designated place, offer equivalent access to copy the above
+ specified materials from the same place.
+
+ e) Verify that the user has already received a copy of these
+ materials or that you have already sent this user a copy.
+
+ For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it. However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+ It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system. Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+
+ 7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+ a) Accompany the combined library with a copy of the same work
+ based on the Library, uncombined with any other library
+ facilities. This must be distributed under the terms of the
+ Sections above.
+
+ b) Give prominent notice with the combined library of the fact
+ that part of it is a work based on the Library, and explaining
+ where to find the accompanying uncombined form of the same work.
+
+ 8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License. Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License. However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+ 9. You are not required to accept this License, since you have not
+signed it. However, nothing else grants you permission to modify or
+distribute the Library or its derivative works. These actions are
+prohibited by law if you do not accept this License. Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+ 10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions. You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+
+ 11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all. For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under any
+particular circumstance, the balance of the section is intended to apply,
+and the section as a whole is intended to apply in other circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices. Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+ 12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License may add
+an explicit geographical distribution limitation excluding those countries,
+so that distribution is permitted only in or among countries not thus
+excluded. In such case, this License incorporates the limitation as if
+written in the body of this License.
+
+ 13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation. If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+
+ 14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission. For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this. Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+ NO WARRANTY
+
+ 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Libraries
+
+ If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change. You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms of the
+ordinary General Public License).
+
+ To apply these terms, attach the following notices to the library. It is
+safest to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the library's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the library, if
+necessary. Here is a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ library `Frob' (a library for tweaking knobs) written by James Random Hacker.
+
+ <signature of Ty Coon>, 1 April 1990
+ Ty Coon, President of Vice
+
+That's all there is to it!
+
+
Modified: trunk/src/Makefile
==============================================================================
--- trunk/src/Makefile (original)
+++ trunk/src/Makefile Sun Jun 14 12:03:53 2009
@@ -1,8 +1,20 @@
+INSTALL_DIR=/usr/local/
+CONFIG_DIR=/etc
+
FILES = Makefile minfo.c mpi_interface.h padb
minfo.x: minfo.c mpi_interface.h
cc -g minfo.c -o minfo.x -ldl -Wall
+
+install: minfo.x
+ /bin/mkdir -p ${INSTALL_DIR}/bin
+ /bin/cp minfo.x ${INSTALL_DIR}/bin/
+ /bin/cp padb ${INSTALL_DIR}/bin/
+
+make config_install:
+ /bin/mkdir -p ${CONFIG_DIR}
+ /bin/cp padb.conf ${CONFIG_DIR}/
clean:
/bin/rm -f minfo.x
Added: trunk/src/padb.conf
==============================================================================
--- (empty file)
+++ trunk/src/padb.conf Sun Jun 14 12:03:53 2009
@@ -0,0 +1,36 @@
+
+#
+# padb.conf.
+#
+
+# A sample configuration file provided for padb.
+#
+# padb has many runtime options to control its behavior
+# in different ways. These options can be set on the command
+# line with the --config-option or -O options, in the environment,
+# via ~/.padbrc or via this file.
+
+# Options are of the format key=value where in some cases value
+# is a comma separated list.
+
+# This file is loaded on the node where the padb command is called;
+# its contents are passed and any options are then used globally
+# across the job on all nodes where padb is run. The local config file
+# is not parsed by remote padb processes.
+
+# Example options, use padb --config-option=help for more possibilities.
+
+# Resource manager:
+# If you have more than one resource manager installed
+# or padb thinks you do then you need to tell it which
+# one to use. Possible values here are slurm,orte,rms,mpd
+
+#rmgr=slurm
+
+# Stack-strip-above
+# In normal cases stack traces are truncated at known library
+# "progression functions" to permit ease of display and viewing.
+# If you are using a software stack padb doesn't know about then
+# you may over-ride the default list of progression functions to
+# improve the quality of stack traces given.
+#stack-strip-above=elan_waitWord,elan_pollWord,elan_deviceCheck,opal_condition_wait
From ashley at pittman.co.uk Sun Jun 14 22:33:27 2009
From: ashley at pittman.co.uk (Ashley Pittman)
Date: Sun, 14 Jun 2009 22:33:27 +0100
Subject: [padb-devel] Job-wide "top" interface.
Message-ID: <1245015207.7641.26.camel@localhost.localdomain>
All,
I've been putting some work into the proc-info option and have come up
with a workable "top" style mode, output of which is shown below.
Up until now, to do this the user had to provide --proc-info
--proc-format= but I'd like to simplify this to a single option
to make it easier to use, as I can imagine it'll be a lot more useful than
the basic --proc-info option.
Does anybody want input as to what the option should be called or what
information is shown by default? The working name for the option is
--proc-summary and the standard --proc-format is
"vp=vpid,hostname,pid,vmsize,vmrss,stat.state=S,pcpu=%cpu,name=command".
The format for this is a comma separated list of fields; if an equals sign
is present, the left hand side is used for the key and the right hand side
is shown as the column heading.
As before, it's possible to sort by the different fields using
-Oproc-sort-key=; would there be any requirement to change the sort
order (ascending/descending) as well?
This code is available as r47 if anybody is interested in running it,
speak quickly however due to the imminent release.
Ashley,
ashley at alpha:~/code/padb/src$ ./padb -a --proc-summary
vpid hostname pid vmsize vmrss S %cpu command
15 alpha 23441 73384 kB 4372 kB R 11 a.out
14 alpha 23440 73384 kB 4368 kB R 11 a.out
13 alpha 23439 73384 kB 4372 kB R 13 a.out
12 alpha 23438 73384 kB 4376 kB R 8 a.out
11 alpha 23437 73384 kB 4388 kB R 11 a.out
10 alpha 23436 73384 kB 4364 kB R 11 a.out
9 alpha 23435 73384 kB 4376 kB R 9 a.out
8 alpha 23434 73384 kB 4372 kB R 8 a.out
7 alpha 23433 73384 kB 4360 kB R 9 a.out
6 alpha 23432 73384 kB 4368 kB R 10 a.out
5 alpha 23431 73384 kB 4360 kB R 13 a.out
4 alpha 23430 73384 kB 4372 kB R 10 a.out
3 alpha 23429 73384 kB 4376 kB R 11 a.out
2 alpha 23428 73384 kB 4364 kB R 7 a.out
1 alpha 23427 73384 kB 4384 kB R 11 a.out
0 alpha 23426 73384 kB 4364 kB R 9 a.out
ashley at alpha:~/code/padb/src$
--
Ashley Pittman, Bath, UK.
Padb - A parallel job inspection tool for cluster computing
http://padb.pittman.org.uk
From codesite-noreply at google.com Sun Jun 14 22:47:02 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Sun, 14 Jun 2009 21:47:02 +0000
Subject: [padb-devel] [padb commit] r47 - Add a new mode --proc-summary to
replace --proc-info --proc-format=
Message-ID: <00163616451bbd70af046c55e1f7@google.com>
Author: apittman
Date: Sun Jun 14 14:28:13 2009
New Revision: 47
Modified:
trunk/src/padb
Log:
Add a new mode --proc-summary to replace --proc-info --proc-format=
proc-format now has a sensible default which should make this a
lot easier to use and access.
Modified: trunk/src/padb
==============================================================================
--- trunk/src/padb (original)
+++ trunk/src/padb Sun Jun 14 14:28:13 2009
@@ -561,7 +561,8 @@
my $core_name;
my $exe_name;
-my @proc_format;
+my $proc_format =
+ "vp=vpid,hostname,pid,vmsize,vmrss,stat.state=S,pcpu=%cpu,name=command";
my $input_file;
my $compress;
@@ -608,7 +609,7 @@
"strip-above-wait!" => \$strip_above_wait,
"watch!" => \$watch,
"local-stats" => \$local_stats,
- "proc-format=s" => \@proc_format,
+ "proc-format=s" => \$proc_format,
"show-jobs" => \$show_jobs,
"norc" => \$norc,
"config-file=s" => \$configfile
@@ -2798,24 +2799,36 @@
print("$s\n");
}
+# Nicely format process information.
+# XXX: proc-sort-key should probably sort on column headers as
+# well as keys.
sub show_proc_format {
my ( $nlines, $mode, $handle ) = @_;
my $lines = $nlines->{lines};
my @proc_format_array;
+ my %proc_format_header;
my $show_fields = 0;
my %proc_format_lengths;
- # Split the command line options into a array.
- foreach my $format (@proc_format) {
- my @i = split( ",", $format );
- foreach my $j (@i) {
- my $key = lc($j);
- push @proc_format_array, $key;
- $proc_format_lengths{$key} = length($key);
- $show_fields = 1 if ( $key eq "fields" );
+ my $separator = $conf{"column-seperator"};
+
+ my @columns = split( ",", $proc_format );
+ foreach my $column (@columns) {
+
+ $show_fields = 1 if ( $column eq "fields" );
+
+ my ( $name, $desc ) = split( "=", $column );
+ if ( defined $desc ) {
+ push @proc_format_array, lc($name);
+ $proc_format_header{ lc($name) } = $desc;
+ $proc_format_lengths{ lc($name) } = length($desc);
+ } else {
+ push @proc_format_array, lc($column);
+ $proc_format_header{ lc($column) } = $column;
+ $proc_format_lengths{ lc($column) } = length($column);
}
}
@@ -2827,7 +2840,7 @@
if ( $data =~ /([\w\.]+)\:[ \t]*(.+)/ ) {
my $key = lc($1);
- next unless defined $proc_format_lengths{$key};
+ next unless defined $proc_format_lengths{$key} or
$show_fields;
if ( length($2) > $proc_format_lengths{$key} ) {
$proc_format_lengths{$key} = length($2);
@@ -2843,15 +2856,18 @@
}
push @all, \%hash;
}
+
@all = sort_proc_hashes( $conf{"proc-sort-key"}, @all );
if ( $conf{"proc-show-header"} ) {
my @res;
foreach my $key (@proc_format_array) {
- my $l .= sprintf( "%$proc_format_lengths{$key}s", $key );
+ my $l .= sprintf( "%-$proc_format_lengths{$key}s",
+ $proc_format_header{$key} );
push @res, $l;
}
- print "@res\n";
+ my $line = join( $separator, @res );
+ print "$line\n";
#print "@proc_format_array\n";
}
@@ -2859,14 +2875,14 @@
my @res;
my @res;
foreach my $key (@proc_format_array) {
- my $value = "?";
+ my $value = "??";
if ( defined $hash->{$key} ) {
$value = $hash->{$key};
-
}
push @res, sprintf( "%$proc_format_lengths{$key}s", $value );
}
- print "@res\n";
+ my $line = join( $separator, @res );
+ print "$line\n";
}
}
@@ -2919,7 +2935,7 @@
print("$data\n");
}
}
- } elsif ( $mode eq "pinfo" and $#proc_format != -1 ) {
+ } elsif ( $mode eq "proc-summary" ) {
show_proc_format( $nlines, $mode, $handle );
}
}
@@ -3508,7 +3524,7 @@
);
}
- if ( defined $mode && $mode eq "pinfo" && ( $#proc_format != -1 ) ) {
+ if ( defined $mode && $mode eq "proc-summary" ) {
$line_formatted = 1;
}
@@ -4865,7 +4881,9 @@
show_task_file( $vp, "$dir/status" );
show_task_file( $vp, "$dir/wchan", "wchan" );
show_task_file( $vp, "$dir/stat", "stat" );
- if ( $confInner{"proc-shows-stat"} ) {
+ if ( $confInner{"proc-shows-stat"}
+ or $confInner{mode} eq "proc-summary" )
+ {
show_task_stat_file( $vp, "$dir/stat", "stat" );
}
@@ -6064,6 +6082,14 @@
"proc-shows-maps" => 0,
"proc-shows-stat" => 0
}
+
+ };
+
+ $allfns{"proc-summary"} = {
+ 'handler_all' => \&show_proc_all,
+ 'arg_long' => 'proc-summary',
+ 'help' => "Show process information in top format",
+ 'options_i' => { "column-seperator" => " ", }
};
From ashley at pittman.co.uk Sun Jun 14 22:54:57 2009
From: ashley at pittman.co.uk (Ashley Pittman)
Date: Sun, 14 Jun 2009 22:54:57 +0100
Subject: [padb-devel] Release candidate
Message-ID: <1245016497.7641.44.camel@localhost.localdomain>
All,
My plan was always to make an initial early "stable" release of padb
shortly after taking up work on it again and that time has now come.
The current subversion code contains the previous Quadrics release with
what modifications are needed to work on a variety of non-quadrics
machines and software stacks. It's also got a small number of
non-intrusive fixes and new functionality however anything major has
been put on hold to allow a swift and stable release.
I intend to cut a release candidate, probably Wednesday 17th and all
being well to make a stable release Monday 22nd. Currently there are no
outstanding fixes to go in or bugs to be fixed, if you know of anything
please speak now. The build/install system, such as it is, desperately
needs work and I need to work on what goes in and how to build the source
tarball.
I've tried hard not to break anything and I've tested what I can but if
there is anything you particularly care about then I'd appreciate you
testing it and giving me any feedback.
One thing that springs to mind is the location of edb on Quadrics
systems, the code that's there tries to look for edb in the same install
dir as padb but this is no longer valid, is the edb on path the Elan4
version or should I be looking for /usr/lib64/qsnet/elan4/bin/edb?
Yours,
Ashley Pittman.
--
Ashley Pittman, Bath, UK.
Padb - A parallel job inspection tool for cluster computing
http://padb.pittman.org.uk
From codesite-noreply at google.com Mon Jun 15 10:13:19 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Mon, 15 Jun 2009 09:13:19 +0000
Subject: [padb-devel] [padb commit] r48 - Greatly simplify the code to look
for install location
Message-ID: <0016364ecdfc1c2535046c5f7853@google.com>
Author: apittman
Date: Mon Jun 15 01:56:37 2009
New Revision: 48
Modified:
trunk/src/padb
Log:
Greatly simplify the code to look for install location
and edb/minfo.x helper programs, just take edb from the
default install location and minfo.x from the directory
where padb itself is installed.
Modified: trunk/src/padb
==============================================================================
--- trunk/src/padb (original)
+++ trunk/src/padb Mon Jun 15 01:56:37 2009
@@ -393,39 +393,19 @@
my $norc = 0;
my $configfile = "/etc/padb.conf";
-sub find_i_dir {
-
- # If padb is running from anywhere that looks like
- # usr/bin then assume the RPM has been either installed
- # or unpacked somewhere and get scripts from the
- # equivalent place. If it doesn't match usr/bin then
- # I don't know how it got installed so assume everything
- # is in the current directory.
- # This allows -Oedb=/path/to/edb to override.
-
- # This probably needs re-writing for a non-Quadrics world,
- # if we aren't installed somewhere however look for the
- # helper code in the current directory.
- my $installdir = "usr/bin/padb";
- my $edb;
- if ( $0 =~ /(.*)$installdir/ ) {
- return "$1usr/lib/qsnet/elan4/bin/"
- if ( -d "$1usr/lib/qsnet/elan4/bin/" );
- return "$1usr/lib64/qsnet/elan4/bin/"
- if ( -d "$1usr/lib64/qsnet/elan4/bin/" );
- }
-
- return "./";
-}
-
+# Look for edb in the default install location only.
sub find_edb {
- my $dir = find_i_dir();
- return $dir . "edb";
+ return "/usr/lib/qsnet/elan4/bin/"
+ if ( -d "/usr/lib/qsnet/elan4/bin/" );
+ return "/usr/lib64/qsnet/elan4/bin/"
+ if ( -d "/usr/lib64/qsnet/elan4/bin/" );
+ return "edb";
}
+# Look for minfo.x in the same directory as padb.
sub find_minfo {
- my $dir = find_i_dir();
- return $dir . "minfo.x";
+ my $dir = dirname($0);
+ return "$dir/minfo.x";
}
###############################################################################
From codesite-noreply at google.com Mon Jun 15 19:12:44 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Mon, 15 Jun 2009 18:12:44 +0000
Subject: [padb-devel] [padb commit] r49 - Add a new "modes" page with
examples and hence
Message-ID: <0016e6454006389488046c670186@google.com>
Author: apittman
Date: Mon Jun 15 10:54:13 2009
New Revision: 49
Added:
trunk/doc/modes.html
Modified:
trunk/doc/header.html
trunk/doc/upload_website
trunk/doc/usage.html
Log:
Add a new "modes" page with examples and hence
remove some of the text from usage.
Modified: trunk/doc/header.html
==============================================================================
--- trunk/doc/header.html (original)
+++ trunk/doc/header.html Mon Jun 15 10:54:13 2009
@@ -2,11 +2,16 @@
Parallel Application Debugger
-Padb
-usage
-download
-patches
-mailing lists
+
Added: trunk/doc/modes.html
==============================================================================
--- (empty file)
+++ trunk/doc/modes.html Mon Jun 15 10:54:13 2009
@@ -0,0 +1,877 @@
+Modes of operation.
+
+Padb has a number of modes of operation depending on what data you
want it to report or what action
+you want it to perform, some like --kill generate no output but
others can generate significant
+amounts of data. This page attempts to give a summary of available modes.
+
+
+
+Some modes of operation, for example stack traces, generate information per
+process in a parallel job, some modes collate information from multiple
processes
+into a single status report for the job. For modes where information is
+process specific a number of options are offered to reduce the amount of
+information displayed to the screen. Without any of these options
specified
+padb will prefix each line of output with the vpid followed by a
colon ": ".
+The --compress-long option will print a header for each process and
+display the information for that rank below the header without any
per-line prefix.
+The --compress option will do the same however will also attempt to
merge
+output where multiple processes in the job report identical output into a
single
+report.
+Finally there is a --tree option which works well with stack traces.
+
+
+
+For modes where each rank is treated independently the --rank can
be given to target specific processes only,
+this option can be specified multiple times to specify multiple ranks.
+
+
+
+Padb can be told to loop, performing the same query over and over
again. This is enabled by the --watch flag
+and further controlled by --config-option=interval=<seconds>
and --config-option=watch-clears-screen=<bool>
+options, the default values for these options are 10 and 1 respectively.
+
+
+
+All examples on this page show padb targeting a single specific
job, either by providing a numeric job identifier
+on the command line or via the --all or --any options. See
usage page for information
+on selecting which jobs.
+
+Process state
+The --proc-summary mode shows basic information about running
+processes, presented one process per line. Users can control which
information
+is shown using the --proc-format option.
+
+
+
+vpid hostname pid vmsize vmrss S %cpu command
+ 7 i3 2623 160336 kB 4464 kB R 49 a.out
+ 6 i2 2616 160336 kB 4464 kB R 48 a.out
+ 5 i1 2615 160336 kB 4460 kB R 47 a.out
+ 4 fnarp 2789 160336 kB 4464 kB R 44 a.out
+ 3 i3 2622 160336 kB 4464 kB R 49 a.out
+ 2 i2 2615 160336 kB 4464 kB R 49 a.out
+ 1 i1 2614 160336 kB 4468 kB R 47 a.out
+ 0 fnarp 2788 160336 kB 4464 kB R 44 a.out
+
+ |
+
+
+The config option proc-sort-key controls which column the table is
sorted by, the default is vpid.
+
+Per-process process state
+
+The --proc-info mode reports a much more complete report about the
state of the process but doesn't easily reduce
+when run across multiple ranks. It can be controlled by the
proc-shows-proc (default: 1), proc-shows-task
+(default: 0), proc-shows-fds (default: 1) and
proc-shows-maps (default: 0) configuration options. This output
+also shows possible columns when running in "Process state" mode.
+
+
+
+hostname:fnarp
+exe:/home/ashley/IMB/imb/src/IMB-MPI1
+Name: IMB-MPI1
+State: R (running)
+Tgid: 7743
+Pid: 7743
+PPid: 7739
+TracerPid: 0
+Uid: 1000 1000 1000 1000
+Gid: 1000 1000 1000 1000
+FDSize: 64
+Groups: 1000
+VmPeak: 251056 kB
+VmSize: 251056 kB
+VmLck: 0 kB
+VmHWM: 99820 kB
+VmRSS: 99820 kB
+VmData: 93792 kB
+VmStk: 84 kB
+VmExe: 68 kB
+VmLib: 5320 kB
+VmPTE: 408 kB
+Threads: 1
+SigQ: 0/16382
+SigPnd: 0000000000000000
+ShdPnd: 0000000000000000
+SigBlk: 0000000000000000
+SigIgn: 0000000000000000
+SigCgt: 00000001800104e0
+CapInh: 0000000000000000
+CapPrm: 0000000000000000
+CapEff: 0000000000000000
+voluntary_ctxt_switches: 1200
+nonvoluntary_ctxt_switches: 27417502
+wchan: 0
+stat: 7743 (IMB-MPI1) R 7739 7739 2505 768 7739 4202496 35270 0 6 0 44352
110655 0 0 20 0 1 0 372663 257081344 24955 18446744073709551615 4194304
4261588 140733227100464 18446744073709551615 140479664939751 0 0 0 66784 0
0 0 17 0 0 0 0 0 0
+fd0: pipe:[56964] (0 00)
+fd1: /dev/pts/5 (0 0100002)
+fd2: pipe:[56965] (0 01)
+fd3: socket:[56977] (0 04002)
+fd4: socket:[56978] (0 02)
+fd5: socket:[56983] (0 04002)
+fd6: socket:[56985] (0 04002)
+fd7: socket:[56992] (0 04002)
+fd8: socket:[57010] (0 04002)
+fd9: socket:[57012] (0 04002)
+fd10: socket:[57016] (0 04002)
+fd11: socket:[57017] (0 04002)
+fd12: socket:[57022] (0 04002)
+fd13: socket:[57023] (0 04002)
+fd14: socket:[57024] (0 04002)
+fd15: socket:[57026] (0 04002)
+fd29: pipe:[56966] (0 01)
+pcpu: 47
+
+ |
+
+
+
+Stack traces
+The --stack-trace option, best used as shown here with the
--tree option
+shows stack traces for each process in the job. Stack traces are
shown "backwards" with main()
+at the top to facilitate the tree view shown here.
+
+
+
+-----------------
+[0-5] (6 processes)
+-----------------
+main() at bench/tping.c:2306
+ -----------------
+ [0-1,4] (3 processes)
+ -----------------
+ timed_ping() at bench/tping.c:345
+ elan_tportRxWait() at elan4/tportRx.c:1194
+ _elan_tportRxWait() at elan4/tportRx.c:1053
+ elan_pollWord() at common/progress.c:144
+ -----------------
+ [2-3,5] (3 processes)
+ -----------------
+ timed_ping() at bench/tping.c:340
+ elan_tportTxWait() at elan4/tportTx.c:940
+ elan_wait() at common/events.c:288
+ _elan_tportTxWait() at elan4/tportTx.c:920
+ elan_waitWord() at common/progress.c:819
+-----------------
+6 (1 processes)
+-----------------
+main() at bench/tping.c:2362
+ elan_gsync() at elan4/groupGsync.c:149
+ elan_gsyncShm() at elan4/groupGsync.c:464
+ elan_gsyncNet() at elan4/groupGsync.c:99
+ elan_gsyncGeneric() at elan4/groupGsync.c:76
+ _elan_aeventWait() at elan4/devent.c:361
+ elan_deviceCheck() at common/progress.c:847
+
+ |
+
+
+The config options stack-shows-locals and stack-shows-params
can be enabled to display more information in the stack
+trace, these are disabled by default and make the tree based reporting
very difficult so are best used in conjunction with the
+--rank option.
+
+
+Stack traces on Linux often show functions below main(), these are
automatically stripped unless the flag
+--nostrip-below-main is provided. Likewise padb knows the
core "progression" functions for several parallel
+stacks and will strip functions off the other end of stacks unless the
--nostrip-above-wait flag is given. The list of function names to
strip beyond can be set with the stack-strip-above and
stack-strip-below configuration options, each one
+taking a comma separated list of function names.
+
+
MPI Specific modes
+
+MPI message queues
+The option --message-queue will tell padb to read the MPI
message
+queues from your application if possible. Here shown with the
--compress option.
+
+
+
+----------------
+0
+----------------
+comm0: name: 'MPI_COMM_WORLD'
+comm0: rank: '0'
+comm0: size: '2'
+comm0: id: '(nil)'
+comm0: Rank: local 0 global 0
+comm0: Rank: local 1 global 1
+comm1: name: 'MPI_COMM_SELF'
+comm1: rank: '0'
+comm1: size: '1'
+comm1: id: '0x1'
+comm2: name: 'MPI_COMM_NULL'
+comm2: rank: '-2'
+comm2: size: '0'
+comm2: id: '0x2'
+comm3: name: 'MPI COMMUNICATOR 4 DUP FROM 0'
+comm3: rank: '0'
+comm3: size: '2'
+comm3: id: '0x4'
+comm3: Rank: local 0 global 0
+comm3: Rank: local 1 global 1
+comm4: name: 'MPI COMMUNICATOR 5 DUP FROM 0'
+comm4: rank: '0'
+comm4: size: '2'
+comm4: id: '0x5'
+comm4: Rank: local 0 global 0
+comm4: Rank: local 1 global 1
+comm5: name: 'MPI COMMUNICATOR 28 SPLIT FROM 4'
+comm5: rank: '0'
+comm5: size: '1'
+comm5: id: '0x1c'
+----------------
+1
+----------------
+comm0: name: 'MPI_COMM_WORLD'
+comm0: rank: '1'
+comm0: size: '2'
+comm0: id: '(nil)'
+comm0: Rank: local 0 global 0
+comm0: Rank: local 1 global 1
+comm1: name: 'MPI_COMM_SELF'
+comm1: rank: '0'
+comm1: size: '1'
+comm1: id: '0x1'
+comm2: name: 'MPI_COMM_NULL'
+comm2: rank: '-2'
+comm2: size: '0'
+comm2: id: '0x2'
+comm3: name: 'MPI COMMUNICATOR 4 DUP FROM 0'
+comm3: rank: '1'
+comm3: size: '2'
+comm3: id: '0x4'
+comm3: Rank: local 0 global 0
+comm3: Rank: local 1 global 1
+comm4: name: 'MPI COMMUNICATOR 5 DUP FROM 0'
+comm4: rank: '1'
+comm4: size: '2'
+comm4: id: '0x5'
+comm4: Rank: local 0 global 0
+comm4: Rank: local 1 global 1
+comm5: name: 'MPI COMMUNICATOR 28 SPLIT FROM 4'
+comm5: rank: '0'
+comm5: size: '1'
+comm5: id: '0x1c'
+
+ |
+
+
+Collective information
+If you are using a patched MPI library it's also
possible for padb to
+display the state of collective operations across your job using the
--deadlock mode.
+
+
+
+Information for group '0x4' (MPI COMMUNICATOR 4 DUP FROM 0)
+Group members [1-3] (size 4) are in call 2 to Barrier.
+Group member 0 (size 4) has completed call 1 to Barrier.
+Group member 0 (size 4) is not in a call to the collectives.
+Information for group '0x5' (MPI COMMUNICATOR 5 DUP FROM 0)
+Group member 0 (size 4) is in call 2 to Barrier.
+Group members [1-3] (size 4) have completed call 1 to Barrier.
+Group members [1-3] (size 4) are not in a call to the collectives.
+Total: 6 groups of which 2 are in use.
+
+ |
+
+
+Quadrics specific modes
+The --set-debug, --group and --statistics modes are
QsNet specific. The --deadlock mode
+performs the same function as --group for MPI programs.
+
+Process watch
+The --mpi-watch mode in padb will cause padb to inspect the
parallel job and write a single
+line of output to the screen, each character representing a process in the
parallel job. Each character
+has a different value depending on the state of that process at the time
of sampling. This provides a quick
+way to see the state of the program and if individual ranks are blocked in
comms or actively working.
+
+
+
+When used with the --watch and
--config-option=watch-clears-screen=0 option it becomes possible
+to see progress of the application over time, this trace shows the popular
IMB benchmarking application, those
+familiar with it will immediately see the different stages of benchmark
and how each stage uses more and more processes
+over time with any unused ones blocking in MPI_Barrier.
+
+
+
+u: unexpected messages U: unexpected and other messages
+s: sending messages r: receiving messages m: sending and receiving
+b: Barrier B: Broadcast g: Gather G: AllGather r: reduce: R: AllReduce
+a: alltoall A: alltoalls w: waiting
+.: consuming CPU cycles ,: using CPU but no queue data -: sleeping *: error
+rrbbbbbb
+rrbbbbbb
+rrbbbbbb
+rrbbbbbb
+rrbbbbbb
+ssbbbbbb
+rsbbbbbb
+r.bbbbbb
+mmbbbbbb
+mmbbbbbb
+mmbbbbbb
+mmbbbbbb
+mmbbbbbb
+mmbbbbbb
+mmbbbbbb
+m.bbbbbb
+rrbbbbbb
+r.bbbbbb
+rrbbbbbb
+rrbbbbbb
+rrbbbbbb
+rmbbbbbb
+mmbbbbbb
+mmbbbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+mmmmbbbb
+mrmmbbbb
+mmrrbbbb
+mmrmbbbb
+rmmrbbbb
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+mmrmmmrm
+mmrmrmrm
+mmrrrmmr
+mmmmmrrr
+mmmmmrrr
+bmbmmbrb
+mmbbbbbb
+mmbbbbbb
+.mbbbbbb
+mmbbbbbb
+mmbbbbbb
+mmbbbbbb
+mmbbbbbb
+mmbbbbbb
+smbbbbbb
+mmbbbbbb
+mmbbbbbb
+mmbbbbbb
+mmbbbbbb
+mmbbbbbb
+mmmmbbbb
+mmmmbbbb
+mmmmbbbb
+mmmmbbbb
+mmmmbbbb
+mmmmbbbb
+mmmmbbbb
+mmmmbbbb
+mmmmbbbb
+mmmmbbbb
+mmmmbbbb
+mmmmbbbb
+mmmmbbbb
+mmmmbbbb
+mmmmbbbb
+msmmbbbb
+smmsbbbb
+mmmsbbbb
+msmmbbbb
+mmmmbbbb
+mmmmbbbb
+mmmmmmmm
+mmmmmmmm
+mmmmmmmm
+mmmmmmmm
+mmmmmmmm
+mmmmmmmm
+mmmmmmmm
+mmmmmmmm
+mmmmmmmm
+mmmmmmmm
+mmmmmmmm
+mmmmmmmm
+mmmmmmmm
+mmRmmmmR
+mmmmmmmm
+mmmmmmmm
+mmmmmsss
+smmmmmms
+bRbmmssb
+mmmsmsmm
+smmmmmsm
+mmmmsmms
+bmsmmbmR
+RRbbbbbb
+RRbbbbbb
+RRbbbbbb
+RRbbbbbb
+RRbbbbbb
+RRbbbbbb
+RRbbbbbb
+RRbbbbbb
+RRbbbbbb
+RRbbbbbb
+RRRRbbbb
+RRRRbbbb
+RRRRbbbb
+RRRRbbbb
+RRRRbbbb
+RRRRbbbb
+RRRRbbbb
+RRRRbbbb
+RRRRbbbb
+RRRRbbbb
+RRRRbbbb
+RRRRbbbb
+RRRRbbbb
+RRRRbbbb
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+RRRRRRRR
+rrbbbbbb
+rrbbbbbb
+rrbbbbbb
+rrbbbbbb
+rrbbbbbb
+rrbbbbbb
+rrbbbbbb
+rrbbbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrbbbb
+rbrBbbbb
+rrrrbbbb
+rrrrbbbb
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrrrrrrr
+rrbRRrrb
+rrrrrrrr
+g.bbbbbb
+..bbbbbb
+..bbbbbb
+..bbbbbb
+..bbbbbb
+..bbbbbb
+..bbbbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+.R.Rbbbb
+....bbbb
+....bbbb
+....bbbb
+....RRRR
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+..bbbbbb
+..bbbbbb
+.bbbbbbb
+..bbbbbb
+..bbbbbb
+..bbbbbb
+..bbbbbb
+..bbbbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+bb.bbbbb
+....bbbb
+....bbbb
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+..bbbbbb
+..bbbbbb
+..bbbbbb
+..bbbbbb
+..bbbbbb
+..bbbbbb
+..bbbbbb
+..bbbbbb
+..bbbbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+........
+........
+........
+........
+........
+....b...
+........
+.R.Rb.b.
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+........
+ggbbbbbb
+ggbbbbbb
+ggbbbbbb
+ggbbbbbb
+ggbbbbbb
+ggbbbbbb
+ggbbbbbb
+ggggbbbb
+ggggbbbb
+ggggbbbb
+ggggbbbb
+ggggbbbb
+ggggbbbb
+ggggbbbb
+ggggbbbb
+ggggbbbb
+ggggbbbb
+ggggbbbb
+ggggbbbb
+ggggbbbb
+ggggbbbb
+ggggbbbb
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+gggggggg
+.bbbbbbb
+..bbbbbb
+..bbbbbb
+..bbbbbb
+..bbbbbb
+..bbbbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+bbb.bbbb
+....bbbb
+........
+........
+........
+........
+........
+.bbbbb.b
+........
+........
+........
+.b.bb.b.
+r.b.bb.b
+..bbbbbb
+..bbbbbb
+g.bbbbbb
+.Rbbbbbb
+..bbbbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+........
+........
+........
+........
+bR.RR.Bb
+........
+........
+........
+........
+........
+........
+........
+bRbRR.Rb
+g.gbbbbb
+R.bbbbbb
+....bbbb
+RRB.bbbb
+....bbbb
+.*..bbbb
+bb.bbbbb
+RbRbbRbR
+........
+........
+........
+.*......
+R*...R..
+Rabbbbbb
+aabbbbbb
+aabbbbbb
+aabbbbbb
+aaaabbbb
+aaaabbbb
+aaaabbbb
+aaaabbbb
+aaaabbbb
+aaaabbbb
+aaaabbbb
+aaaabbbb
+aaaabbbb
+aaaabbbb
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+aaaaaaaa
+ababbaba
+aaaaa*aa
+..bbbbbb
+g.bbbbbb
+..bbbbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+....bbbb
+.*..bbbb
+....bbbb
+....bbbb
+........
+.....*..
+........
+........
+.....*..
+........
+........
+........
+........
+.b.Bb.b.
+........
+........
+........
+........
+.b.b..b.
+BBbbbbbb
+BBbbbbbb
+BBbbbbbb
+BBbbbbbb
+BBBBbbbb
+BBBBbbbb
+BBBBbbbb
+BBBBbbbb
+BBBBbbbb
+BBBBbbbb
+BBBBbbbb
+BBBBbbbb
+BBBBbbbb
+BBBBBBBB
+BBBBBBBB
+BBBBBBBB
+BBBBBBBB
+BBBBBBBB
+BBBBBBBB
+BBBBBBBB
+BBBBBBBB
+BBBBBBBB
+bbbbbbbb
+********
+
+ |
+
+
Modified: trunk/doc/upload_website
==============================================================================
--- trunk/doc/upload_website (original)
+++ trunk/doc/upload_website Mon Jun 15 10:54:13 2009
@@ -7,20 +7,22 @@
# Load the password from a non-public file ;)
. ~/padb-website-password.txt
-FILES="index.html usage.html download.html email.html extensions.html"
+FILES="index usage download email extensions modes"
for FILE in $FILES
do
echo Uploading $FILE
TFILE=`mktemp`
cat header.html > $TFILE
- cat $FILE >> $TFILE
+ cat $FILE.html >> $TFILE
cat ga.html >> $TFILE
- ftp-upload --host padb.pittman.org.uk -u padb at pittman.co.uk --password
$PASSWORD --as $FILE $TFILE
+ ftp-upload --host padb.pittman.org.uk -u padb at pittman.co.uk --password
$PASSWORD --as $FILE.html $TFILE
+ ftp-upload --host padb.pittman.org.uk -u padb at pittman.co.uk --password
$PASSWORD --as $FILE/index.html $TFILE
rm $TFILE
done
-ftp-upload --host padb.pittman.org.uk -u padb at pittman.co.uk --password
$PASSWORD OpenMPI-padb-groups.patch
+ftp-upload --host padb.pittman.org.uk -u padb at pittman.co.uk --password
$PASSWORD OpenMPI-padb-groups.patch
+ftp-upload --host padb.pittman.org.uk -u padb at pittman.co.uk --password
$PASSWORD --as download/OpenMPI-padb-groups.patch OpenMPI-padb-groups.patch
echo All done.
exit 0
Modified: trunk/doc/usage.html
==============================================================================
--- trunk/doc/usage.html (original)
+++ trunk/doc/usage.html Mon Jun 15 10:54:13 2009
@@ -6,8 +6,8 @@
one resource manager installed or padb can't detect
the correct one use the rmgr configuration
option.
-If no resource manager is found use -O rmgr=local and process
-identifiers will be used instead of job ids.
+
If no resource manager is found you can use -O rmgr=local and
process
+identifiers (pids) will be used instead of job ids.
The --list-rmgrs option can be used to show a list of detected
resource managers and their active jobs.
@@ -22,12 +22,12 @@
Showing list of current jobs
To show a list of currently running jobs for a given user use the
---show-jobs flag.
+--show-jobs option. Alternatively the --list-rmgrs option
+shows a list of detected resource managers along with a list of currently
active jobs for each of them.
To target all jobs
To target all jobs currently running for a given user use the --all
(-a) flag.
-
To target any jobs
To target "any" job currently running for a given user use the
--any (-A) flag.
This differs from targeting all jobs as it will exit with an error if more
@@ -44,18 +44,12 @@
collect. This option is typically used when creating bug reports to
send to third parties or to inspect a job for anomalies.
-Example
-To show a stack trace for process ID 10
-use padb -Ormgr=local --stack-trace 10
-
-
Selecting ranks (Processes)
In modes where data for each process is reported separately it is possible
to restrict which ranks are queried, this is done via the --rank
option.
-Multiple ranks can be selected by specifying --rank multiple times
-or passing a comma separated list.
+Multiple ranks can be selected by specifying --rank multiple times.
@@ -72,27 +66,9 @@
iterating over the more common padb options. For large jobs this can
generate a lot of output so redirecting to a file is recommended.
-Stack Trace
-The --stack-trace (-x) option will cause a stack trace to be
given for
-each selected rank. The stack trace generated by this call is moderately
-terse and gives one function call per line. This option is best combined
-with --tree (-t) to condense stack traces into a tree based
format.
-
-Stack Trace (Longer)
-Fuller stack traces can be obtained by setting the configuration options
-stack-shows-params=1 and stack-shows-locals=1. This can help to narrow
-down a problem however does not work well with tree based stack traces
-so is recommended to be used with the --rank flag.
-
-Message Queues
-For MPI programs the MPI Message Queues can be shown on MPI stacks
-which support it, use the --message-queue (-q) option to
show the
-message queues.
-
-Process info
-To view information about individual processes in a job use the
---proc-info option, best combined with
--proc=format=<format>,
-for example --proc-format=vp,pid,state,pcpu,vmsize,vmrss
+
+
A list of available modes and their descriptions can be found on the modes page.
@@ -109,12 +85,14 @@
environment variables should be specified in upper case.
- Config file
Config is also loaded from /etc/padb.conf and ~/.padbrc.
Contact the
-mailing lists for help with these files.
+mailing lists for help with these files.
A list of configuration options along with their current values can be
obtained by specifying a key of help with or without a value, i.e. padb
-Ohelp
Example
+
+
ashley at alpha:~$ padb --show-jobs
Error, multiple resource managers detected, use -Ormgr=
@@ -126,3 +104,5 @@
59315
ashley at alpha:~$
+ |
+
From codesite-noreply at google.com Mon Jun 15 21:46:00 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Mon, 15 Jun 2009 20:46:00 +0000
Subject: [padb-devel] [padb commit] r50 - Add documentation of the kill mode
and make sure
Message-ID: <001636163cc74dce95046c692513@google.com>
Author: apittman
Date: Mon Jun 15 13:38:38 2009
New Revision: 50
Modified:
trunk/doc/modes.html
trunk/doc/upload_website
Log:
Add documentation of the kill mode and make sure
we upload the ompi patch to the correct directory.
Modified: trunk/doc/modes.html
==============================================================================
--- trunk/doc/modes.html (original)
+++ trunk/doc/modes.html Mon Jun 15 13:38:38 2009
@@ -27,7 +27,7 @@
Padb can be told to loop, performing the same query over and over
again. This is enabled by the --watch flag
-and further controlled by --config-option=interval=<seconds>
and --config-option=watch-clears-screen=<bool>
+and further controlled by --config-option interval=<seconds>
and --config-option watch-clears-screen=<bool>
options, the default values for these options are 10 and 1 respectively.
@@ -178,9 +178,14 @@
MPI Specific modes
-MPI message queues
-The option --message-queue will tell padb to read the MPI
message
+MPI message queues
+
+The option --mpi-queue will tell padb to read the MPI message
queues from your application if possible. Here shown with the
--compress option.
+
+
+The option --message-queue shows the tport queues on QsNet
systems. On non-QsNet systems it automatically falls back
+to --mpi-queue.
+
+Signal delivery
+To deliver signals to processes in a job use the --kill mode
together with the optional
+--signal=<name> option. No output is produced by this mode.
Quadrics specific modes
The --set-debug, --group and --statistics modes are
QsNet specific. The --deadlock mode
Modified: trunk/doc/upload_website
==============================================================================
--- trunk/doc/upload_website (original)
+++ trunk/doc/upload_website Mon Jun 15 13:38:38 2009
@@ -22,7 +22,7 @@
done
ftp-upload --host padb.pittman.org.uk -u padb at pittman.co.uk --password
$PASSWORD OpenMPI-padb-groups.patch
-ftp-upload --host padb.pittman.org.uk -u padb at pittman.co.uk --password
$PASSWORD --as download/OpenMPI-padb-groups.patch OpenMPI-padb-groups.patch
+ftp-upload --host padb.pittman.org.uk -u padb at pittman.co.uk --password
$PASSWORD --as extensions/OpenMPI-padb-groups.patch
OpenMPI-padb-groups.patch
echo All done.
exit 0
From codesite-noreply at google.com Tue Jun 16 11:28:42 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Tue, 16 Jun 2009 10:28:42 +0000
Subject: [padb-devel] [padb commit] r51 - Fix a corner case where the
stack-trace "tree" mode
Message-ID: <0016e64135de820971046c74a331@google.com>
Author: apittman
Date: Tue Jun 16 03:12:06 2009
New Revision: 51
Modified:
trunk/src/padb
Log:
Fix a corner case where the stack-trace "tree" mode
was dropping frames. If one process in the job
had a short stack further processes in the job
erroneously got reported as short too.
Modified: trunk/src/padb
==============================================================================
--- trunk/src/padb (original)
+++ trunk/src/padb Tue Jun 16 03:12:06 2009
@@ -1784,7 +1784,7 @@
# hash containing the "txt" of the function name and a further array
# of hash references called "children".
sub go_p {
- my ( $level, $indent, $indentNo, $lines, @tags ) = @_;
+ my ( $level, $lines, @tags ) = @_;
my @peers;
my $prev;
@@ -1795,7 +1795,6 @@
return if ( !defined($tag) );
return if ( !defined( $lines->{$tag} ) );
- return if ( !defined( $lines->{$tag}[$level] ) );
my @identical = ();
my @different = ();
@@ -1817,7 +1816,12 @@
}
}
} else {
- @different = @tags;
+ foreach my $dtag (@tags) {
+ if ( $dtag != $tag ) {
+ push( @different, $dtag );
+ }
+ }
+
}
# Move $endlevel on as far as we can...
@@ -1846,10 +1850,10 @@
$endlevel = ( $#{ $lines->{$tag} } );
}
- printf( "level $level, endlevel $endlevel, identical $#identical\n", )
- if $conf{"tree-verbose"};
+ printf(
+"level $level, endlevel $endlevel, identical:@identical
different:@different\n",
+ ) if $conf{"tree-verbose"};
- my $i = $indent;
for ( my $l = $level ; $l <= $endlevel ; $l++ ) {
my %this;
@@ -1873,8 +1877,7 @@
unshift @identical, $tag;
}
- $prev->{children} =
- go_p( $endlevel + 1, "$i", $indentNo + 1, $lines, @identical );
+ $prev->{children} = go_p( $endlevel + 1, $lines, @identical );
}
printf(
@@ -1882,11 +1885,12 @@
) if $conf{"tree-verbose"};
if (@different) {
- my $new = go_p( $level, "$indent", $indentNo, $lines, @different );
+ my $new = go_p( $level, $lines, @different );
foreach my $n ( @{$new} ) {
push @peers, $n;
}
}
+
return \@peers;
}
@@ -2886,7 +2890,7 @@
}
if ($tree) {
- print show_tree go_p( 0, "", 0, $lines,
+ print show_tree go_p( 0, $lines,
( sort { $a <=> $b } ( keys %$lines ) ) );
} elsif ($compress) {
foreach my $tag ( sort { $a <=> $b } ( keys %$lines ) ) {
From codesite-noreply at google.com Tue Jun 16 13:18:36 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Tue, 16 Jun 2009 12:18:36 +0000
Subject: [padb-devel] [padb commit] r52 - Add the --proc-summary mode to the
changelog.
Message-ID: <0016368e1c2f94a9f2046c762cfb@google.com>
Author: apittman
Date: Tue Jun 16 03:13:44 2009
New Revision: 52
Modified:
trunk/src/padb
Log:
Add the --proc-summary mode to the changelog.
Modified: trunk/src/padb
==============================================================================
--- trunk/src/padb (original)
+++ trunk/src/padb Tue Jun 16 03:13:44 2009
@@ -36,6 +36,8 @@
# * inner_main() now uses callbacks for resource manager support.
# * --signal now takes names rather than numbers.
# * Check job is valid when using the --full-report option.
+# * Add a --proc-summary option to replace --proc-info --proc-format
+# This gives a very efficient "job top" program.
# Version 2.2
# * Add a --core-stack option along with --core and --exe to extract stack
From codesite-noreply at google.com Tue Jun 16 13:46:14 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Tue, 16 Jun 2009 12:46:14 +0000
Subject: [padb-devel] [padb commit] r53 - Quick hack to make --deadlock work
with input files.
Message-ID: <0016e645ab305efcc6046c768f97@google.com>
Author: apittman
Date: Tue Jun 16 05:45:35 2009
New Revision: 53
Modified:
trunk/src/padb
Log:
Quick hack to make --deadlock work with input files.
The way modes and output formatting works is due to
change soon so just make it work for now and tidy it up
later.
Modified: trunk/src/padb
==============================================================================
--- trunk/src/padb (original)
+++ trunk/src/padb Tue Jun 16 05:45:35 2009
@@ -3554,8 +3554,12 @@
$countoutput = 1
if ( ( defined $mode and $mode eq "stack" ) or $conf{"verbose"} );
- if ($input_file) {
- go_file( $input_file, "input" );
+ if ( defined($input_file) ) {
+ my $m = "input";
+ if ( defined $mode ) {
+ $m = $mode;
+ }
+ go_file( $input_file, $m );
exit(0);
}
@@ -4417,7 +4421,17 @@
sub deadlock_detect {
my ( $handle, $lines ) = @_;
- my $data = $lines->{raw};
+ my $data;
+
+ # XXX This is a bit of a hack to make the deadlock
+ # code work with input files, the whole thing is due
+ # a tidy-up on the full-duplex branch where this should
+ # be solved properly.
+ if ( defined $lines->{raw} ) {
+ $data = $lines->{raw};
+ } else {
+ $data = $lines->{lines};
+ }
# print Dumper $data;
my %coll_data;
From codesite-noreply at google.com Tue Jun 16 16:20:39 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Tue, 16 Jun 2009 15:20:39 +0000
Subject: [padb-devel] [padb commit] r55 - Create a full-duplex branch
Message-ID: <0016e645ab30999976046c78b765@google.com>
Author: apittman
Date: Tue Jun 16 07:56:43 2009
New Revision: 55
Added:
branches/full-duplex/
- copied from r54, /trunk/
Log:
Create a full-duplex branch
From codesite-noreply at google.com Tue Jun 16 16:57:13 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Tue, 16 Jun 2009 15:57:13 +0000
Subject: [padb-devel] [padb commit] r54 - Add a branches directory for
storing branches on.
Message-ID: <0016e645400665de73046c793a9b@google.com>
Author: apittman
Date: Tue Jun 16 07:55:26 2009
New Revision: 54
Added:
branches/
Log:
Add a branches directory for storing branches on.
From codesite-noreply at google.com Wed Jun 17 12:01:38 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Wed, 17 Jun 2009 11:01:38 +0000
Subject: [padb-devel] [padb commit] r57 - Enable warnings for the Branch and
fix anything it throws up.
Message-ID: <0016364ecdfc26abb7046c8937cd@google.com>
Author: apittman
Date: Wed Jun 17 03:26:07 2009
New Revision: 57
Modified:
branches/full-duplex/src/padb
Log:
Enable warnings for the Branch and fix anything it throws up.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Wed Jun 17 03:26:07 2009
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/perl -w
# padb. a simple parallel debugging aid from Quadrics.
@@ -2862,7 +2862,6 @@
}
foreach my $hash (@all) {
my @res;
- my @res;
foreach my $key (@proc_format_array) {
my $value = "??";
if ( defined $hash->{$key} ) {
@@ -5083,7 +5082,7 @@
chomp $l;
my @stats = split( / /, $l );
for ( my $i = 0 ; $i <= $#stats ; $i++ ) {
- output( $vp, "$prefix.$stat_names[$i]: @stats[$i]" );
+ output( $vp, "$prefix.$stat_names[$i]: $stats[$i]" );
}
}
@@ -6105,7 +6104,7 @@
$res->{request} = $cmd;
}
- if ( $cmd->{quit} == 1 ) {
+ if ( defined( $cmd->{quit} ) and ( $cmd->{quit} == 1 ) ) {
exit(0);
}
@@ -6188,8 +6187,7 @@
$netdata->{server} = $server;
$netdata->{key} = $key;
- my $sel = $netdata->{sel};
- my $server = $netdata->{server};
+ my $sel = $netdata->{sel};
while ( my @data = $sel->can_read() ) {
foreach my $s (@data) {
From codesite-noreply at google.com Wed Jun 17 12:21:52 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Wed, 17 Jun 2009 11:21:52 +0000
Subject: [padb-devel] [padb commit] r56 - First code to implement
bi-directional communication
Message-ID: <0016e644d5aa89d28b046c897f0a@google.com>
Author: apittman
Date: Wed Jun 17 02:47:07 2009
New Revision: 56
Modified:
branches/full-duplex/src/padb
Log:
First code to implement bi-directional communication
between the outer and the inner processes, currently
it only works for one node and it only works for
--mpi-watch.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Wed Jun 17 02:47:07 2009
@@ -205,6 +205,8 @@
use File::Temp qw(tempfile);
use MIME::Base64;
use Config;
+use IO::Socket;
+use IO::Select;
###############################################################################
#
@@ -377,6 +379,7 @@
$conf{"local-fd-name"} = "/dev/null";
$conf{"stack-strip-above"} =
"elan_waitWord,elan_pollWord,elan_deviceCheck,opal_condition_wait";
+$conf{"full-duplex"} = 0;
# $conf{stack-format} = undef;
@@ -2779,8 +2782,8 @@
# print Dumper $lines;
my $s = "";
- foreach my $l ( sort { $a <=> $b } ( keys %{ $lines->{raw} } ) ) {
- $s .= $lines->{raw}{$l}{state};
+ foreach my $l ( sort { $a <=> $b } ( keys %{ $lines->{from_vpid} } ) )
{
+ $s .= $lines->{from_vpid}{$l}{state};
}
print("$s\n");
}
@@ -3016,6 +3019,228 @@
}
}
+sub connect_to_child {
+ my ( $host, $port, $word ) = @_;
+
+ my $socket = IO::Socket::INET->new(
+ PeerAddr => $host,
+ PeerPort => $port,
+ Proto => 'tcp',
+ ) or die("Failed to connect to child");
+
+ print $socket "hello $word\n";
+
+ #printf("Connecting to $host $port $word\n");
+ return $socket;
+}
+
+sub my_encode {
+ return encode_base64( nfreeze(shift), "" );
+}
+
+sub my_decode {
+ return thaw( decode_base64(shift) );
+}
+
+# We have read data on a socket, process it and call
+# any callback.
+sub extract_line {
+ my ( $handle, $sd ) = @_;
+
+ my $str = $sd->{str};
+
+ # Do this to allow telnet sessions to work.
+ $str =~ s/\r//g;
+
+ # printf("Testing $str\n");
+
+ if ( $str =~ /^([^\n]+)\n(.*)$/ ) {
+
+ # printf("Calling callback for \"$1\"\n");
+ $sd->{line_cb}( $handle, $sd, $1 );
+ $sd->{str} = $2;
+ } else {
+ printf("No match\n");
+ }
+
+ return;
+
+}
+
+sub issue_command_to_inner {
+ my ( $cdata, $cmd ) = @_;
+ my $str = my_encode($cmd);
+ $cdata->{socket}->print("$str\n");
+}
+
+sub command_from_inner {
+ my ( $handle, $cdata, $line ) = @_;
+
+ if ( $line eq "Welcome" ) {
+
+ # printf("Sending data to all childen\n");
+ # Tell all hosts to go.
+
+ my $req;
+ $req->{mode} = $handle->{mode};
+ $req->{cmd} = $handle->{cmd};
+ $req->{jobid} = $handle->{jobid};
+ $req->{cinner} = \%cinner;
+
+ # print Dumper $req;
+ issue_command_to_inner( $cdata, $req );
+
+ return;
+ }
+
+ # A reply from inner.
+ my $d = my_decode($line);
+
+ $allfns{ $handle->{mode} }{out_handler}( undef, $d );
+
+ my $req;
+ $req->{quit} = 1;
+ $handle->{shutdown_in_progress} = 1;
+ issue_command_to_inner( $cdata, $req );
+
+ print Dumper $d if ( $conf{"full-duplex"} eq "debug" );
+
+}
+
+sub go_parallel {
+ my $jobid = shift;
+ my $cmd = shift;
+ my $ncpus = shift;
+ my $raw = shift;
+ my $stats = shift;
+ my $mode = shift;
+ my $h = shift;
+
+ my $errors = 0;
+
+ my $report_errors = 1;
+
+ my $pcmd = {
+ pid => -1,
+ in => "",
+ out => *OUT,
+ err => *ERR,
+ };
+
+ $pcmd->{pid} = open3( $pcmd->{in}, *OUT, *ERR, $cmd )
+ or die "Unable to open3() pcmd: $!\n";
+
+ close $pcmd->{in};
+
+ my $comm_data;
+
+ $comm_data->{mode} = $mode;
+ $comm_data->{cmd} = $cmd;
+ $comm_data->{jobid} = $jobid;
+ $comm_data->{shutdown_in_progress} = 0;
+
+ my $sel = IO::Select->new();
+ $sel->add( $pcmd->{out} );
+ $sel->add( $pcmd->{err} );
+
+ while ( my @live = $sel->can_read() ) {
+ foreach my $h (@live) {
+ if ( $h eq $pcmd->{out} ) {
+ my $line = $h->getline();
+ if ( not defined $line ) {
+ if ( not $comm_data->{shutdown_in_progress} ) {
+ printf("Warning, EOF from ofd\n");
+ }
+ $sel->remove($h);
+ $h->close();
+ next;
+ }
+ my @words = split( " ", $line );
+ if ( $#words == 3 and $words[0] eq "connect" ) {
+
+ my $socket =
+ connect_to_child( $words[1], $words[2], $words[3] );
+ my $cdata;
+ $cdata->{active} = 1;
+ $cdata->{str} = "";
+ $cdata->{socket} = $socket;
+ $cdata->{line_cb} = \&command_from_inner;
+ $comm_data->{sockets}{$socket} = $cdata;
+ $sel->add($socket);
+ next;
+ }
+ if ( $words[0] eq "debug" ) {
+ my $count = $sel->count();
+ print("There are $count sockets\n");
+ next;
+ }
+ print("inner: $line");
+ } elsif ( $h eq $pcmd->{err} ) {
+ my $line = $h->getline();
+
+ if ( not defined $line ) {
+ if ( not $comm_data->{shutdown_in_progress} ) {
+ printf("Warning, EOF from efd\n");
+ }
+ $sel->remove($h);
+ $h->close();
+ next;
+ }
+ printf("einner:$line");
+ } elsif ( defined $comm_data->{sockets}{$h} ) {
+ my $cdata = $comm_data->{sockets}{$h};
+
+ my $data;
+ my $nb = sysread( $h, $data, 1024 );
+
+ #printf("read $data ($nb) from fd\n");
+
+ if ( not defined $data or $nb == 0 ) {
+ if ( not $comm_data->{shutdown_in_progress} ) {
+ printf("EOF from child socket ($nb)\n");
+ }
+ $sel->remove($h);
+ $h->close();
+ next;
+ }
+
+ $cdata->{str} .= $data;
+ extract_line( $comm_data, $cdata );
+
+ } else {
+ printf("Responce from unknown fd $h\n");
+ exit(1);
+ }
+ }
+ my $count = $sel->count();
+ if ( $count == 1 ) {
+ printf("All sockets closed?\n");
+ }
+ }
+
+ waitpid( $pcmd->{pid}, 0 );
+ my $res = $?;
+
+ printf("result from parallel command was $res\n")
+ if ( $conf{"verbose"} );
+
+ if ( $res != 0 ) {
+ my %status = rc_status($res);
+ if ( job_is_running($jobid) ) {
+ if ($report_errors) {
+ printf("Failed to run parallel command (rc =
$status{rc})\n");
+ }
+ } else {
+ printf("Job $jobid is no longer active\n");
+ return 1;
+ }
+ }
+
+ cleanup_pcmd();
+
+ exit(0);
+}
+
sub go_job_once {
my $jobid = shift;
my $cmd = shift;
@@ -3226,7 +3451,13 @@
sleep( $conf{"interval"} );
}
}
- my $errors = go_job_once( $jobid, $cmd, $ncpus, $raw, $stats, $mode,
$h );
+ my $errors;
+ if ( $conf{"full-duplex"} ) {
+ $errors = go_parallel( $jobid, "$cmd --full-duplex",
+ $ncpus, $raw, $stats, $mode, $h );
+ } else {
+ $errors = go_job_once( $jobid, $cmd, $ncpus, $raw, $stats, $mode,
$h );
+ }
cleanup_pcmd();
return $errors;
}
@@ -5505,6 +5736,17 @@
return \%res;
}
+sub mpi_watch_all {
+ my ($list) = @_;
+ my %res;
+ foreach my $proc ( @{$list} ) {
+ my $vp = $proc->{vp};
+ my $pid = $proc->{pid};
+ $res{$vp} = mpi_watch( $vp, $pid );
+ }
+ return \%res;
+}
+
sub show_pid {
my ( $vp, $pid ) = @_;
@@ -5854,6 +6096,154 @@
);
}
+sub command_from_parent {
+ my $cmd = shift;
+ my $res;
+
+ # This is only for debugging.
+ if ( $confInner{verbose} ) {
+ $res->{request} = $cmd;
+ }
+
+ if ( $cmd->{quit} == 1 ) {
+ exit(0);
+ }
+
+ # Setup the environment.
+ foreach my $key ( keys( %{ $cmd->{cinner} } ) ) {
+ $confInner{$key} = $cmd->{cinner}{$key};
+ }
+
+ $confInner{mode} = $cmd->{mode};
+
+ # Find the pids and register them all.
+ $rmgr{ $confInner{rmgr} }{find_pids}( $cmd->{jobid} );
+
+ # Now do the work.
+ $res->{from_vpid} =
+ $allfns{ $cmd->{mode} }{handler_all}( $confInner{"all-pids"} );
+
+ return $res;
+}
+
+sub command_from_outer {
+ my ( $netdata, $cdata, $line ) = @_;
+
+ my $s = $cdata->{socket};
+ if ( not $cdata->{trusted} ) {
+ if ( $line eq "hello $netdata->{key}" ) {
+
+ #printf("Trusting connection from $cdata->{desc}\n");
+ $cdata->{trusted} = 1;
+ $cdata->{str} = "";
+ $s->printf("Welcome\n");
+ $netdata->{parent} = $cdata;
+ } elsif ( $line eq "debug" ) {
+ my $r = Dumper($netdata);
+ $s->printf($r);
+ $s->flush();
+ $netdata->{sel}->remove($s);
+ $s->close();
+ $cdata->{dead} = 1;
+ print("debug\n");
+ } else {
+
+ #printf("Closing connection from $cdata->{desc} (Bad
signon)\n");
+ $netdata->{sel}->remove($s);
+ $s->close();
+ $cdata->{dead} = 1;
+ }
+ return;
+ }
+
+ $cdata->{last_cmd} = my_decode($line);
+ if ( $netdata->{parent} eq $cdata ) {
+ my $res = command_from_parent( my_decode($line) );
+ my $reply = my_encode($res);
+ $cdata->{socket}->printf("$reply\n");
+ }
+}
+
+sub connect_to_outer {
+
+ my $server = IO::Socket::INET->new(
+
+ Reuse => 1,
+ Proto => 'tcp',
+ LocalPort => 37132,
+ Listen => 2,
+ ) or die("not the best start");
+
+ my $lport = $server->sockport();
+ my $hostname = hostname();
+ my $key = "boris";
+ my $signon_text = "connect $hostname $lport $key\n";
+
+ # For now just print the signon code to stdout and let the outer pick
it up.
+ print($signon_text);
+
+ my $netdata;
+ $netdata->{sel} = IO::Select->new();
+ $netdata->{sel}->add($server);
+ $netdata->{server} = $server;
+ $netdata->{key} = $key;
+
+ my $sel = $netdata->{sel};
+ my $server = $netdata->{server};
+
+ while ( my @data = $sel->can_read() ) {
+ foreach my $s (@data) {
+ if ( $s == $server ) {
+ my $new = $server->accept() or die("Failed accept");
+ $sel->add($new);
+ my $peer = getpeername($new);
+ my ( $port, $addr ) = unpack_sockaddr_in($peer);
+ my $ip = inet_ntoa($addr);
+ my $hostname = gethostbyaddr( $addr, AF_INET );
+
+ # printf "New connection from $hostname ($ip) $port\n";
+ my %sinfo;
+ $sinfo{hostname} = $hostname;
+ $sinfo{trusted} = 0;
+ $sinfo{port} = $port;
+ $sinfo{desc} = "$hostname:$port";
+ $sinfo{socket} = $new;
+ $sinfo{line_cb} = \&command_from_outer;
+ $netdata->{connections}{$new} = \%sinfo;
+
+ # $new->printf("Hello from padb\n");
+ #$new->autoflush();
+ next;
+ }
+
+ my $sinfo = $netdata->{connections}{$s};
+ my $d;
+ sysread( $s, $d, 1024 );
+
+ # Dead connection.
+ if ( not defined $d ) {
+ printf("null read from $sinfo->{desc}\n");
+ if ( eof($s) ) {
+ $sel->remove($s);
+ $s->close();
+ $sinfo->{trusted} = 0;
+ $sinfo->{dead} = 1;
+ my $count = $sel->count();
+ printf("EOF from $sinfo->{desc} $count sockets
left\n");
+ }
+ next;
+ }
+
+ $sinfo->{str} .= $d;
+ extract_line( $netdata, $sinfo );
+
+ }
+ }
+ my $count = $sel->count();
+ printf("Thats not supposed to happen count=($count)\n");
+
+}
+
sub inner_main {
$confInner{"slurm-job-step"} = "0";
@@ -5872,6 +6262,7 @@
my @config_options;
my $line_formatted;
my $jobid;
+ my $full_duplex;
my %optionhash = (
"config-option|O=s" => \@config_options,
@@ -5879,7 +6270,8 @@
"line-formatted" => \$line_formatted,
"rank=i" => \@ranks,
"stats-full" => \$stats,
- "verbose|v+" => \$confInner{"verbose"}
+ "verbose|v+" => \$confInner{"verbose"},
+ "full-duplex" => \$full_duplex,
);
my %config_hash;
@@ -5902,6 +6294,12 @@
GetOptions(%optionhash) or die("could not parse options\n");
+ # If this works then nothing below here is needed as all
+ # requests can be sent over the socket.
+ if ($full_duplex) {
+ connect_to_outer();
+ }
+
my $mode;
foreach my $arg ( keys %config_hash ) {
@@ -6113,7 +6511,7 @@
};
$allfns{mpi_watch} = {
- 'handler' => \&mpi_watch,
+ 'handler_all' => \&mpi_watch_all,
'arg_long' => 'mpi-watch',
'help' => "Trace MPI programs",
'pre_out_handler' => \&pre_mpi_watch,
From ashley at pittman.co.uk Thu Jun 18 09:35:34 2009
From: ashley at pittman.co.uk (Ashley Pittman)
Date: Thu, 18 Jun 2009 09:35:34 +0100
Subject: [padb-devel] 2.5 Release candidate available
Message-ID: <1245314134.4226.2.camel@localhost.localdomain>
All,
A 2.5-rc1 candidate is available for download. Assuming no problems are
reported with this I'll make it into an official release Monday or
Tuesday next week.
http://padb.googlecode.com/files/padb-2.5-rc1.tgz
Yours,
Ashley Pittman.
--
Ashley Pittman, Bath, UK.
Padb - A parallel job inspection tool for cluster computing
http://padb.pittman.org.uk
From codesite-noreply at google.com Thu Jun 18 09:37:09 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Thu, 18 Jun 2009 08:37:09 +0000
Subject: [padb-devel] [padb commit] r58 - 2.5 release candidate,
change the version number
Message-ID: <0016e644d0a6443278046c9b5014@google.com>
Author: apittman
Date: Thu Jun 18 01:29:09 2009
New Revision: 58
Modified:
trunk/doc/download.html
trunk/doc/index.html
trunk/src/padb
Log:
2.5 release candidate, change the version number
and update the website to reflect this.
Modified: trunk/doc/download.html
==============================================================================
--- trunk/doc/download.html (original)
+++ trunk/doc/download.html Thu Jun 18 01:29:09 2009
@@ -1,15 +1,24 @@
Source code download
-Beta version
+Stable release
+
+Release Candidate
+A 2.5-rc1 release candidate is available for
+download.
+All being well this will lead to a stable release on or around the
+23rd June. I would encourage all users to download this release for
testing.
-The current Beta release (r31) is 2.5-beta and available for download
-here or from
the google
+
+
+Downloads can also be made directly from Google
+here or from the google
downloads section.
-
Stable release
+
-At present there are no "stable" releases made since the Quadrics 2.2
-version, a release is scheduled for early June 2009.
+This will be the first "stable" release since the Quadrics 2.2 version.
Quadrics release
Modified: trunk/doc/index.html
==============================================================================
--- trunk/doc/index.html (original)
+++ trunk/doc/index.html Thu Jun 18 01:29:09 2009
@@ -10,7 +10,14 @@
for use by programmers and system administrators alike.
-Padb is currently maintained outside of Quadrics by Ashley Pittman
+Padb is currently maintained outside of Quadrics by
+Ashley Pittman,
+I will be attending ISC
+later on this month.
+
+
News
+28-06-09 A 2.5-rc1 release candidate is available to download from the
+ downloads page.
Features
Modified: trunk/src/padb
==============================================================================
--- trunk/src/padb (original)
+++ trunk/src/padb Thu Jun 18 01:29:09 2009
@@ -24,7 +24,7 @@
# Revision history
#
-# Version 2.5 (Beta)
+# Version 2.5
# * First Non-Quadrics version
# * Various stability/bug fixes.
# * Deadlock detect at the MPI Layer rather than the Elan layer
@@ -237,7 +237,7 @@
# Main.
my $prog = basename $0;
-my $version = "2.5-beta";
+my $version = "2.5";
my %conf;
From codesite-noreply at google.com Fri Jun 19 15:40:05 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Fri, 19 Jun 2009 14:40:05 +0000
Subject: [padb-devel] [padb commit] r59 - The tree code is now present and
working in at least one case, startup,
Message-ID: <001485f5473414fb21046cb480e7@google.com>
Author: apittman
Date: Fri Jun 19 07:39:30 2009
New Revision: 59
Modified:
branches/full-duplex/src/padb
Log:
The tree code is now present and working in at least one case, startup,
connecting, signon, reply, command processing and clean shutdown now
work correctly across multiple hosts.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Fri Jun 19 07:39:30 2009
@@ -626,7 +626,7 @@
}
# Put the args in a hash so that they can be referenced by name.
- if ( defined $allfns{$mode}{secondary} ) {
+ if ( defined $mode and defined $allfns{$mode}{secondary} ) {
foreach my $sec ( @{ $allfns{$mode}{secondary} } ) {
$secondary_args{ $sec->{arg_long} } = $sec->{value};
}
@@ -2321,9 +2321,11 @@
$mpd_dfile = $fn;
- my $cmd = "mpirun -machinefile $fn -np $i";
+ my $cmd = "mpdrun -machinefile $fn -np $i";
+
+ my $hosts = $#hosts + 1;
- return ( $cmd, undef );
+ return ( $cmd, undef, $hosts );
}
sub mpd_cleanup_pcmd {
@@ -2413,9 +2415,9 @@
}
- if ( $conf{"verbose"} ) {
- print Dumper \%open_jobs;
- }
+ #if ( $conf{"verbose"} ) {
+ # print Dumper \%open_jobs;
+ #}
}
sub open_get_jobs {
@@ -2457,9 +2459,10 @@
$open_dfile = $fn;
my $prefix = find_ompi_prefix();
- my $cmd = "mpirun -machinefile $fn -np $i $prefix";
+ my $cmd = "orterun -machinefile $fn -np $i $prefix";
+ my $hosts = $#hosts + 1;
- return ( $cmd, undef );
+ return ( $cmd, undef, $hosts );
}
sub open_cleanup_pcmd {
@@ -2782,8 +2785,9 @@
# print Dumper $lines;
my $s = "";
- foreach my $l ( sort { $a <=> $b } ( keys %{ $lines->{from_vpid} } ) )
{
- $s .= $lines->{from_vpid}{$l}{state};
+ foreach my $l ( sort { $a <=> $b } ( keys %{ $lines->{target_responce}
} ) )
+ {
+ $s .= $lines->{target_responce}{$l}{state};
}
print("$s\n");
}
@@ -3029,7 +3033,6 @@
print $socket "hello $word\n";
- #printf("Connecting to $host $port $word\n");
return $socket;
}
@@ -3059,13 +3062,58 @@
$sd->{line_cb}( $handle, $sd, $1 );
$sd->{str} = $2;
} else {
- printf("No match\n");
+
+ # Likely just truncated input, wait for more to arrive.
+ # printf("No match ()\n");
}
return;
}
+# For each remote process generate a tree, giving each
+# process a parent and a number of children.
+# Currently just make this a simple "ladder" but should
+# probably be a f-nomial tree.
+sub generate_comm_tree {
+ my ($a) = @_;
+ my @b = @{$a};
+ my $last = "root";
+ my %comm_tree;
+ foreach my $c (@b) {
+ $comm_tree{$c}{parent} = $last;
+ push( @{ $comm_tree{$last}{children} }, $c );
+ $last = $c;
+ }
+
+ return \%comm_tree;
+}
+
+# Called once when we have the socket details of the last child.
+sub connect_to_children {
+ my $comm_data = shift;
+ @{ $comm_data->{host_ids} } = sort( keys( %{ $comm_data->{remote} } )
);
+ $comm_data->{connection_tree} =
+ generate_comm_tree( $comm_data->{host_ids} );
+ my $td = $comm_data->{connection_tree}->{root}{children}[0];
+
+ #printf("I'm connecting to $td\n");
+ my $cdata;
+ $cdata->{socket} = connect_to_child(
+ $td,
+ $comm_data->{remote}{$td}{port},
+ $comm_data->{remote}{$td}{key}
+ );
+ $cdata->{active} = 1;
+ $cdata->{str} = "";
+ $cdata->{line_cb} = \&command_from_inner;
+
+ $comm_data->{sockets}{ $cdata->{socket} } = $cdata;
+ $comm_data->{sel}->add( $cdata->{socket} );
+
+ #print Dumper $comm_data;
+}
+
sub issue_command_to_inner {
my ( $cdata, $cmd ) = @_;
my $str = my_encode($cmd);
@@ -3076,33 +3124,46 @@
my ( $handle, $cdata, $line ) = @_;
if ( $line eq "Welcome" ) {
+ my $req;
+ $req->{mode} = "signon";
+ $req->{connection_tree} = $handle->{connection_tree};
+ $req->{remote} = $handle->{remote};
+ issue_command_to_inner( $cdata, $req );
+ return;
+ }
- # printf("Sending data to all childen\n");
- # Tell all hosts to go.
+ # A reply from inner.
+ my $d = my_decode($line);
+ if ( $handle->{state} eq "connecting" ) {
+ $handle->{state} = "live";
my $req;
$req->{mode} = $handle->{mode};
- $req->{cmd} = $handle->{cmd};
- $req->{jobid} = $handle->{jobid};
+ $req->{jobid} = $handle->{jobid};
$req->{cinner} = \%cinner;
-
- # print Dumper $req;
issue_command_to_inner( $cdata, $req );
-
return;
}
- # A reply from inner.
- my $d = my_decode($line);
+ if ( $handle->{state} eq "live" ) {
+ $handle->{state} = "shutdown";
+ my $req;
+ $req->{mode} = "exit";
+ issue_command_to_inner( $cdata, $req );
+
+
+ $allfns{ $handle->{mode} }{out_handler}( undef, $d );
+ return;
+ }
- $allfns{ $handle->{mode} }{out_handler}( undef, $d );
+ if ( $handle->{state} eq "shutdown" ) {
- my $req;
- $req->{quit} = 1;
- $handle->{shutdown_in_progress} = 1;
- issue_command_to_inner( $cdata, $req );
+ # Nothing to do here.
+ return;
+ }
- print Dumper $d if ( $conf{"full-duplex"} eq "debug" );
+ print("Hmm, unknown state! $handle->{state}\n");
+ return;
}
@@ -3114,6 +3175,7 @@
my $stats = shift;
my $mode = shift;
my $h = shift;
+ my $hosts = shift;
my $errors = 0;
@@ -3133,87 +3195,102 @@
my $comm_data;
- $comm_data->{mode} = $mode;
- $comm_data->{cmd} = $cmd;
- $comm_data->{jobid} = $jobid;
- $comm_data->{shutdown_in_progress} = 0;
+ $comm_data->{mode} = $mode;
+ $comm_data->{hosts} = $hosts;
+ $comm_data->{cmd} = $cmd;
+ $comm_data->{jobid} = $jobid;
+
+ # State, one of "connecting" "live" and "shutdown";
+ $comm_data->{state} = "connecting";
my $sel = IO::Select->new();
$sel->add( $pcmd->{out} );
$sel->add( $pcmd->{err} );
- while ( my @live = $sel->can_read() ) {
- foreach my $h (@live) {
- if ( $h eq $pcmd->{out} ) {
- my $line = $h->getline();
- if ( not defined $line ) {
- if ( not $comm_data->{shutdown_in_progress} ) {
- printf("Warning, EOF from ofd\n");
+ $comm_data->{sel} = $sel;
+ my $start = time();
+
+ while ( $sel->count() > 1 ) {
+ while ( my @live = $sel->can_read(5) ) {
+ foreach my $h (@live) {
+ if ( $h eq $pcmd->{out} ) {
+ my $line = $h->getline();
+ if ( not defined $line ) {
+ if ( not $comm_data->{state} eq "shutdown" ) {
+ printf("Warning, EOF from ofd\n");
+ }
+ $sel->remove($h);
+ $h->close();
+ next;
}
- $sel->remove($h);
- $h->close();
- next;
- }
- my @words = split( " ", $line );
- if ( $#words == 3 and $words[0] eq "connect" ) {
-
- my $socket =
- connect_to_child( $words[1], $words[2], $words[3] );
- my $cdata;
- $cdata->{active} = 1;
- $cdata->{str} = "";
- $cdata->{socket} = $socket;
- $cdata->{line_cb} = \&command_from_inner;
- $comm_data->{sockets}{$socket} = $cdata;
- $sel->add($socket);
- next;
- }
- if ( $words[0] eq "debug" ) {
- my $count = $sel->count();
- print("There are $count sockets\n");
- next;
- }
- print("inner: $line");
- } elsif ( $h eq $pcmd->{err} ) {
- my $line = $h->getline();
-
- if ( not defined $line ) {
- if ( not $comm_data->{shutdown_in_progress} ) {
- printf("Warning, EOF from efd\n");
+ my @words = split( " ", $line );
+ if ( $#words == 3 and $words[0] eq "connect" ) {
+
+ my $host = $words[1];
+
+ $comm_data->{remote}{$host}{port} = $words[2];
+ $comm_data->{remote}{$host}{key} = $words[3];
+ $comm_data->{signons}++;
+
+ if ( $comm_data->{signons} == $comm_data->{hosts}
) {
+ connect_to_children($comm_data);
+ }
+ next;
}
- $sel->remove($h);
- $h->close();
- next;
- }
- printf("einner:$line");
- } elsif ( defined $comm_data->{sockets}{$h} ) {
- my $cdata = $comm_data->{sockets}{$h};
-
- my $data;
- my $nb = sysread( $h, $data, 1024 );
-
- #printf("read $data ($nb) from fd\n");
-
- if ( not defined $data or $nb == 0 ) {
- if ( not $comm_data->{shutdown_in_progress} ) {
- printf("EOF from child socket ($nb)\n");
+ if ( $words[0] eq "debug" ) {
+ my $count = $sel->count();
+ print("There are $count sockets\n");
+ next;
+ }
+ print("inner: $line");
+ } elsif ( $h eq $pcmd->{err} ) {
+ my $line = $h->getline();
+
+ if ( not defined $line ) {
+ if ( not $comm_data->{state} eq "shutdown" ) {
+ printf("Warning, EOF from efd\n");
+ }
+ $sel->remove($h);
+ $h->close();
+ next;
+ }
+ printf("einner:$line");
+ } elsif ( defined $comm_data->{sockets}{$h} ) {
+ my $cdata = $comm_data->{sockets}{$h};
+
+ my $data;
+ my $nb = sysread( $h, $data, 1024 );
+
+ if ( not defined $data or $nb == 0 ) {
+ if ( not $comm_data->{state} eq "shutdown" ) {
+ printf("EOF from child socket ($nb)\n");
+ }
+ $sel->remove($h);
+ $h->close();
+ next;
}
- $sel->remove($h);
- $h->close();
- next;
- }
- $cdata->{str} .= $data;
- extract_line( $comm_data, $cdata );
+ $cdata->{str} .= $data;
+ extract_line( $comm_data, $cdata );
- } else {
- printf("Responce from unknown fd $h\n");
- exit(1);
+ } else {
+ printf("Responce from unknown fd $h\n");
+ exit(1);
+ }
+ }
+ my $count = $sel->count();
+ if ( $count == 1 ) {
+ printf("All sockets closed?\n");
}
}
+ my $t2 = time() - $start;
my $count = $sel->count();
- if ( $count == 1 ) {
- printf("All sockets closed?\n");
+ if ( $count > 0 ) {
+ printf("Still here, time:$t2 comm_count:$count\n");
+ if ( $comm_data->{signons} != $comm_data->{hosts} ) {
+ my $missing = $comm_data->{hosts} - $comm_data->{signons};
+ print("Waiting for signon from $missing hosts.\n");
+ }
}
}
@@ -3375,9 +3452,6 @@
my $key = job_to_key($jobid);
- my $cmd;
- my $ncpus;
-
my $stats;
foreach my $rank (@ranks) {
@@ -3394,10 +3468,14 @@
return 1 unless (@res);
- $cmd = $res[0];
- $ncpus = $res[1];
+ my $cmd = $res[0];
+
+ # These two are only defined by some resource managers.
+ my $ncpus = $res[1];
+ my $hosts = $res[2];
$conf{"verbose"} && defined $ncpus && print "Job has $ncpus cpus\n";
+ $conf{"verbose"} && defined $hosts && print "Job has $hosts hosts\n";
# Some versions of perl like to have a space after the O and report
that
# -ormgr isn't a valid option if it's not there, perhaps this is a bug
@@ -3452,8 +3530,13 @@
}
my $errors;
if ( $conf{"full-duplex"} ) {
+ if ( not defined $hosts ) {
+ printf("Full duplex mode needs to know the host count\n");
+ printf("Which is doesn't for this resource manager:
$conf{rmgr}\n");
+ return 1;
+ }
$errors = go_parallel( $jobid, "$cmd --full-duplex",
- $ncpus, $raw, $stats, $mode, $h );
+ $ncpus, $raw, $stats, $mode, $h, $hosts );
} else {
$errors = go_job_once( $jobid, $cmd, $ncpus, $raw, $stats, $mode,
$h );
}
@@ -6095,17 +6178,114 @@
);
}
-sub command_from_parent {
- my $cmd = shift;
- my $res;
+# Receive a reply from a child.
+# If it's the last reply then combine
+# with others and forward to parent.
+sub reply_from_child {
+ my ( $handle, $sd, $req ) = @_;
+
+ # If it's the first connection over this socket simply
+ # foreward on the signon command.
+ if ( $req eq "Welcome" ) {
+ $sd->{socket}->printf("$handle->{signon_cmd}\n");
+ return;
+ }
+
+ my $r = my_decode($req);
- # This is only for debugging.
- if ( $confInner{verbose} ) {
- $res->{request} = $cmd;
+ # Merge this reply into the local one.
+ $handle->{child_replys}++;
+
+ # $handle->{all_replys}{raw}{ $sd->{hostname} } = $r;
+
+ # Combine the host responces.
+ foreach my $status ( keys( %{ $r->{host_responce} } ) ) {
+ foreach my $host ( keys( %{ $r->{host_responce}{$status} } ) ) {
+ $handle->{all_replys}->{host_responce}{$status}{$host} =
+ $r->{host_responce}{$status}{$host};
+ }
}
- if ( defined( $cmd->{quit} ) and ( $cmd->{quit} == 1 ) ) {
- exit(0);
+ # Combine the target process responces.
+ if ( exists $r->{target_responce} ) {
+ foreach my $tp ( keys( %{ $r->{target_responce} } ) ) {
+ $handle->{all_replys}->{target_responce}{$tp} =
+ $r->{target_responce}{$tp};
+ }
+ }
+
+ # If this is the last reply from a child then report upstream.
+ # print Dumper $handle;
+ if ( $handle->{child_replys} != $handle->{children} ) {
+ my $missing = $handle->{children} - $handle->{child_replys};
+ return;
+ }
+
+ # Send the data upstream.
+ my $reply = $handle->{all_replys};
+
+ reply_to_parent( $handle, $reply );
+ if ($handle->{shutdown} ) {
+ inner_cleanup_and_exit($handle);
+ }
+
+ # Reset local data.
+ $handle->{all_replys} = undef;
+ $handle->{child_replys} = 0;
+ $handle->{target_responce} = undef;
+}
+
+# Receive a command (perl reference) from our parent.
+#
+# When we receive a command:
+# 1) Send it on to our children.
+# 2) Execute it.
+# 3) If we have no children send reply.
+sub command_from_parent {
+ my ( $netdata, $cmd ) = @_;
+
+ if ( $cmd->{mode} eq "signon" ) {
+ $netdata->{signon_cmd} = my_encode($cmd);
+
+ if ( not exists $cmd->{connection_tree}{$confInner{hostname}}{children} )
{
+ $netdata->{children} = 0;
+ return;
+ }
+
+ my @children = @{ $cmd->{connection_tree}{$confInner{hostname}}{children}
};
+ $netdata->{children} = $#children + 1;
+
+ # Only one child is tested so far.
+ foreach my $chostname (@children) {
+ my $socket = connect_to_child(
+ $chostname,
+ $cmd->{remote}{$chostname}{port},
+ $cmd->{remote}{$chostname}{key}
+ );
+ my %cdata;
+ $cdata{socket} = $socket;
+ $cdata{hostname} = $chostname;
+ $cdata{line_cb} = \&reply_from_child;
+ $cdata{state} = "init";
+ $netdata->{sel}->add($socket);
+ $netdata->{connections}{$socket} = \%cdata;
+ push @{ $netdata->{child_sockets} }, $socket;
+ }
+ return;
+ }
+
+ # Forward on to our children before doing any more processing.
+ if ( $netdata->{children} ) {
+ my $req = my_encode($cmd) . "\n";
+ foreach my $child ( @{ $netdata->{child_sockets} } ) {
+ $child->printf($req);
+ $child->flush();
+ }
+ }
+
+ if ( $cmd->{mode} eq "exit" ) {
+ $netdata->{shutdown} = 1;
+ return;
}
# Setup the environment.
@@ -6119,12 +6299,39 @@
$rmgr{ $confInner{rmgr} }{find_pids}( $cmd->{jobid} );
# Now do the work.
- $res->{from_vpid} =
- $allfns{ $cmd->{mode} }{handler_all}( $confInner{"all-pids"} );
+ my $z = $allfns{ $cmd->{mode} }{handler_all}( $confInner{"all-pids"} );
- return $res;
+ $netdata->{target_responce} = $z;
+ $netdata->{all_replys}{target_responce} = $z;
+
+ return;
+}
+
+# Time for the inner process to exit, cleanup all sockets and
+# quit.
+sub inner_cleanup_and_exit {
+ my $netdata = shift;
+ foreach my $h ( $netdata->{sel}->handles() ) {
+ $h->flush();
+ $h->close();
+ }
+ exit(0);
}
+# Send a reply to our parent, put a status of "ok" on for this
+# host.
+sub reply_to_parent {
+ my ( $netdata, $cmd ) = @_;
+
+ $cmd->{host_responce}{ok}{ hostname() } = 1;
+
+ my $reply = my_encode($cmd);
+ $netdata->{parent}->{socket}->printf("$reply\n");
+}
+
+# Process a single line of input onto a socket we are
+# listening on. This is probably our parent (who may
+# be the outer process) but it needs to be authenticated.
sub command_from_outer {
my ( $netdata, $cdata, $line ) = @_;
@@ -6146,36 +6353,40 @@
$cdata->{dead} = 1;
print("debug\n");
} else {
-
- #printf("Closing connection from $cdata->{desc} (Bad
signon)\n");
+ printf("Closing connection from $cdata->{desc} (Bad
signon)\n");
$netdata->{sel}->remove($s);
$s->close();
$cdata->{dead} = 1;
}
return;
}
-
- $cdata->{last_cmd} = my_decode($line);
- if ( $netdata->{parent} eq $cdata ) {
- my $res = command_from_parent( my_decode($line) );
- my $reply = my_encode($res);
- $cdata->{socket}->printf("$reply\n");
+
+ command_from_parent( $netdata, my_decode($line) );
+
+ if ( $netdata->{children} == 0 ) {
+ my $res;
+ $res->{target_responce} = $netdata->{target_responce};
+ reply_to_parent( $netdata, $res );
+ $netdata->{target_responce} = undef;
+
+ if ($netdata->{shutdown} ) {
+ inner_cleanup_and_exit($netdata);
+ }
}
}
-sub connect_to_outer {
+# Loop forever in the inner process.
+sub inner_loop_for_comms {
my $server = IO::Socket::INET->new(
-
- Reuse => 1,
- Proto => 'tcp',
- LocalPort => 37132,
- Listen => 2,
- ) or die("not the best start");
+ Reuse => 1,
+ Proto => 'tcp',
+ Listen => 2,
+ ) or die("Failed to create local port");
my $lport = $server->sockport();
my $hostname = hostname();
- my $key = "boris";
+ my $key = rand();
my $signon_text = "connect $hostname $lport $key\n";
# For now just print the signon code to stdout and let the outer pick
it up.
@@ -6186,6 +6397,7 @@
$netdata->{sel}->add($server);
$netdata->{server} = $server;
$netdata->{key} = $key;
+ $netdata->{shutdown} = 0;
my $sel = $netdata->{sel};
@@ -6199,7 +6411,7 @@
my $ip = inet_ntoa($addr);
my $hostname = gethostbyaddr( $addr, AF_INET );
- # printf "New connection from $hostname ($ip) $port\n";
+ #printf "New connection from $hostname ($ip) $port\n";
my %sinfo;
$sinfo{hostname} = $hostname;
$sinfo{trusted} = 0;
@@ -6216,18 +6428,18 @@
my $sinfo = $netdata->{connections}{$s};
my $d;
- sysread( $s, $d, 1024 );
+ my $count = sysread( $s, $d, 1024 );
# Dead connection.
- if ( not defined $d ) {
- printf("null read from $sinfo->{desc}\n");
+ if ( not defined $d or $count eq 0 ) {
+
+ # printf("null read from $sinfo->{desc}\n");
if ( eof($s) ) {
$sel->remove($s);
$s->close();
$sinfo->{trusted} = 0;
$sinfo->{dead} = 1;
- my $count = $sel->count();
- printf("EOF from $sinfo->{desc} $count sockets
left\n");
+ my $scount = $sel->count();
}
next;
}
@@ -6251,6 +6463,7 @@
$confInner{"edb"} = find_edb();
$confInner{"minfo"} = find_minfo();
$confInner{"open-ps"} = "";
+ $confInner{"hostname"} = hostname();
# The different options this script can perform. One (and only one) of
# these must be set.
@@ -6295,7 +6508,7 @@
# If this works then nothing below here is needed as all
# requests can be sent over the socket.
if ($full_duplex) {
- connect_to_outer();
+ inner_loop_for_comms();
}
my $mode;
@@ -6331,7 +6544,6 @@
# Load some non user-modifiable data into conf now
$confInner{"lineformatted"} = $line_formatted;
- $confInner{"hostname"} = hostname();
$confInner{"myld"} = $ENV{"LD_LIBRARY_PATH"};
From codesite-noreply at google.com Sat Jun 20 00:41:53 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Fri, 19 Jun 2009 23:41:53 +0000
Subject: [padb-devel] [padb commit] r60 - Tidy up the show_proc_all code,
add a proc_output() function
Message-ID: <0016e645ab30ae18d8046cbc117a@google.com>
Author: apittman
Date: Fri Jun 19 15:53:31 2009
New Revision: 60
Modified:
branches/full-duplex/src/padb
Log:
Tidy up the show_proc_all code, add a proc_output() function
which takes a key/value pair rather than just calling output().
Call show_proc_format() properly via a callback rather than have
a special case for it.
Also I obviously forgot to re-indent the code before the previous
commit so re-indent everything.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Fri Jun 19 15:53:31 2009
@@ -2796,7 +2796,7 @@
# XXX: proc-sort-key should probably sort on column headers as
# well as keys.
sub show_proc_format {
- my ( $nlines, $mode, $handle ) = @_;
+ my ( $handle, $nlines ) = @_;
my $lines = $nlines->{lines};
@@ -2927,8 +2927,6 @@
print("$data\n");
}
}
- } elsif ( $mode eq "proc-summary" ) {
- show_proc_format( $nlines, $mode, $handle );
}
}
@@ -3128,7 +3126,7 @@
$req->{mode} = "signon";
$req->{connection_tree} = $handle->{connection_tree};
$req->{remote} = $handle->{remote};
- issue_command_to_inner( $cdata, $req );
+ issue_command_to_inner( $cdata, $req );
return;
}
@@ -3139,7 +3137,7 @@
$handle->{state} = "live";
my $req;
$req->{mode} = $handle->{mode};
- $req->{jobid} = $handle->{jobid};
+ $req->{jobid} = $handle->{jobid};
$req->{cinner} = \%cinner;
issue_command_to_inner( $cdata, $req );
return;
@@ -3150,8 +3148,7 @@
my $req;
$req->{mode} = "exit";
issue_command_to_inner( $cdata, $req );
-
-
+
$allfns{ $handle->{mode} }{out_handler}( undef, $d );
return;
}
@@ -3228,7 +3225,7 @@
my $host = $words[1];
- $comm_data->{remote}{$host}{port} = $words[2];
+ $comm_data->{remote}{$host}{port} = $words[2];
$comm_data->{remote}{$host}{key} = $words[3];
$comm_data->{signons}++;
@@ -3261,7 +3258,7 @@
my $data;
my $nb = sysread( $h, $data, 1024 );
- if ( not defined $data or $nb == 0 ) {
+ if ( not defined $data or $nb == 0 ) {
if ( not $comm_data->{state} eq "shutdown" ) {
printf("EOF from child socket ($nb)\n");
}
@@ -3278,15 +3275,17 @@
exit(1);
}
}
- my $count = $sel->count();
- if ( $count == 1 ) {
- printf("All sockets closed?\n");
- }
+
+ #my $count = $sel->count();
+ #if ( $count == 1 ) {
+ # printf("All sockets closed?\n");
+ #}
}
my $t2 = time() - $start;
my $count = $sel->count();
if ( $count > 0 ) {
- printf("Still here, time:$t2 comm_count:$count\n");
+
+ #printf("Still here, time:$t2 comm_count:$count\n");
if ( $comm_data->{signons} != $comm_data->{hosts} ) {
my $missing = $comm_data->{hosts} - $comm_data->{signons};
print("Waiting for signon from $missing hosts.\n");
@@ -5127,28 +5126,24 @@
sub show_task_file {
my ( $vp, $file, $prefix ) = @_;
- if ( defined $prefix ) {
- $prefix = "$prefix: ";
- } else {
- $prefix = "";
- }
return unless ( -f $file );
open( FD, "$file" ) or return;
my @all = ;
close FD;
foreach my $l (@all) {
chomp $l;
- output( $vp, "$prefix$l" );
+ if ( defined $prefix ) {
+ proc_output( $vp, $prefix, $l );
+ } else {
+ my ( $key, $value ) = split( ":", $l );
+ $value =~ s/^[ \t]+//g;
+ proc_output( $vp, $key, $value );
+ }
}
}
sub show_task_stat_file {
- my ( $vp, $file, $prefix ) = @_;
- if ( defined $prefix ) {
- $prefix = "$prefix";
- } else {
- $prefix = "";
- }
+ my ( $vp, $file ) = @_;
my @stat_names =
qw(pid comm state ppid pgrp session tty_nr tpgid flags minflt
cminflt majflt cmajflt utime stime cutime cstime priority nice
@@ -5165,7 +5160,7 @@
chomp $l;
my @stats = split( / /, $l );
for ( my $i = 0 ; $i <= $#stats ; $i++ ) {
- output( $vp, "$prefix.$stat_names[$i]: $stats[$i]" );
+ proc_output( $vp, "stat.$stat_names[$i]", $stats[$i] );
}
}
@@ -5177,27 +5172,16 @@
if ( $confInner{"proc-shows-proc"} ) {
my $exe = readlink "$dir/exe";
if ( defined $exe ) {
- output $vp, "exe:$exe";
+ proc_output( $vp, "exe", $exe );
}
- # pcpu is calculated from /proc elsewhere.
- # This isn't either, ps reports time
- # as a percentage since the program started so
- # isn't live as the top-reported figure is.
-
- #my $pcpu = `ps --pid $pid -o pcpu= 2>/dev/null`;
- #chomp($pcpu);
- #if ( $pcpu != "" ) {
- # output( $vp, "pcpu:$pcpu%" );
- #}
-
show_task_file( $vp, "$dir/status" );
show_task_file( $vp, "$dir/wchan", "wchan" );
show_task_file( $vp, "$dir/stat", "stat" );
if ( $confInner{"proc-shows-stat"}
or $confInner{mode} eq "proc-summary" )
{
- show_task_stat_file( $vp, "$dir/stat", "stat" );
+ show_task_stat_file( $vp, "$dir/stat" );
}
if ( -f "$dir/maps" ) {
@@ -5223,7 +5207,7 @@
}
foreach my $rail ( sort keys %totals ) {
my $total = $totals{$rail} / 1024;
- output( $vp, "sdram$rail: $total kb" );
+ proc_output( $vp, "sdram$rail", "$total kb" );
}
}
}
@@ -5260,10 +5244,10 @@
}
foreach my $fd (@all_fddata) {
if ( defined $fd->{pos} ) {
- output( $vp,
- "fd$fd->{fd}: $fd->{target} \($fd->{pos}
$fd->{flags}\)" );
+ proc_output( $vp, "fd$fd->{fd}",
+ "$fd->{target} \($fd->{pos} $fd->{flags}\)" );
} else {
- output( $vp, "fd$fd->{fd}: $fd->{target}" );
+ proc_output( $vp, "fd$fd->{fd}", $fd->{target} );
}
}
}
@@ -5308,6 +5292,11 @@
return $jiffies;
}
+sub proc_output {
+ my ( $vp, $key, $value ) = @_;
+ output( $vp, "$key: $value" );
+}
+
sub show_proc_all {
my ($list) = @_;
@@ -5382,7 +5371,7 @@
my $used = ( $jused / $elapsed ) * $cpucount * 100;
my $used_str = sprintf( "%d", $used );
- output( $vp, "pcpu: $used_str" );
+ proc_output( $vp, "pcpu", $used_str );
}
}
@@ -5390,7 +5379,7 @@
my ( $vp, $pid ) = @_;
if ( $confInner{"proc-shows-proc"} ) {
- output( $vp, "hostname:$confInner{hostname}" );
+ proc_output( $vp, "hostname", $confInner{hostname} );
}
if ( -d "/proc/$pid/task" and $confInner{"proc-shows-proc"} ) {
@@ -6225,7 +6214,7 @@
my $reply = $handle->{all_replys};
reply_to_parent( $handle, $reply );
- if ($handle->{shutdown} ) {
+ if ( $handle->{shutdown} ) {
inner_cleanup_and_exit($handle);
}
@@ -6243,42 +6232,46 @@
# 3) If we have no children send reply.
sub command_from_parent {
my ( $netdata, $cmd ) = @_;
-
+
if ( $cmd->{mode} eq "signon" ) {
$netdata->{signon_cmd} = my_encode($cmd);
-
- if ( not exists $cmd->{connection_tree}{$confInner{hostname}}{children} )
{
- $netdata->{children} = 0;
- return;
- }
-
- my @children = @{ $cmd->{connection_tree}{$confInner{hostname}}{children}
};
- $netdata->{children} = $#children + 1;
-
- # Only one child is tested so far.
- foreach my $chostname (@children) {
- my $socket = connect_to_child(
- $chostname,
- $cmd->{remote}{$chostname}{port},
- $cmd->{remote}{$chostname}{key}
- );
- my %cdata;
- $cdata{socket} = $socket;
- $cdata{hostname} = $chostname;
- $cdata{line_cb} = \&reply_from_child;
- $cdata{state} = "init";
- $netdata->{sel}->add($socket);
- $netdata->{connections}{$socket} = \%cdata;
- push @{ $netdata->{child_sockets} }, $socket;
- }
- return;
+
+ if (
+ not
+ exists $cmd->{connection_tree}{ $confInner{hostname}
}{children} )
+ {
+ $netdata->{children} = 0;
+ return;
+ }
+
+ my @children =
+ @{ $cmd->{connection_tree}{ $confInner{hostname} }{children} };
+ $netdata->{children} = $#children + 1;
+
+ # Only one child is tested so far.
+ foreach my $chostname (@children) {
+ my $socket = connect_to_child(
+ $chostname,
+ $cmd->{remote}{$chostname}{port},
+ $cmd->{remote}{$chostname}{key}
+ );
+ my %cdata;
+ $cdata{socket} = $socket;
+ $cdata{hostname} = $chostname;
+ $cdata{line_cb} = \&reply_from_child;
+ $cdata{state} = "init";
+ $netdata->{sel}->add($socket);
+ $netdata->{connections}{$socket} = \%cdata;
+ push @{ $netdata->{child_sockets} }, $socket;
+ }
+ return;
}
# Forward on to our children before doing any more processing.
if ( $netdata->{children} ) {
- my $req = my_encode($cmd) . "\n";
+ my $req = my_encode($cmd) . "\n";
foreach my $child ( @{ $netdata->{child_sockets} } ) {
- $child->printf($req);
+ $child->printf($req);
$child->flush();
}
}
@@ -6360,18 +6353,18 @@
}
return;
}
-
+
command_from_parent( $netdata, my_decode($line) );
-
+
if ( $netdata->{children} == 0 ) {
- my $res;
- $res->{target_responce} = $netdata->{target_responce};
- reply_to_parent( $netdata, $res );
- $netdata->{target_responce} = undef;
-
- if ($netdata->{shutdown} ) {
- inner_cleanup_and_exit($netdata);
- }
+ my $res;
+ $res->{target_responce} = $netdata->{target_responce};
+ reply_to_parent( $netdata, $res );
+ $netdata->{target_responce} = undef;
+
+ if ( $netdata->{shutdown} ) {
+ inner_cleanup_and_exit($netdata);
+ }
}
}
@@ -6379,8 +6372,8 @@
sub inner_loop_for_comms {
my $server = IO::Socket::INET->new(
- Reuse => 1,
- Proto => 'tcp',
+ Reuse => 1,
+ Proto => 'tcp',
Listen => 2,
) or die("Failed to create local port");
@@ -6395,8 +6388,8 @@
my $netdata;
$netdata->{sel} = IO::Select->new();
$netdata->{sel}->add($server);
- $netdata->{server} = $server;
- $netdata->{key} = $key;
+ $netdata->{server} = $server;
+ $netdata->{key} = $key;
$netdata->{shutdown} = 0;
my $sel = $netdata->{sel};
@@ -6695,6 +6688,7 @@
$allfns{"proc-summary"} = {
'handler_all' => \&show_proc_all,
+ 'out_handler' => \&show_proc_format,
'arg_long' => 'proc-summary',
'help' => "Show process information in top format",
'options_i' => { "column-seperator" => " ", }
From codesite-noreply at google.com Sat Jun 20 22:33:25 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Sat, 20 Jun 2009 21:33:25 +0000
Subject: [padb-devel] [padb commit] r61 - Find processes as part of the
startup phase as report basic meta-data back
Message-ID: <0016364ecdfc1b8384046cce64d5@google.com>
Author: apittman
Date: Sat Jun 20 11:47:51 2009
New Revision: 61
Modified:
branches/full-duplex/src/padb
Log:
Find processes as part of the startup phase and report basic meta-data back
to the outer process (state, pid, exe), although we don't do anything with this
information yet.
Clean up the proc-summary code so it passes datatypes back from the inner to
the outer and clean up a lot of the outer code accordingly.
Add some debugging options to full-duplex.
On mpd pass the process mapping from the outer to the inner via a datatype
so as not to require files.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Sat Jun 20 11:47:51 2009
@@ -2392,7 +2392,11 @@
} else {
my @elems = split( /\|/, $l );
- if ( $#elems == 6 ) {
+ if ( $#elems == 4 ) {
+ my $nprocs = $elems[3];
+ $nprocs =~ s/ //g;
+ $open_jobs{$job}{nprocs} = $nprocs;
+ } elsif ( $#elems == 6 ) {
my $host = $elems[4];
$host =~ s/ //g;
@@ -2416,7 +2420,7 @@
}
#if ( $conf{"verbose"} ) {
- # print Dumper \%open_jobs;
+ #print Dumper \%open_jobs;
#}
}
@@ -2462,7 +2466,7 @@
my $cmd = "orterun -machinefile $fn -np $i $prefix";
my $hosts = $#hosts + 1;
- return ( $cmd, undef, $hosts );
+ return ( $cmd, $open_jobs{$job}{nprocs}, $hosts );
}
sub open_cleanup_pcmd {
@@ -2795,11 +2799,11 @@
# Nicely format process information.
# XXX: proc-sort-key should probably sort on column headers as
# well as keys.
+# Idealy we'd know what format we wanted and only ask the nodes
+# to report relevent info, for now they still report everything.
sub show_proc_format {
my ( $handle, $nlines ) = @_;
- my $lines = $nlines->{lines};
-
my @proc_format_array;
my %proc_format_header;
my $show_fields = 0;
@@ -2826,21 +2830,21 @@
}
my @all;
+ my $lines = $nlines->{target_responce};
foreach my $tag ( sort ( keys %$lines ) ) {
my %hash;
$hash{vp} = $tag;
- foreach my $data ( @{ $lines->{$tag} } ) {
- if ( $data =~ /([\w\.]+)\:[ \t]*(.+)/ ) {
- my $key = lc($1);
-
- next unless defined $proc_format_lengths{$key} or
$show_fields;
+ foreach my $key ( keys( %{ $lines->{$tag} } ) ) {
- if ( length($2) > $proc_format_lengths{$key} ) {
- $proc_format_lengths{$key} = length($2);
- }
+ my $value = $lines->{$tag}{$key};
+ next unless defined $proc_format_lengths{$key} or $show_fields;
- $hash{$key} = $2;
+ if ( length($value) > $proc_format_lengths{$key} ) {
+ $proc_format_lengths{$key} = length($value);
}
+
+ $hash{$key} = $value;
+
}
if ($show_fields) {
my @fields = sort ( keys(%hash) );
@@ -2861,8 +2865,6 @@
}
my $line = join( $separator, @res );
print "$line\n";
-
- #print "@proc_format_array\n";
}
foreach my $hash (@all) {
my @res;
@@ -2876,7 +2878,6 @@
my $line = join( $separator, @res );
print "$line\n";
}
-
}
sub show_results {
@@ -2965,6 +2966,11 @@
}
}
+sub default_output_handler {
+ my $d = shift;
+ print Dumper $d;
+}
+
sub go_file {
my $file = shift;
my $mode = shift;
@@ -3115,17 +3121,33 @@
sub issue_command_to_inner {
my ( $cdata, $cmd ) = @_;
my $str = my_encode($cmd);
+ if ( $conf{"full-duplex"} eq "debug" ) {
+ printf( "Sending command to inner, %d bytes\n", length($str) );
+ print( Dumper($cmd) );
+
+ }
$cdata->{socket}->print("$str\n");
}
sub command_from_inner {
- my ( $handle, $cdata, $line ) = @_;
+ my ( $comm_data, $cdata, $line ) = @_;
if ( $line eq "Welcome" ) {
my $req;
$req->{mode} = "signon";
- $req->{connection_tree} = $handle->{connection_tree};
- $req->{remote} = $handle->{remote};
+ $req->{connection_tree} = $comm_data->{connection_tree};
+ $req->{remote} = $comm_data->{remote};
+
+ # Also send over some of the per-run (as opposed to per-mode)
+ # configuration options.
+ # XXX: Need to send over scripts and other stuff here as well.
+ $req->{jobconfig}{jobid} = $comm_data->{jobid};
+ $req->{jobconfig}{rmgr} = $conf{rmgr};
+
+ if ( $conf{rmgr} eq "orte" ) {
+ $req->{jobconfig}{"orte-data"} = $open_jobs{
$comm_data->{jobid} };
+ }
+
issue_command_to_inner( $cdata, $req );
return;
}
@@ -3133,33 +3155,46 @@
# A reply from inner.
my $d = my_decode($line);
- if ( $handle->{state} eq "connecting" ) {
- $handle->{state} = "live";
+ if ( $conf{"full-duplex"} eq "debug" ) {
+ printf( "Reply from inner, %d bytes\n", length($line) );
+ print( Dumper($d) );
+ }
+
+ if ( $comm_data->{state} eq "connecting" ) {
+
+ #XXX: Check all target_processes are here.
+ print Dumper $d;
+
+ $comm_data->{state} = "live";
my $req;
- $req->{mode} = $handle->{mode};
- $req->{jobid} = $handle->{jobid};
+ $req->{mode} = $comm_data->{mode};
+ $req->{jobid} = $comm_data->{jobid};
$req->{cinner} = \%cinner;
issue_command_to_inner( $cdata, $req );
return;
}
- if ( $handle->{state} eq "live" ) {
- $handle->{state} = "shutdown";
+ if ( $comm_data->{state} eq "live" ) {
+ $comm_data->{state} = "shutdown";
my $req;
$req->{mode} = "exit";
issue_command_to_inner( $cdata, $req );
- $allfns{ $handle->{mode} }{out_handler}( undef, $d );
+ if ( defined($allfns{ $comm_data->{mode} }{out_handler})) {
+ $allfns{ $comm_data->{mode} }{out_handler}( undef, $d );
+ } else {
+ default_output_handler($d);
+ }
return;
}
- if ( $handle->{state} eq "shutdown" ) {
+ if ( $comm_data->{state} eq "shutdown" ) {
# Nothing to do here.
return;
}
- print("Hmm, unknown state! $handle->{state}\n");
+ print("Hmm, unknown state! $comm_data->{state}\n");
return;
}
@@ -5292,14 +5327,24 @@
return $jiffies;
}
+my $proc_info;
+
sub proc_output {
my ( $vp, $key, $value ) = @_;
- output( $vp, "$key: $value" );
+ if ( $confInner{mode} eq "proc-summary" ) {
+ $proc_info->{$vp}{ lc($key) } = $value;
+ } else {
+ output( $vp, "$key: $value" );
+ }
}
sub show_proc_all {
my ($list) = @_;
+ if ( $confInner{mode} eq "proc-summary" ) {
+ $proc_info = undef;
+ }
+
my @all;
foreach my $proc ( @{$list} ) {
@@ -5373,6 +5418,10 @@
proc_output( $vp, "pcpu", $used_str );
}
+
+ if ( $confInner{mode} eq "proc-summary" ) {
+ return $proc_info;
+ }
}
sub show_proc {
@@ -5823,16 +5872,18 @@
debug( $vp, "Looking at $vp, pid: $pid" );
- if ( defined $allfns{ $confInner{mode} }{handler} ) {
- my $res = $allfns{ $confInner{mode} }{handler}( $vp, $pid );
- if ( defined $allfns{ $confInner{mode} }{out_handler} ) {
- output_dtype( $vp, $res );
- }
- } else {
+ if ( $confInner{pre}
+ or not defined( $allfns{ $confInner{mode} }{handler} ) )
+ {
my %d;
$d{pid} = $pid;
$d{vp} = $vp;
push( @{ $confInner{"all-pids"} }, \%d );
+ } else {
+ my $res = $allfns{ $confInner{mode} }{handler}( $vp, $pid );
+ if ( defined $allfns{ $confInner{mode} }{out_handler} ) {
+ output_dtype( $vp, $res );
+ }
}
}
@@ -6084,7 +6135,15 @@
sub open_find_pids {
my $job = shift;
- open_get_data( $confInner{"open-ps"} );
+
+ # Be careful here, we are the inner process then load data from
+ # the outer.
+ if ( defined $confInner{"orte-data"} ) {
+ %open_jobs = ();
+ $open_jobs{$job} = $confInner{"orte-data"};
+ } else {
+ open_get_data( $confInner{"open-ps"} );
+ }
my $hostname = hostname();
foreach my $rank ( keys( %{ $open_jobs{$job}{ranks}{$hostname} } ) ) {
@@ -6224,6 +6283,34 @@
$handle->{target_responce} = undef;
}
+# Find and report pids as part of the signon protocol, we should
+# also report name
+sub inner_find_pids {
+ my ( $netdata, $cmd ) = @_;
+
+ # Cache config data and search for pids, storing
+ # data in $netdata->{target_responce} and $netdata->??
+
+ if ( $cmd->{jobconfig}{rmgr} eq "orte" ) {
+ $confInner{"orte-data"}{ $cmd->{jobconfig}{jobid} } =
+ $cmd->{jobconfig}{"orte-data"};
+ $confInner{"orte-data"} = $cmd->{jobconfig}{"orte-data"};
+ }
+ $confInner{pre} = 1;
+
+ $rmgr{ $cmd->{jobconfig}{rmgr} }{find_pids}( $cmd->{jobconfig}{jobid}
);
+
+ foreach my $proc ( @{ $confInner{"all-pids"} } ) {
+ my $pid = $proc->{pid};
+ my $vp = $proc->{vp};
+ my $name = readlink("/proc/$pid/exe");
+ my $state = find_from_status( $pid, "State" );
+ $netdata->{target_responce}{$vp}->{pid} = $pid;
+ $netdata->{target_responce}{$vp}->{name} = $name;
+ $netdata->{target_responce}{$vp}->{state} = $state;
+ }
+}
+
# Receive a command (perl reference) from our parent.
#
# When we receive a command:
@@ -6241,6 +6328,7 @@
exists $cmd->{connection_tree}{ $confInner{hostname}
}{children} )
{
$netdata->{children} = 0;
+ inner_find_pids( $netdata, $cmd );
return;
}
@@ -6264,6 +6352,7 @@
$netdata->{connections}{$socket} = \%cdata;
push @{ $netdata->{child_sockets} }, $socket;
}
+ inner_find_pids( $netdata, $cmd );
return;
}
@@ -6288,14 +6377,11 @@
$confInner{mode} = $cmd->{mode};
- # Find the pids and register them all.
- $rmgr{ $confInner{rmgr} }{find_pids}( $cmd->{jobid} );
-
# Now do the work.
- my $z = $allfns{ $cmd->{mode} }{handler_all}( $confInner{"all-pids"} );
+ my $res = $allfns{ $cmd->{mode} }{handler_all}( $confInner{"all-pids"}
);
- $netdata->{target_responce} = $z;
- $netdata->{all_replys}{target_responce} = $z;
+ $netdata->{target_responce} = $res;
+ $netdata->{all_replys}{target_responce} = $res;
return;
}
From codesite-noreply at google.com Mon Jun 22 00:21:39 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Sun, 21 Jun 2009 23:21:39 +0000
Subject: [padb-devel] [padb commit] r62 - Replace the output() function with
one that just stores strings
Message-ID: <0016364ecdfcff900f046ce40489@google.com>
Author: apittman
Date: Sun Jun 21 14:20:32 2009
New Revision: 62
Modified:
branches/full-duplex/src/padb
Log:
Replace the output() function with one that just stores strings
rather than calling printf. Pass the strings back over the sockets
as part of the protocol in a {target_output} key to match {target_responce}.
Implement a default_output_handler() function which can take this and
display it in the normal (tree/compress/tagged) form.
Also add an extra parameter to the handler_all function; this may
change syntax but for now it's a hash of parameters as specified
in the allfns hash.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Sun Jun 21 14:20:32 2009
@@ -2967,8 +2967,58 @@
}
sub default_output_handler {
- my $d = shift;
- print Dumper $d;
+ my ( $mode, $d ) = @_;
+
+ # Could warn on missing output here...
+
+ my $lines = $d->{target_output};
+
+ if ( $mode eq "stack" or $input_file ) {
+ if ( $strip_below_main or $strip_above_wait ) {
+ strip_stack_traces($lines);
+ }
+ }
+
+ if ($tree) {
+ print show_tree go_p( 0, $lines,
+ ( sort { $a <=> $b } ( keys %$lines ) ) );
+
+ } elsif ($compress) {
+
+ foreach my $tag ( sort { $a <=> $b } ( keys %$lines ) ) {
+ next if ( !defined( $lines->{$tag} ) );
+ my @identical = ();
+ foreach my $tag2 ( keys %$lines ) {
+ next if ( $tag2 eq $tag );
+ if ( cmp_list( \@{ $lines->{$tag} }, \@{ $lines->{$tag2} }
) ) {
+ push( @identical, $tag2 );
+ delete( $lines->{$tag2} );
+ }
+ }
+ print("----------------\n");
+ printf( "%s\n", join( ",", compress( @identical, $tag ) ) );
+ print("----------------\n");
+ foreach my $data ( @{ $lines->{$tag} } ) {
+ print("$data\n");
+ }
+ }
+ } elsif ($compress_C) {
+ foreach my $tag ( sort { $a <=> $b } ( keys %$lines ) ) {
+ print("----------------\n");
+ print("$tag\n");
+ print("----------------\n");
+ foreach my $data ( @{ $lines->{$tag} } ) {
+ print("$data\n");
+ }
+ }
+ } else {
+ foreach my $process ( sort( keys( %{ $d->{target_output} } ) ) ) {
+ foreach my $line ( @{ $d->{target_output}{$process} } ) {
+ print "$process:$line\n";
+ }
+ }
+
+ }
}
sub go_file {
@@ -3160,15 +3210,38 @@
print( Dumper($d) );
}
- if ( $comm_data->{state} eq "connecting" ) {
+ if ( ( $comm_data->{state} eq "connecting" )
+ or ( $comm_data->{state} eq "live" and $watch ) )
+ {
+
+ if ( $comm_data->{state} eq "connecting" ) {
- #XXX: Check all target_processes are here.
- print Dumper $d;
+ #XXX: Check all target_processes are here.
+ # print Dumper $d;
+ }
+
+ # Watch mode, show the output and then loop.
+ if ( $comm_data->{state} eq "live" ) {
+ maybe_clear_screen();
+ if ( defined( $allfns{ $comm_data->{mode} }{out_handler} ) ) {
+ $allfns{ $comm_data->{mode} }{out_handler}( undef, $d );
+ } else {
+ default_output_handler( $comm_data->{mode}, $d );
+ }
+ sleep( $conf{interval} );
+ }
$comm_data->{state} = "live";
my $req;
- $req->{mode} = $comm_data->{mode};
- $req->{jobid} = $comm_data->{jobid};
+ $req->{mode} = $comm_data->{mode};
+
+ # Send along the secondary args.
+ if ( defined $allfns{ $comm_data->{mode} }{secondary} ) {
+ foreach my $sec ( @{ $allfns{ $comm_data->{mode} }{secondary}
} ) {
+ $req->{cargs}{ $sec->{arg_long} } = $sec->{value};
+ }
+ }
+
$req->{cinner} = \%cinner;
issue_command_to_inner( $cdata, $req );
return;
@@ -3180,11 +3253,11 @@
$req->{mode} = "exit";
issue_command_to_inner( $cdata, $req );
- if ( defined($allfns{ $comm_data->{mode} }{out_handler})) {
- $allfns{ $comm_data->{mode} }{out_handler}( undef, $d );
- } else {
- default_output_handler($d);
- }
+ if ( defined( $allfns{ $comm_data->{mode} }{out_handler} ) ) {
+ $allfns{ $comm_data->{mode} }{out_handler}( undef, $d );
+ } else {
+ default_output_handler( $comm_data->{mode}, $d );
+ }
return;
}
@@ -3310,11 +3383,6 @@
exit(1);
}
}
-
- #my $count = $sel->count();
- #if ( $count == 1 ) {
- # printf("All sockets closed?\n");
- #}
}
my $t2 = time() - $start;
my $count = $sel->count();
@@ -3550,18 +3618,18 @@
}
# This makes thing easier...
- if ($watch) {
- while (1) {
- maybe_clear_screen();
- my $errors =
- go_job_once( $jobid, $cmd, $ncpus, $raw, $stats, $mode, $h );
- if ( $errors != 0 ) {
- cleanup_pcmd();
- return $errors;
- }
- sleep( $conf{"interval"} );
- }
- }
+ #if ($watch) {
+ ## while (1) {
+ # maybe_clear_screen();
+ # my $errors =
+ # go_job_once( $jobid, $cmd, $ncpus, $raw, $stats, $mode, $h
);
+ # if ( $errors != 0 ) {
+ # cleanup_pcmd();
+ # return $errors;
+ # }
+ # sleep( $conf{"interval"} );
+ # }
+ #}
my $errors;
if ( $conf{"full-duplex"} ) {
if ( not defined $hosts ) {
@@ -3988,17 +4056,13 @@
}
}
+my %inner_output;
+
sub output {
my ( $vp, $str ) = @_;
- if ( $confInner{"lineformatted"} ) {
- if ( defined $vp ) {
- print "$vp:$str\n";
- } else {
- print "$confInner{hostname}.-1:ERROR: $str\n";
- }
- } else {
- print "$str\n";
- }
+
+ push( @{ $inner_output{$vp} }, $str );
+
}
sub p_die {
@@ -4570,7 +4634,7 @@
}
sub show_mpi_queue {
- my ( $vp, $pid ) = @_;
+ my ( $carg, $vp, $pid ) = @_;
my @mq = fetch_mpi_queue( $vp, $pid );
foreach my $o (@mq) {
@@ -4591,7 +4655,7 @@
# Ideally handle all this at a higher level...
sub show_mpi_queue_for_deadlock_all {
- my ($list) = @_;
+ my ( $carg, $list ) = @_;
my @all;
@@ -5339,7 +5403,7 @@
}
sub show_proc_all {
- my ($list) = @_;
+ my ( $carg, $list ) = @_;
if ( $confInner{mode} eq "proc-summary" ) {
$proc_info = undef;
@@ -5469,7 +5533,7 @@
# but also that the resulting stack traces will have less artifacts
because running
# processes bunch up behind the non-running ones.
sub stack_trace_from_pids {
- my ($list) = @_;
+ my ( $carg, $list ) = @_;
my @all;
@@ -5602,13 +5666,13 @@
}
sub kill_proc {
- my ( $vp, $pid ) = @_;
- my $signal = uc( $confInner{args}{signal} );
+ my ( $cargs, $vp, $pid ) = @_;
+ my $signal = uc( $cargs->{signal} );
kill( $signal, $pid );
}
sub show_queue {
- my ( $vp, $pid ) = @_;
+ my ( $carg, $vp, $pid ) = @_;
# Nobble the LD_LIBRARY_PATH to give etrace the best chance of working.
my %remote_env = get_remote_env($pid);
@@ -5623,7 +5687,7 @@
return if ( $lines != 0 );
- show_mpi_queue( $vp, $pid );
+ show_mpi_queue( $carg, $vp, $pid );
}
sub show_clever_full_stack {
@@ -5676,7 +5740,7 @@
}
sub show_full_stacks {
- my ($list) = @_;
+ my ( $carg, $list ) = @_;
if (0) {
@@ -5700,7 +5764,7 @@
}
sub set_debug {
- my ( $vp, $pid ) = @_;
+ my ( $carg, $vp, $pid ) = @_;
run_command( $vp,
"edb --key $confInner{key} --debug=$confInner{args}{dflag} --target-vp=$vp"
);
@@ -5857,7 +5921,7 @@
}
sub mpi_watch_all {
- my ($list) = @_;
+ my ( $carg, $list ) = @_;
my %res;
foreach my $proc ( @{$list} ) {
my $vp = $proc->{vp};
@@ -6226,6 +6290,18 @@
);
}
+sub default_handler_all {
+ my ( $cmd, $list ) = @_;
+ my %gres;
+ foreach my $proc ( @{$list} ) {
+ my $vp = $proc->{vp};
+ my $pid = $proc->{pid};
+ $gres{$vp} =
+ $allfns{ $cmd->{mode} }{handler}( $cmd->{cargs}, $vp, $pid );
+ }
+ return \%gres;
+}
+
# Receive a reply from a child.
# If it's the last reply then combine
# with others and forward to parent.
@@ -6262,6 +6338,13 @@
}
}
+ # Save any output we've got from this node.
+ foreach my $key ( keys(%inner_output) ) {
+ $r->{target_output}{$key} = $inner_output{$key};
+ }
+
+ %inner_output = ();
+
# If this is the last reply from a child then report upstream.
# print Dumper $handle;
if ( $handle->{child_replys} != $handle->{children} ) {
@@ -6378,8 +6461,14 @@
$confInner{mode} = $cmd->{mode};
# Now do the work.
- my $res = $allfns{ $cmd->{mode} }{handler_all}( $confInner{"all-pids"}
);
-
+ my $res;
+ if ( defined $allfns{ $cmd->{mode} }{handler_all} ) {
+ $res =
+ $allfns{ $cmd->{mode} }{handler_all}( $cmd->{cargs},
+ $confInner{"all-pids"} );
+ } else {
+ $res = default_handler_all( $cmd, $confInner{"all-pids"} );
+ }
$netdata->{target_responce} = $res;
$netdata->{all_replys}{target_responce} = $res;
@@ -6445,6 +6534,14 @@
if ( $netdata->{children} == 0 ) {
my $res;
$res->{target_responce} = $netdata->{target_responce};
+
+ # Save any output we've got from this node.
+ foreach my $key ( keys(%inner_output) ) {
+ $res->{target_output}{$key} = $inner_output{$key};
+ }
+
+ %inner_output = ();
+
reply_to_parent( $netdata, $res );
$netdata->{target_responce} = undef;
@@ -6705,27 +6802,32 @@
sub common_main {
- # Long command line option.
- $allfns{queue}{arg_long} = "message-queue";
+ # The quasi-authorative list of modes padb can operate in.
+
+ # Notes on the callback functions and paramaters.
+
+ # handler Called in the inner for each target process.
+ # param: ??, $vp, $pid
+
+# handler_all Called once in the the inner and should iterate over each
target process.
+# ??, $vp, $pid
- # Short command line option (optional).
- $allfns{queue}{arg_short} = "q";
+# These two functions can eitehr return a value, and have it passed to the
output handler
+# or call output() and use the default_output_handler().
- # Handler to be called for each vp, called with ($vp,$pid) on the
- # correct host for each vp.
- $allfns{queue}{handler} = \&show_queue;
-
- # Handler to be called in the outer when command line option is set.
- # $allfns{queue}{cmdline} = \&command_line_queue;
-
- # Output handlers,
- # If {out_handler} is set (to a function) assume $line_formatted and
- # call that fn with the output.
- # If {pre_out_handler} is set call this function once at start of day,
- # save it's return value and pass this to {out_handler} later.
+ # out_handler Called once in the outer to display the output
+ # pre_out_handler Called once in the outer to display any header.
- # Help text for this function.
- $allfns{queue}{help} = "Show the message queues";
+ # TODO:
+ # --mode= on the command line?
+ # Sort out secondary and options_i so they are handled in the same way.
+
+ $allfns{queue} = {
+ 'arg_long' => "message-queue",
+ 'arg_short' => "q",
+ 'handler' => \&show_queue,
+ 'help' => "Show the message queues",
+ };
$allfns{kill} = {
'handler' => \&kill_proc,
@@ -6740,10 +6842,6 @@
]
};
- # There are a number of things to consider though, are there any output
- # filters that can be used with this function and are the args options
- # to the inner code or the output filter (or can they just be set for
both)
-
$allfns{mqueue} = {
'handler' => \&show_mpi_queue,
'arg_long' => 'mpi-queue',
@@ -6836,7 +6934,6 @@
}
}
}
-
}
# Now run some actual code.
From ashley at pittman.co.uk Wed Jun 24 08:51:23 2009
From: ashley at pittman.co.uk (Ashley Pittman)
Date: Wed, 24 Jun 2009 08:51:23 +0100
Subject: [padb-devel] 2.5 release on website.
Message-ID: <1245829883.3886.1.camel@localhost.localdomain>
All,
I've uploaded the 2.5 release to the website this morning, the code is
exactly the same as the release candidate made last week.
Ashley Pittman.
--
Ashley Pittman, Bath, UK.
Padb - A parallel job inspection tool for cluster computing
http://padb.pittman.org.uk
From codesite-noreply at google.com Wed Jun 24 12:47:42 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Wed, 24 Jun 2009 11:47:42 +0000
Subject: [padb-devel] [padb commit] r64 - Update the website to point to the
new release.
Message-ID: <001485f54734c5dc68046d16ac8c@google.com>
Author: apittman
Date: Wed Jun 24 01:08:32 2009
New Revision: 64
Modified:
trunk/doc/download.html
Log:
Update the website to point to the new release.
Modified: trunk/doc/download.html
==============================================================================
--- trunk/doc/download.html (original)
+++ trunk/doc/download.html Wed Jun 24 01:08:32 2009
@@ -1,24 +1,17 @@
-Source code download
-Stable release
+Current stable release
-Release Candidate
-A 2.5-rc1 release candidate it avaliable for
-download.
-All being well this will lead to a stable release on or around the
-23rd June. I would encourage all users to download this release for
testing.
+The latest stable release, 2.5 can be downloaded direct from the google downloads section.
-Downloads can also me made direct from google
-here or from the google
-downloads section.
+This is first "stable" release since the Quarics 2.2 version.
-
+
Source code download
+
+Source code can be downloaded direct from google at the
+downloads section.
-This will be the first "stable" release since the Quarics 2.2 version.
Quadrics release
@@ -28,15 +21,15 @@
in the QSNET2LIBS package. No further releases are expected
from quadrics at this time.
-Source code access
+Development and Subversion access
Subversion access to padb is hosted by
google where you can
browse the
source online.
Alternatively to checkout a read-only copy of the source run the
-following command from your desktop.
+following command from your machine.
-svn checkout http://padb.googlecode.com/svn/trunk/ padb-read-only
+svn checkout http://padb.googlecode.com/svn/trunk/ padb
From codesite-noreply at google.com Wed Jun 24 13:05:21 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Wed, 24 Jun 2009 12:05:21 +0000
Subject: [padb-devel] [padb commit] r63 - add a inner-callback mode which
doesn't rely on stdout
Message-ID: <001636b43254e3779a046d16eb2c@google.com>
Author: apittman
Date: Wed Jun 24 01:04:19 2009
New Revision: 63
Modified:
branches/full-duplex/src/padb
Log:
Add an inner-callback mode which doesn't rely on stdout
forwarding from the parallel processes to the mpirun
command. This should fix the problems on mpd/mpich2.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Wed Jun 24 01:04:19 2009
@@ -243,6 +243,8 @@
my %conf;
+my $secret;
+
# Config options the inner knows about, only forward options if they are
in this list.
my @inner_conf = qw(edb edbopt minfo rmgr scripts slurm-job-step verbose);
@@ -280,11 +282,12 @@
};
$rmgr{"mpd"} = {
- 'is_installed' => \&mpd_is_installed,
- 'get_active_jobs' => \&mpd_get_jobs,
- 'setup_pcmd' => \&mpd_setup_pcmd,
- 'cleanup_pcmd' => \&mpd_cleanup_pcmd,
- 'find_pids' => \&mpd_find_pids,
+ 'is_installed' => \&mpd_is_installed,
+ 'get_active_jobs' => \&mpd_get_jobs,
+ 'setup_pcmd' => \&mpd_setup_pcmd,
+ 'cleanup_pcmd' => \&mpd_cleanup_pcmd,
+ 'find_pids' => \&mpd_find_pids,
+ 'require_inner_callback' => 1,
};
$rmgr{"orte"} = {
@@ -379,7 +382,8 @@
$conf{"local-fd-name"} = "/dev/null";
$conf{"stack-strip-above"} =
"elan_waitWord,elan_pollWord,elan_deviceCheck,opal_condition_wait";
-$conf{"full-duplex"} = 0;
+$conf{"full-duplex"} = 0;
+$conf{"inner-callback"} = 0;
# $conf{stack-format} = undef;
@@ -3161,6 +3165,7 @@
$cdata->{active} = 1;
$cdata->{str} = "";
$cdata->{line_cb} = \&command_from_inner;
+ $cdata->{eof_cb} = \&eof_from_inner;
$comm_data->{sockets}{ $cdata->{socket} } = $cdata;
$comm_data->{sel}->add( $cdata->{socket} );
@@ -3272,6 +3277,47 @@
}
+sub handle_signon {
+ my ( $comm_data, $host, $port, $key ) = @_;
+
+ $comm_data->{remote}{$host}{port} = $port;
+ $comm_data->{remote}{$host}{key} = $key;
+ $comm_data->{signons}++;
+
+ if ( $comm_data->{signons} == $comm_data->{hosts} ) {
+ connect_to_children($comm_data);
+ }
+}
+
+sub hello_from_inner {
+ my ( $comm_data, $cdata, $line ) = @_;
+
+ # Children connect back with "Hello $outerkey $hostname $port
$innernkey";
+ my @words = split( " ", $line );
+ if ( $#words != 4 or $words[0] ne "Hello" or $words[1] ne $secret ) {
+ printf("Bad signon $line\n");
+ return 0;
+ }
+
+ handle_signon( $comm_data, $words[2], $words[3], $words[4] );
+
+ if ( $comm_data->{signons} == $comm_data->{hosts} ) {
+
+ # Don't listen on this port any more;
+ $comm_data->{sel}->remove( $comm_data->{listen} );
+ $comm_data->{listen}->close();
+
+ }
+}
+
+sub eof_from_inner {
+ my ( $comm_data, $cdata ) = @_;
+
+ if ( $comm_data->{state} ne "shutdown" ) {
+ printf("Unexpected EOF from child socket ($comm_data->{state})\n");
+ }
+}
+
sub go_parallel {
my $jobid = shift;
my $cmd = shift;
@@ -3282,6 +3328,23 @@
my $h = shift;
my $hosts = shift;
+ my $comm_data;
+
+ my $sel = IO::Select->new();
+ if ( $conf{"inner-callback"} ) {
+ my $sl = IO::Socket::INET->new(
+ Reuse => 1,
+ Proto => 'tcp',
+ Listen => 2,
+ ) or die("Failed to create local port");
+
+ $comm_data->{listen} = $sl;
+ my $port = $sl->sockport();
+ my $hostname = hostname();
+ $cmd .= " --outer=$hostname:$port";
+ $sel->add($sl);
+ }
+
my $errors = 0;
my $report_errors = 1;
@@ -3298,17 +3361,15 @@
close $pcmd->{in};
- my $comm_data;
-
- $comm_data->{mode} = $mode;
- $comm_data->{hosts} = $hosts;
- $comm_data->{cmd} = $cmd;
- $comm_data->{jobid} = $jobid;
+ $comm_data->{mode} = $mode;
+ $comm_data->{hosts} = $hosts;
+ $comm_data->{cmd} = $cmd;
+ $comm_data->{jobid} = $jobid;
+ $comm_data->{signons} = 0;
# State, one of "connecting" "live" and "shutdown";
$comm_data->{state} = "connecting";
- my $sel = IO::Select->new();
$sel->add( $pcmd->{out} );
$sel->add( $pcmd->{err} );
@@ -3331,18 +3392,10 @@
my @words = split( " ", $line );
if ( $#words == 3 and $words[0] eq "connect" ) {
- my $host = $words[1];
-
- $comm_data->{remote}{$host}{port} = $words[2];
- $comm_data->{remote}{$host}{key} = $words[3];
- $comm_data->{signons}++;
-
- if ( $comm_data->{signons} == $comm_data->{hosts}
) {
- connect_to_children($comm_data);
- }
+ handle_signon( $comm_data, $words[1], $words[2],
+ $words[3] );
next;
- }
- if ( $words[0] eq "debug" ) {
+ } elsif ( $words[0] eq "debug" ) {
my $count = $sel->count();
print("There are $count sockets\n");
next;
@@ -3366,23 +3419,34 @@
my $data;
my $nb = sysread( $h, $data, 1024 );
- if ( not defined $data or $nb == 0 ) {
- if ( not $comm_data->{state} eq "shutdown" ) {
- printf("EOF from child socket ($nb)\n");
+ if ( $nb == 0 ) {
+ if ( defined( $cdata->{eof_cb} ) ) {
+ $cdata->{eof_cb}( $comm_data, $cdata );
}
$sel->remove($h);
$h->close();
- next;
+ } else {
+ $cdata->{str} .= $data;
+ extract_line( $comm_data, $cdata );
}
+ } elsif ( exists( $comm_data->{listen} )
+ and $h eq $comm_data->{listen} )
+ {
+
+ # It's a new socket on our listening port.
+ my $new = $h->accept();
+ $sel->add($new);
+ my %cdata;
+ $cdata{str} = "";
+ $cdata{line_cb} = \&hello_from_inner;
- $cdata->{str} .= $data;
- extract_line( $comm_data, $cdata );
+ $comm_data->{sockets}{$new} = \%cdata;
} else {
printf("Responce from unknown fd $h\n");
exit(1);
}
- }
+ } #for...
}
my $t2 = time() - $start;
my $count = $sel->count();
@@ -3537,6 +3601,34 @@
}
+sub find_padb_secret {
+
+ my $file = "$ENV{HOME}/.padb-secret";
+ if ( !-f $file ) {
+ printf("No secret file ($file)\n");
+ return;
+ }
+ my (
+ $dev, $ino, $mode, $nlink, $uid, $gid, $rdev,
+ $size, $atime, $mtime, $ctime, $blksize, $blocks
+ ) = stat($file);
+
+ # Check that the file is mode 100600 (Octal)
+ if ( $mode != 33152 ) {
+ printf("Wrong permissions on secret file, should be 0600
($file)\n");
+ }
+
+ open( SFD, $file ) or return;
+ my @l = ;
+ close(SFD);
+ if ( $#l != 0 ) {
+ return;
+ }
+ if ( $l[0] =~ /^secret=(\w+)$/ ) {
+ return $1;
+ }
+}
+
sub go_job {
my $jobid = shift;
my $mode = shift;
@@ -3556,6 +3648,22 @@
my $stats;
+ if ( defined $rmgr{ $conf{rmgr} }{require_inner_callback}
+ and $rmgr{ $conf{rmgr} }{require_inner_callback} )
+ {
+ $conf{"inner-callback"} = 1;
+ }
+
+ if ( $conf{"inner-callback"} ) {
+ $secret = find_padb_secret();
+
+ if ( not defined $secret ) {
+ printf("Error: No secret\n");
+ exit(1);
+ }
+
+ }
+
foreach my $rank (@ranks) {
$rops .= " --rank=$rank";
}
@@ -6553,6 +6661,7 @@
# Loop forever in the inner process.
sub inner_loop_for_comms {
+ my ($outerloc) = @_;
my $server = IO::Socket::INET->new(
Reuse => 1,
@@ -6560,13 +6669,27 @@
Listen => 2,
) or die("Failed to create local port");
- my $lport = $server->sockport();
- my $hostname = hostname();
- my $key = rand();
- my $signon_text = "connect $hostname $lport $key\n";
+ my $lport = $server->sockport();
+ my $hostname = hostname();
+ my $key = rand();
+
+ if ( defined $outerloc ) {
+ my ( $ohost, $oport ) = split( ":", $outerloc );
+ my $os = IO::Socket::INET->new(
+ PeerAddr => $ohost,
+ PeerPort => $oport,
+ Proto => 'tcp',
+ ) or die("Failed to connect to outer");
+ my $secret = find_padb_secret();
+ die("No secret") if not defined $secret;
+ $os->print("Hello $secret $hostname $lport $key\n");
+ $os->close();
+ } else {
# For now just print the signon code to stdout and let the outer pick
it up.
- print($signon_text);
+ my $signon_text = "connect $hostname $lport $key\n";
+ print($signon_text);
+ }
my $netdata;
$netdata->{sel} = IO::Select->new();
@@ -6650,6 +6773,7 @@
my $line_formatted;
my $jobid;
my $full_duplex;
+ my $outerloc;
my %optionhash = (
"config-option|O=s" => \@config_options,
@@ -6659,6 +6783,7 @@
"stats-full" => \$stats,
"verbose|v+" => \$confInner{"verbose"},
"full-duplex" => \$full_duplex,
+ "outer=s" => \$outerloc,
);
my %config_hash;
@@ -6684,7 +6809,7 @@
# If this works then nothing below here is needed as all
# requests can be sent over the socket.
if ($full_duplex) {
- inner_loop_for_comms();
+ inner_loop_for_comms($outerloc);
}
my $mode;
From codesite-noreply at google.com Wed Jun 24 13:13:27 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Wed, 24 Jun 2009 12:13:27 +0000
Subject: [padb-devel] [padb commit] r65 - Big cleanup of the full-duplex
code, it's now the
Message-ID: <0016368e1b11e0d136046d170853@google.com>
Author: apittman
Date: Wed Jun 24 01:10:44 2009
New Revision: 65
Modified:
branches/full-duplex/src/padb
Log:
Big cleanup of the full-duplex code, it's now the
default and works for all modes. Started removing the old
code. Still to do:
Re-instate the QsNet stats modes.
Implement full-report to use a single inner instance.
There is still some more code which can now be removed.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Wed Jun 24 01:10:44 2009
@@ -349,9 +349,6 @@
my %allfns;
-my $line_formatted = 0;
-my $countoutput = 0;
-
my %cinner; # Config options to be passed to inner.
my $rem_jobid;
@@ -2884,6 +2881,7 @@
}
}
+# XXX: Now only called when loading things from file.
sub show_results {
my ( $nlines, $mode, $handle ) = @_;
@@ -3045,10 +3043,6 @@
return;
}
- if ( not $line_formatted ) {
- die("input file specified but no formatting selected\n");
- }
-
open( PCMD, "$file" ) or die "$prog: cant open file $file: $!\n";
my @data = ;
close(PCMD);
@@ -3199,6 +3193,10 @@
$req->{jobconfig}{jobid} = $comm_data->{jobid};
$req->{jobconfig}{rmgr} = $conf{rmgr};
+ if ( $#ranks != -1 ) {
+ @{ $req->{ranks} } = @ranks;
+ }
+
if ( $conf{rmgr} eq "orte" ) {
$req->{jobconfig}{"orte-data"} = $open_jobs{
$comm_data->{jobid} };
}
@@ -3322,7 +3320,6 @@
my $jobid = shift;
my $cmd = shift;
my $ncpus = shift;
- my $raw = shift;
my $stats = shift;
my $mode = shift;
my $h = shift;
@@ -3478,127 +3475,7 @@
}
}
- cleanup_pcmd();
-
- exit(0);
-}
-
-sub go_job_once {
- my $jobid = shift;
- my $cmd = shift;
- my $ncpus = shift;
- my $raw = shift;
- my $stats = shift;
- my $mode = shift;
- my $h = shift;
-
- my $errors = 0;
-
- my $report_errors = 1;
-
- $report_errors = 0 if ($full_report);
-
- my $pcmd = {
- pid => -1,
- in => "",
- out => *OUT,
- err => *ERR,
- };
-
- # According to the docs there is potential for deadlock here
- # if the amount of data coming in is enough to fill the buffers
- # We should really use IO::select it's not clear to me how
- # you detect EOF in that case and this works for now.
-
- $pcmd->{pid} = open3( $pcmd->{in}, *OUT, *ERR, $cmd )
- or die "Unable to open3() pcmd: $!\n";
-
- close $pcmd->{in};
-
- {
- my %lines;
- my @data;
-
- if ($raw) {
- my $handle = $pcmd->{out};
- while (<$handle>) {
- my $line = $_;
- print("$line");
- }
- } else {
- if ($stats) {
- local $/ = "\n\n";
- my $handle = $pcmd->{out};
- while (<$handle>) {
- s/\n//g;
- push @data, $_;
- }
- } elsif ($line_formatted) {
- my $handle = $pcmd->{out};
- while (<$handle>) {
- my $line = $_;
- process_line( $line, \%lines );
- }
- post_process_lines( \%lines );
- }
- }
-
- my $handle = $pcmd->{err};
- while (<$handle>) {
- my $line = $_;
- if ($report_errors) {
- print( STDERR "Error ($jobid,$mode): $line" );
- }
- $errors++;
- }
-
- close $pcmd->{in};
- close $pcmd->{out};
- close $pcmd->{err};
-
- waitpid( $pcmd->{pid}, 0 );
- my $res = $?;
-
- printf("result from parallel command was $res\n")
- if ( $conf{"verbose"} );
-
- if ( $res != 0 ) {
- my %status = rc_status($res);
- if ( job_is_running($jobid) ) {
- if ($report_errors) {
- printf(
- "Failed to run parallel command (rc =
$status{rc})\n");
- }
- } else {
- printf("Job $jobid is no longer active\n");
- return 1;
- }
- }
-
- if ($stats) {
- if ( $conf{"stats-raw"} ) {
- local $, = "\n\n";
- print @data;
- print "\n";
- } else {
- my $s = read_stats(@data);
- show_stats($s);
- }
- } elsif ($line_formatted) {
- if ( defined $ncpus ) {
- for ( my $vp = 0 ; $vp < $ncpus ; $vp++ ) {
- push(
- @{ $lines{lines}{$vp} },
- "no output for this process"
- ) if ( not defined $lines{lines}{$vp}[0] );
- }
- }
- show_results( \%lines, $mode, $h );
- }
- }
-
- return $errors;
-
+ return 0;
}
sub find_padb_secret {
@@ -3632,19 +3509,6 @@
sub go_job {
my $jobid = shift;
my $mode = shift;
- my $rops = "";
-
- if ( defined $mode ) {
- $rops .= " --$allfns{$mode}{arg_long}";
-
- if ( defined $allfns{$mode}{secondary} ) {
- foreach my $sec ( @{ $allfns{$mode}{secondary} } ) {
- $rops .= " --$sec->{arg_long}=$sec->{value}";
- }
- }
- }
-
- my $key = job_to_key($jobid);
my $stats;
@@ -3664,10 +3528,6 @@
}
- foreach my $rank (@ranks) {
- $rops .= " --rank=$rank";
- }
-
$conf{"verbose"} && print "Attaching to job $jobid\n";
$rem_jobid = $jobid;
@@ -3687,69 +3547,41 @@
$conf{"verbose"} && defined $ncpus && print "Job has $ncpus cpus\n";
$conf{"verbose"} && defined $hosts && print "Job has $hosts hosts\n";
- # Some versions of perl like to have a space after the O and report
that
- # -ormgr isn't a valid option if it's not there, perhaps this is a bug
- # in GetOptions but for now just work around it.
- foreach my $opt ( keys %cinner ) {
- $rops .= " -O $opt=\"$cinner{$opt}\"";
- }
-
# Maybe do it this way, edb works best when run with the same
LD_LIBRARY_PATH
# as the application. It's very important when running the message queue
# extraction code but less so here. You may find you get linker errors
though
# although they shouldn't be to hard to work around.
- # Another problem, if using slurm then the key isn't valid, you need to
- # convert from jobId to key locally on the node, hence you need to use
- # a padb-helper process
- if ( $stats_total or $group ) {
- $stats = 1;
- if ( defined $key ) {
- $cmd .=
- " $conf{edb} --stats-raw --parallel --key=$key
$conf{edbopt}";
- } else {
- $cmd .=
- " $0 --inner --jobid=$rem_jobid $rops --stats-full
$conf{edbopt}";
- }
- } else {
- $rops .= " --line-formatted" if ( $line_formatted or $#ranks != 0
);
- $cmd .= " $0 --inner --jobid=$rem_jobid" . $rops;
- }
+ # Another problem, if using slurm then the key isn't valid, you need to
+ # convert from jobId to key locally on the node, hence you need to use
+ # a padb-helper process
+ #if ( $stats_total or $group ) {
+ # $stats = 1;
+ # if ( defined $key ) {
+ # $cmd .=
+ # " $conf{edb} --stats-raw --parallel --key=$key
$conf{edbopt}";
+ # } else {
+ # $cmd .=
+ # " $0 --inner --jobid=$rem_jobid $rops --stats-full
$conf{edbopt}";
+ # }
+ #} else {
+ $cmd .= " $0 --inner --full-duplex";
- ( $conf{"verbose"} > 1 or $conf{"showcmd"} ) && print "$cmd\n";
+ #}
- my $raw = ( ( not $stats ) and ( not $line_formatted ) );
+ ( $conf{"verbose"} > 1 or $conf{"showcmd"} ) && print "$cmd\n";
my $h;
if ( defined $allfns{$mode}{pre_out_handler} ) {
$h = $allfns{$mode}{pre_out_handler}($ncpus);
}
- # This makes thing easier...
- #if ($watch) {
- ## while (1) {
- # maybe_clear_screen();
- # my $errors =
- # go_job_once( $jobid, $cmd, $ncpus, $raw, $stats, $mode, $h
);
- # if ( $errors != 0 ) {
- # cleanup_pcmd();
- # return $errors;
- # }
- # sleep( $conf{"interval"} );
- # }
- #}
- my $errors;
- if ( $conf{"full-duplex"} ) {
- if ( not defined $hosts ) {
- printf("Full duplex mode needs to know the host count\n");
- printf("Which is doesn't for this resource manager:
$conf{rmgr}\n");
- return 1;
- }
- $errors = go_parallel( $jobid, "$cmd --full-duplex",
- $ncpus, $raw, $stats, $mode, $h, $hosts );
- } else {
- $errors = go_job_once( $jobid, $cmd, $ncpus, $raw, $stats, $mode,
$h );
+ if ( not defined $hosts ) {
+ printf("Full duplex mode needs to know the host count\n");
+ printf("Which is doesn't for this resource manager:
$conf{rmgr}\n");
+ return 1;
}
+ my $errors = go_parallel( $jobid, $cmd, $ncpus, $stats, $mode, $h,
$hosts );
cleanup_pcmd();
return $errors;
}
@@ -3956,12 +3788,12 @@
# Bit of a cheat here, do two things at once...
# This should probably me modified to work better on
# non Quadrics systems.
- my $res;
- $stats_total = 1;
- $group = 1;
- $res = go_job( $full_report, "full-report" );
- undef $stats_total;
- undef $group;
+ #my $res;
+ #$stats_total = 1;
+ #$group = 1;
+ #$res = go_job( $full_report, "stats" );
+ #undef $stats_total;
+ #undef $group;
# Don't exit on failure here.
#if ( $res != 0 ) {
@@ -3970,8 +3802,6 @@
printf("\n");
- $line_formatted = 1;
-
$compress = 1;
go_job( $full_report, "queue" );
undef $compress;
@@ -3979,7 +3809,6 @@
printf("\n");
$strip_above_wait = 0;
- $countoutput = 1;
$tree = 1;
go_job( $full_report, "stack" );
undef $tree;
@@ -4026,21 +3855,13 @@
);
}
- $line_formatted = ( grep { $_ } ( $compress, $compress_C, $tree ) );
- if ( $line_formatted > 1 ) {
+ my $style_count = ( grep { $_ } ( $compress, $compress_C, $tree ) );
+ if ( $style_count > 1 ) {
cmdline_error(
"$prog: Error: only specify one of --compress, --compress-long or --tree\n"
);
}
- if ( defined $mode && $mode eq "proc-summary" ) {
- $line_formatted = 1;
- }
-
- if ( defined $mode && defined $allfns{$mode}{out_handler} ) {
- $line_formatted = 1;
- }
-
if ( not $input_file
and
( ( grep { $_ } ( $stats_total, $group, $have_allfns_option ) ) !=
1 )
@@ -4066,17 +3887,6 @@
cmdline_error("$prog: Error: --tree only works with
--stack-trace\n");
}
- if ( ( ( grep { $_ } ($stats_total) ) == 1 )
- and $line_formatted )
- {
- cmdline_error(
-"$prog: Error: requested output not compatible with requested formatting\n"
- );
- }
-
- $countoutput = 1
- if ( ( defined $mode and $mode eq "stack" ) or $conf{"verbose"} );
-
if ( defined($input_file) ) {
my $m = "input";
if ( defined $mode ) {
@@ -4583,7 +4393,7 @@
my $offset = 0;
my $str = "";
my @s = gdb_read_raw( $gdb, $strp, 128 );
- return undef if ( $s[0] eq undef );
+ return undef if ( not defined( $s[0] ) );
foreach my $d (@s) {
my $v = hex($d);
return $str if ( $v == 0 );
@@ -4620,7 +4430,7 @@
$stats->{symbol}++;
} elsif ( $cmd eq "data" ) {
my @r = gdb_read_raw( $gdb, $params[0], $params[1] );
- if ( $r[0] ne undef ) {
+ if ( defined( $r[0] ) ) {
$res = "@r";
$stats->{datareads}++;
$stats->{databytes} += $params[1];
@@ -4765,6 +4575,7 @@
sub show_mpi_queue_for_deadlock_all {
my ( $carg, $list ) = @_;
+ my $ret;
my @all;
foreach my $proc ( @{$list} ) {
@@ -4793,7 +4604,9 @@
my $gdb = $proc->{gdb};
my @mq = fetch_mpi_queue_gdb( $vp, $pid, $gdb );
- output_dtype( $vp, \@mq );
+ $ret->{$vp} = \@mq;
+
+ #output_dtype( $vp, \@mq );
}
foreach my $proc (@all) {
@@ -4801,6 +4614,7 @@
gdb_detach($gdb);
gdb_quit($gdb);
}
+ return $ret;
}
sub go_deadlock_detect {
@@ -4845,10 +4659,10 @@
}
}
- #print Dumper \%ad;
my $ret = "";
my $i_count = 0; # Interesting groups.
- foreach my $gid ( sort { $a <=> $b } keys %ad ) {
+ #foreach my $gid ( sort { $a <=> $b } keys %ad ) {
+ foreach my $gid ( sort keys %ad ) {
if ( $#target_groups != -1 ) {
next unless defined $tg[$gid];
@@ -4946,8 +4760,8 @@
# code work with input files, the whole thing is due
# a tidy-up on the full-duplex branch where this should
# be solved properly.
- if ( defined $lines->{raw} ) {
- $data = $lines->{raw};
+ if ( defined $lines->{target_responce} ) {
+ $data = $lines->{target_responce};
} else {
$data = $lines->{lines};
}
@@ -6044,19 +5858,10 @@
debug( $vp, "Looking at $vp, pid: $pid" );
- if ( $confInner{pre}
- or not defined( $allfns{ $confInner{mode} }{handler} ) )
- {
- my %d;
- $d{pid} = $pid;
- $d{vp} = $vp;
- push( @{ $confInner{"all-pids"} }, \%d );
- } else {
- my $res = $allfns{ $confInner{mode} }{handler}( $vp, $pid );
- if ( defined $allfns{ $confInner{mode} }{out_handler} ) {
- output_dtype( $vp, $res );
- }
- }
+ my %d;
+ $d{pid} = $pid;
+ $d{vp} = $vp;
+ push( @{ $confInner{"all-pids"} }, \%d );
}
sub maybe_show_pid {
@@ -6487,8 +6292,17 @@
$cmd->{jobconfig}{"orte-data"};
$confInner{"orte-data"} = $cmd->{jobconfig}{"orte-data"};
}
- $confInner{pre} = 1;
+# Save the rank list if supplied, if there is no list then assume all,
should probably
+# be sent over as part of the request rather than the header ready for
when padb
+# can handle multiple commands over one run.
+# XXX: We also need to handle the case where modes don't accept partial
input, for example
+# deadlock detect where this shouldn't be passed.
+ if ( exists( $cmd->{ranks} ) ) {
+ @ranks = @{ $cmd->{ranks} };
+ }
+
+# Query the resource manager to find the pids, they'll be added to
the "all-pids" field.
$rmgr{ $cmd->{jobconfig}{rmgr} }{find_pids}( $cmd->{jobconfig}{jobid}
);
foreach my $proc ( @{ $confInner{"all-pids"} } ) {
@@ -6500,6 +6314,7 @@
$netdata->{target_responce}{$vp}->{name} = $name;
$netdata->{target_responce}{$vp}->{state} = $state;
}
+
}
# Receive a command (perl reference) from our parent.
@@ -6770,7 +6585,6 @@
# Local vars to help with command line parsing
my @config_options;
- my $line_formatted;
my $jobid;
my $full_duplex;
my $outerloc;
@@ -6778,7 +6592,6 @@
my %optionhash = (
"config-option|O=s" => \@config_options,
"jobid=i" => \$jobid,
- "line-formatted" => \$line_formatted,
"rank=i" => \@ranks,
"stats-full" => \$stats,
"verbose|v+" => \$confInner{"verbose"},
@@ -6806,47 +6619,14 @@
GetOptions(%optionhash) or die("could not parse options\n");
+ $confInner{"myld"} = $ENV{"LD_LIBRARY_PATH"};
+
# If this works then nothing below here is needed as all
# requests can be sent over the socket.
- if ($full_duplex) {
- inner_loop_for_comms($outerloc);
- }
+ inner_loop_for_comms($outerloc);
+ exit(0);
my $mode;
-
- foreach my $arg ( keys %config_hash ) {
- next unless defined $config_hash{$arg};
- $mode = $arg;
- }
-
- $confInner{"mode"} = $mode;
-
- # Put the args in a hash so that they can be referenced by name.
- if ( defined $allfns{$mode}{secondary} ) {
- foreach my $sec ( @{ $allfns{$mode}{secondary} } ) {
- $confInner{"args"}{ $sec->{arg_long} } = $sec->{value};
- }
- }
-
- # Load all config options from the command line, unlike the outer
- # code we don't check them to be valid here, any set on the outer
- # command line are automatically passed on and they might not mean
- # anything to us so silently ignore them.
- foreach my $config_option (@config_options) {
- my @pairs = split( ",", $config_option );
- foreach my $pair (@pairs) {
- my ( $name, $val ) = split( "=", $pair );
- if ( not defined $confInner{$name} ) {
- debug undef, "Unknown option $name";
- }
- $confInner{$name} = $val;
- }
- }
-
- # Load some non user-modifiable data into conf now
- $confInner{"lineformatted"} = $line_formatted;
-
- $confInner{"myld"} = $ENV{"LD_LIBRARY_PATH"};
# $rjobid is used for accessing the stats on slurm
# systems, on rms it's just the jobId but on combined
From daniel.kidger at googlemail.com Wed Jun 24 13:27:17 2009
From: daniel.kidger at googlemail.com (Daniel Kidger)
Date: Wed, 24 Jun 2009 13:27:17 +0100
Subject: [padb-devel] Support for various batch engines
Message-ID: <37e88ea60906240527k7c34b2efu65e9335e4159574f@mail.gmail.com>
Hi
Can you clarify what support padb has for LSF ?
i.e. does the choice of job submission tool affect padb's functionality - or
is it only to do with finding and attaching to the relevant processes?
Daniel
-------------- next part --------------
An HTML attachment was scrubbed...
URL:
From ashley at pittman.co.uk Wed Jun 24 14:42:24 2009
From: ashley at pittman.co.uk (Ashley Pittman)
Date: Wed, 24 Jun 2009 14:42:24 +0100
Subject: [padb-devel] Support for various batch engines
In-Reply-To: <37e88ea60906240527k7c34b2efu65e9335e4159574f@mail.gmail.com>
References: <37e88ea60906240527k7c34b2efu65e9335e4159574f@mail.gmail.com>
Message-ID: <1245850944.3882.4.camel@localhost.localdomain>
On Wed, 2009-06-24 at 13:27 +0100, Daniel Kidger wrote:
> Hi
>
> Can you clarify what support padb has for LSF ?
Very little of use, I suspect. padb works at the resource manager level
rather than the scheduler level, and my understanding is that LSF is more
of a scheduler. Padb does support lsf on rms, allowing users to specify
jobs by their lsf id, but in this case it translates it to an rms id and uses
rms as the resource manager.
I'm not familiar with LSF; does it actually start jobs itself, or does it
hand that task off to another program?
> i.e. does the choice of job submission tool affect padb's
> functionality - or is it only to do with finding and attaching to the
> relevant processes?
It's all to do with finding relevant processes, padb doesn't make great
demands on the resource manager as long as it can start jobs and
translate from rank to pid somehow.
Ashley Pittman,
--
Ashley Pittman, Bath, UK.
Padb - A parallel job inspection tool for cluster computing
http://padb.pittman.org.uk
From codesite-noreply at google.com Sun Jun 28 16:40:26 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Sun, 28 Jun 2009 15:40:26 +0000
Subject: [padb-devel] [padb commit] r66 - Remove some old Tru64 code which
isn't need on Linux.
Message-ID: <0016e644b9507c7db6046d6a6494@google.com>
Author: apittman
Date: Sun Jun 28 08:16:41 2009
New Revision: 66
Modified:
branches/full-duplex/src/padb
Log:
Remove some old Tru64 code which isn't needed on Linux.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Sun Jun 28 08:16:41 2009
@@ -5881,41 +5881,19 @@
}
}
-my %proc_data;
-
-sub load_all_proc_info {
- my $cmd = "ps -eo pid,ppid,user,comm";
- open( PS, "$cmd|" );
- my @pids = ();
- close(PS);
- foreach my $pid (@pids) {
- my ( $pid, $ppid, $user, $cmd ) = split( " ", $pid );
- next if $pid eq "PID";
- $proc_data{$pid}{PPid} = $ppid;
-
- # $proc{$pid}{user} = $user;
- $proc_data{$pid}{Name} = $cmd;
- }
-}
-
sub find_from_status {
my $pid = shift;
my $key = shift;
- if ( -f "/proc/version" ) {
- open( PCMD, "/proc/$pid/status" ) or return;
- while () {
- my $l = $_;
- if ( $l =~ /$key:\t+(\w+)/ ) {
- close PCMD;
- return $1;
- }
+ open( PCMD, "/proc/$pid/status" ) or return;
+ while () {
+ my $l = $_;
+ if ( $l =~ /$key:\t+(\w+)/ ) {
+ close PCMD;
+ return $1;
}
- close PCMD;
- } else {
- load_all_proc_info() if ( keys(%proc_data) eq 0 );
- return $proc_data{$pid}{$key} if ( defined $proc_data{$pid}{$key}
);
}
+ close PCMD;
return;
}
From codesite-noreply at google.com Sun Jun 28 16:44:35 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Sun, 28 Jun 2009 15:44:35 +0000
Subject: [padb-devel] [padb commit] r67 - Now the process map is
communicated via sockets remove the tempory
Message-ID: <0016e644b95054c0d2046d6a73b8@google.com>
Author: apittman
Date: Sun Jun 28 08:33:26 2009
New Revision: 67
Modified:
branches/full-duplex/src/padb
Log:
Now that the process map is communicated via sockets, remove the temporary
file containing the process map when using Orte.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Sun Jun 28 08:33:26 2009
@@ -2358,7 +2358,6 @@
my %open_jobs;
sub open_get_data {
- my ($filename) = @_;
# Simply return if called more than once.
if ( keys(%open_jobs) != 0 ) {
@@ -2367,16 +2366,10 @@
}
my $hostname = hostname();
my $job;
- my @out;
- if ( defined $filename ) {
- open( OPEN, $filename ) or return;
- @out = ;
- close OPEN;
- } else {
- open( OPEN, "ompi-ps|" ) or return;
- @out = ;
- close OPEN;
- }
+
+ open( OPEN, "ompi-ps|" ) or return;
+ my @out = ;
+ close OPEN;
# Handle being called multiple times, zero the hash every
# time we are called. Of course we could just return the
@@ -2433,24 +2426,12 @@
}
my $open_dfile;
-my $open_tfile;
sub open_setup_pcmd {
my $job = shift;
open_get_data();
- my ( $th, $tn ) = tempfile(".padb.XXXX");
-
- open( my $oh, "ompi-ps|" );
- while (<$oh>) {
- print $th $_;
- }
- close $th;
- $cinner{"open-ps"} = $tn;
-
- $open_tfile = $tn;
-
my @hosts = keys %{ $open_jobs{$job}{hosts} };
my $i = @hosts;
@@ -2472,7 +2453,6 @@
sub open_cleanup_pcmd {
unlink($open_dfile) if ( defined($open_dfile) );
- unlink($open_tfile) if ( defined($open_tfile) );
}
###############################################################################
@@ -6097,7 +6077,7 @@
%open_jobs = ();
$open_jobs{$job} = $confInner{"orte-data"};
} else {
- open_get_data( $confInner{"open-ps"} );
+ open_get_data();
}
my $hostname = hostname();
@@ -6554,7 +6534,6 @@
$confInner{"rmgr"} = "auto";
$confInner{"edb"} = find_edb();
$confInner{"minfo"} = find_minfo();
- $confInner{"open-ps"} = "";
$confInner{"hostname"} = hostname();
# The different options this script can perform. One (and only one) of
From codesite-noreply at google.com Sun Jun 28 20:01:27 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Sun, 28 Jun 2009 19:01:27 +0000
Subject: [padb-devel] [padb commit] r68 - Clean up the way commands are sent
from the outer to the inner,
Message-ID: <0016e641db1264c578046d6d3365@google.com>
Author: apittman
Date: Sun Jun 28 11:04:08 2009
New Revision: 68
Modified:
branches/full-duplex/src/padb
Log:
Clean up the way commands are sent from the outer to the inner,
generate a list (array) of commands before starting and issue
them one at a time until finished. This greatly simplifies the
command_from_inner() function and is a big step towards getting
full-report working again.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Sun Jun 28 11:04:08 2009
@@ -2751,7 +2751,6 @@
}
sub pre_mpi_watch {
- my ($cpus) = @_;
my $header = <{socket}->print("$str\n");
}
-sub command_from_inner {
- my ( $comm_data, $cdata, $line ) = @_;
+sub first_command {
+ my $comm_data = shift;
- if ( $line eq "Welcome" ) {
+ my $req;
+ $req->{mode} = "signon";
+ $req->{connection_tree} = $comm_data->{connection_tree};
+ $req->{remote} = $comm_data->{remote};
+
+ # Also send over some of the per-run (as opposed to per-mode)
+ # configuration options.
+ # XXX: Need to send over scripts and other stuff here as well.
+ $req->{jobconfig}{jobid} = $comm_data->{jobid};
+ $req->{jobconfig}{rmgr} = $conf{rmgr};
+
+ if ( $#ranks != -1 ) {
+ @{ $req->{ranks} } = @ranks;
+ }
+
+ if ( $conf{rmgr} eq "orte" ) {
+ $req->{jobconfig}{"orte-data"} = $open_jobs{ $comm_data->{jobid} };
+ }
+
+ $req->{cinner} = \%cinner;
+
+ return $req;
+}
+
+my @commands;
+
+sub push_command {
+ my ( $mode, $args ) = @_;
+
+ my %cmd;
+ $cmd{mode} = $mode;
+ $cmd{args} = $args if defined($args);
+ push @commands, \%cmd;
+}
+
+sub next_command {
+ my $comm_data = shift;
+
+ if ( $#commands == -1 ) {
my $req;
- $req->{mode} = "signon";
- $req->{connection_tree} = $comm_data->{connection_tree};
- $req->{remote} = $comm_data->{remote};
-
- # Also send over some of the per-run (as opposed to per-mode)
- # configuration options.
- # XXX: Need to send over scripts and other stuff here as well.
- $req->{jobconfig}{jobid} = $comm_data->{jobid};
- $req->{jobconfig}{rmgr} = $conf{rmgr};
+ $req->{mode} = "exit";
+ return $req;
+ }
- if ( $#ranks != -1 ) {
- @{ $req->{ranks} } = @ranks;
- }
+ my $cmd;
+
+ if ($watch) {
+ $cmd = $commands[0];
+ } else {
+ $cmd = shift(@commands);
+ }
+
+ my $req;
+ $req->{mode} = $cmd->{mode};
- if ( $conf{rmgr} eq "orte" ) {
- $req->{jobconfig}{"orte-data"} = $open_jobs{
$comm_data->{jobid} };
+ if ( defined $cmd->{args} ) {
+ $req->{cargs} = $cmd->{args};
+ }
+
+ # Send along the secondary args.
+ if ( defined $allfns{ $req->{mode} }{secondary} ) {
+ foreach my $sec ( @{ $allfns{ $req->{mode} }{secondary} } ) {
+ $req->{cargs}{ $sec->{arg_long} } = $sec->{value};
}
+ }
+ return $req;
+}
+
+sub command_from_inner {
+ my ( $comm_data, $cdata, $line ) = @_;
+
+ # Initial signon from child.
+ if ( $line eq "Welcome" ) {
+ my $req = first_command($comm_data);
+ $comm_data->{current_req} = $req;
issue_command_to_inner( $cdata, $req );
return;
}
@@ -3193,66 +3246,52 @@
print( Dumper($d) );
}
- if ( ( $comm_data->{state} eq "connecting" )
- or ( $comm_data->{state} eq "live" and $watch ) )
- {
-
- if ( $comm_data->{state} eq "connecting" ) {
-
- #XXX: Check all target_processes are here.
- # print Dumper $d;
- }
-
- # Watch mode, show the output and then loop.
- if ( $comm_data->{state} eq "live" ) {
- maybe_clear_screen();
- if ( defined( $allfns{ $comm_data->{mode} }{out_handler} ) ) {
- $allfns{ $comm_data->{mode} }{out_handler}( undef, $d );
- } else {
- default_output_handler( $comm_data->{mode}, $d );
- }
- sleep( $conf{interval} );
- }
-
+ # The inner process has signed on.
+ if ( $comm_data->{current_req}->{mode} eq "signon" ) {
+ $comm_data->{current_req} = next_command($comm_data);
+ issue_command_to_inner( $cdata, $comm_data->{current_req} );
$comm_data->{state} = "live";
- my $req;
- $req->{mode} = $comm_data->{mode};
-
- # Send along the secondary args.
- if ( defined $allfns{ $comm_data->{mode} }{secondary} ) {
- foreach my $sec ( @{ $allfns{ $comm_data->{mode} }{secondary}
} ) {
- $req->{cargs}{ $sec->{arg_long} } = $sec->{value};
- }
- }
- $req->{cinner} = \%cinner;
- issue_command_to_inner( $cdata, $req );
+ #XXX: Check all target_processes are here.
+ # print Dumper $d;
return;
}
- if ( $comm_data->{state} eq "live" ) {
+ # The inner process is about to exit.
+ if ( $comm_data->{current_req}->{mode} eq "exit" ) {
$comm_data->{state} = "shutdown";
- my $req;
- $req->{mode} = "exit";
+ return;
+ }
+
+ # We have received a reply to a request, send the next
+ # request first and then display this reply. If in
+ # watch mode display the reply, sleep and then send
+ # the next request.
+ my $req = next_command($comm_data);
+ if ( not $watch ) {
issue_command_to_inner( $cdata, $req );
+ }
- if ( defined( $allfns{ $comm_data->{mode} }{out_handler} ) ) {
- $allfns{ $comm_data->{mode} }{out_handler}( undef, $d );
- } else {
- default_output_handler( $comm_data->{mode}, $d );
- }
- return;
+ maybe_clear_screen();
+
+ # Mode here is the mode for the reply we just got, this
+ # may not be the same thing as the request we are currently
+ # sending.
+ my $mode = $comm_data->{current_req}->{mode};
+ if ( defined( $allfns{$mode}{out_handler} ) ) {
+ $allfns{$mode}{out_handler}( undef, $d );
+ } else {
+ default_output_handler( $mode, $d );
}
- if ( $comm_data->{state} eq "shutdown" ) {
+ $comm_data->{current_req} = $req;
- # Nothing to do here.
- return;
+ if ($watch) {
+ sleep( $conf{interval} );
+ issue_command_to_inner( $cdata, $req );
}
- print("Hmm, unknown state! $comm_data->{state}\n");
return;
-
}
sub handle_signon {
@@ -3300,9 +3339,6 @@
my $jobid = shift;
my $cmd = shift;
my $ncpus = shift;
- my $stats = shift;
- my $mode = shift;
- my $h = shift;
my $hosts = shift;
my $comm_data;
@@ -3338,7 +3374,6 @@
close $pcmd->{in};
- $comm_data->{mode} = $mode;
$comm_data->{hosts} = $hosts;
$comm_data->{cmd} = $cmd;
$comm_data->{jobid} = $jobid;
@@ -3490,8 +3525,6 @@
my $jobid = shift;
my $mode = shift;
- my $stats;
-
if ( defined $rmgr{ $conf{rmgr} }{require_inner_callback}
and $rmgr{ $conf{rmgr} }{require_inner_callback} )
{
@@ -3551,17 +3584,12 @@
( $conf{"verbose"} > 1 or $conf{"showcmd"} ) && print "$cmd\n";
- my $h;
- if ( defined $allfns{$mode}{pre_out_handler} ) {
- $h = $allfns{$mode}{pre_out_handler}($ncpus);
- }
-
if ( not defined $hosts ) {
printf("Full duplex mode needs to know the host count\n");
printf("Which is doesn't for this resource manager:
$conf{rmgr}\n");
return 1;
}
- my $errors = go_parallel( $jobid, $cmd, $ncpus, $stats, $mode, $h,
$hosts );
+ my $errors = go_parallel( $jobid, $cmd, $ncpus, $hosts );
cleanup_pcmd();
return $errors;
}
@@ -3780,17 +3808,26 @@
# exit 1;
#}
+ push_command("queue");
+
+ my %c;
+ $c{"strip-above-wait"} = 0;
+ push_command( "stack", \%c );
+ go_job($full_report);
+ exit(0);
+
printf("\n");
$compress = 1;
- go_job( $full_report, "queue" );
+
+ go_job($full_report);
undef $compress;
printf("\n");
$strip_above_wait = 0;
$tree = 1;
- go_job( $full_report, "stack" );
+ go_job($full_report);
undef $tree;
exit 0;
@@ -3915,7 +3952,12 @@
printf "\nCollecting information for job '$jobid'\n\n"
if ( $conf{"verbose"} or ( $#jobids > 0 ) );
- go_job( $jobid, $mode );
+ if ( defined $allfns{$mode}{pre_out_handler} ) {
+ $allfns{$mode}{pre_out_handler}();
+ }
+
+ push_command($mode);
+ go_job($jobid);
}
}
@@ -6287,6 +6329,11 @@
if ( $cmd->{mode} eq "signon" ) {
$netdata->{signon_cmd} = my_encode($cmd);
+ # Setup the environment.
+ foreach my $key ( keys( %{ $cmd->{cinner} } ) ) {
+ $confInner{$key} = $cmd->{cinner}{$key};
+ }
+
if (
not
exists $cmd->{connection_tree}{ $confInner{hostname}
}{children} )
@@ -6332,11 +6379,6 @@
if ( $cmd->{mode} eq "exit" ) {
$netdata->{shutdown} = 1;
return;
- }
-
- # Setup the environment.
- foreach my $key ( keys( %{ $cmd->{cinner} } ) ) {
- $confInner{$key} = $cmd->{cinner}{$key};
}
$confInner{mode} = $cmd->{mode};
From codesite-noreply at google.com Sun Jun 28 20:34:15 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Sun, 28 Jun 2009 19:34:15 +0000
Subject: [padb-devel] [padb commit] r69 - Don't prefix output with rank when
only targetting one rank.
Message-ID: <00163698966faf897b046d6da85c@google.com>
Author: apittman
Date: Sun Jun 28 12:01:08 2009
New Revision: 69
Modified:
branches/full-duplex/src/padb
Log:
Don't prefix output with rank when only targeting one rank.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Sun Jun 28 12:01:08 2009
@@ -2990,12 +2990,16 @@
}
}
} else {
+ my $nprocesses = keys( %{ $d->{target_output} } );
foreach my $process ( sort( keys( %{ $d->{target_output} } ) ) ) {
foreach my $line ( @{ $d->{target_output}{$process} } ) {
- print "$process:$line\n";
+ if ( $nprocesses == 1 ) {
+ print "$line\n";
+ } else {
+ print "$process:$line\n";
+ }
}
}
-
}
}
From codesite-noreply at google.com Mon Jun 29 10:51:45 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Mon, 29 Jun 2009 09:51:45 +0000
Subject: [padb-devel] [padb commit] r70 - Handle per-mode config options
better, rather than put them into
Message-ID: <001636283b7450535d046d79a3ac@google.com>
Author: apittman
Date: Mon Jun 29 02:45:54 2009
New Revision: 70
Modified:
branches/full-duplex/src/padb
Log:
Handle per-mode config options better: rather than putting them into
the global conf hash, make them mode dependent. Pass these options
around from the outer to the inner and pass by reference to the
callback functions in the inner process.
Update the config management code accordingly and change the callback
to use a local $carg rather than the global %conf.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Mon Jun 29 02:45:54 2009
@@ -374,15 +374,10 @@
$conf{"stats-name"} = undef;
$conf{"stats-raw"} = 0;
$conf{"scripts"} = "bash,sh,dash,ash,perl,xterm";
-$conf{"stack-strip-below"} = "main";
$conf{"lsf-job-offset"} = 1;
$conf{"local-fd-name"} = "/dev/null";
-$conf{"stack-strip-above"} =
- "elan_waitWord,elan_pollWord,elan_deviceCheck,opal_condition_wait";
-$conf{"full-duplex"} = 0;
-$conf{"inner-callback"} = 0;
-
-# $conf{stack-format} = undef;
+$conf{"full-duplex"} = 1;
+$conf{"inner-callback"} = 0;
# Tuning options.
$conf{"prun-timeout"} = 120;
@@ -390,8 +385,7 @@
$conf{"rmgr"} = "auto";
# These settings are passed onto inner only.
-$conf{"edbopt"} = "";
-$conf{"mpi-dll"} = "auto";
+$conf{"edbopt"} = "";
$conf{"edb"} = find_edb();
$conf{"minfo"} = find_minfo();
@@ -547,9 +541,6 @@
my $core_name;
my $exe_name;
-my $proc_format =
- "vp=vpid,hostname,pid,vmsize,vmrss,stat.state=S,pcpu=%cpu,name=command";
-
my $input_file;
my $compress;
my $compress_C;
@@ -595,7 +586,6 @@
"strip-above-wait!" => \$strip_above_wait,
"watch!" => \$watch,
"local-stats" => \$local_stats,
- "proc-format=s" => \$proc_format,
"show-jobs" => \$show_jobs,
"norc" => \$norc,
"config-file=s" => \$configfile
@@ -612,8 +602,8 @@
}
if ( defined $allfns{$arg}{options_i} ) {
foreach my $o ( keys( %{ $allfns{$arg}{options_i} } ) ) {
- $conf{$o} = $allfns{$arg}{options_i}{$o};
- $ic_names{$o}++;
+ $conf{mode_options}{$arg}{$o} =
$allfns{$arg}{options_i}{$o};
+ $conf{mode_options_reverse}{$o}{$arg} = 1;
}
}
}
@@ -2699,8 +2689,10 @@
my %above;
my %below;
- map { $above{$_}++ } split( ",", $conf{"stack-strip-above"} );
- map { $below{$_}++ } split( ",", $conf{"stack-strip-below"} );
+ map { $above{$_}++ }
+ split( ",", $conf{mode_options}{stack}{"stack-strip-above"} );
+ map { $below{$_}++ }
+ split( ",", $conf{mode_options}{stack}{"stack-strip-below"} );
foreach my $tag ( keys %$lines ) {
@@ -2779,7 +2771,7 @@
# Idealy we'd know what format we wanted and only ask the nodes
# to report relevent info, for now they still report everything.
sub show_proc_format {
- my ( $handle, $nlines ) = @_;
+ my ( $carg, $nlines ) = @_;
my @proc_format_array;
my %proc_format_header;
@@ -2787,9 +2779,9 @@
my %proc_format_lengths;
- my $separator = $conf{"column-seperator"};
+ my $separator = $carg->{"column-seperator"};
- my @columns = split( ",", $proc_format );
+ my @columns = split( ",", $carg->{"proc-format"} );
foreach my $column (@columns) {
$show_fields = 1 if ( $column eq "fields" );
@@ -2866,7 +2858,7 @@
my $lines = $nlines->{lines};
if ( defined $allfns{$mode}{out_handler} ) {
- $allfns{$mode}{out_handler}( $handle, $nlines );
+ $allfns{$mode}{out_handler}( undef, $nlines );
return;
}
@@ -3188,9 +3180,12 @@
my @commands;
+# Push a command onto the list of commands to be executed.
sub push_command {
my ( $mode, $args ) = @_;
+ # $args = $conf{mode_options}{$mode};
+
my %cmd;
$cmd{mode} = $mode;
$cmd{args} = $args if defined($args);
@@ -3283,7 +3278,7 @@
# sending.
my $mode = $comm_data->{current_req}->{mode};
if ( defined( $allfns{$mode}{out_handler} ) ) {
- $allfns{$mode}{out_handler}( undef, $d );
+ $allfns{$mode}{out_handler}( $conf{mode_options}{$mode}, $d );
} else {
default_output_handler( $mode, $d );
}
@@ -3527,7 +3522,6 @@
sub go_job {
my $jobid = shift;
- my $mode = shift;
if ( defined $rmgr{ $conf{rmgr} }{require_inner_callback}
and $rmgr{ $conf{rmgr} }{require_inner_callback} )
@@ -3582,7 +3576,7 @@
# " $0 --inner --jobid=$rem_jobid $rops --stats-full
$conf{edbopt}";
# }
#} else {
- $cmd .= " $0 --inner --full-duplex";
+ $cmd .= " $0 --inner";
#}
@@ -3618,13 +3612,20 @@
my ( $key, $value ) = @_;
printf("Setting '$key' to '$value'\n") if ( $conf{"verbose"} );
- if ( not exists $conf{$key} ) {
+ if ( !exists $conf{$key} and !exists $conf{mode_options_reverse}{$key}
) {
printf( STDERR
"Warning, unknown config option '$key' value '$value'.\n" );
}
- $conf{$key} = $value;
+ if ( exists $conf{$key} ) {
+ $conf{$key} = $value;
+ } else {
+ foreach my $mode ( keys( %{ $conf{mode_options_reverse}{$key} } )
) {
+ $conf{mode_options}{$mode}{$key} = $value;
+ }
+ }
+ # Mark this variable to be passed onto the inner processes.
if ( defined $ic_names{$key} ) {
$cinner{$key} = $value;
}
@@ -3657,6 +3658,14 @@
config_set( $key, $ENV{"PADB_$name"} );
}
}
+
+ foreach my $key ( keys( %{ $conf{mode_options_reverse} } ) ) {
+ my $name = uc($key);
+ $name =~ s/\-/\_/g;
+ if ( defined $ENV{"PADB_$name"} ) {
+ config_set( $key, $ENV{"PADB_$name"} );
+ }
+ }
}
sub config_help {
@@ -3665,12 +3674,14 @@
my $max_len = 0;
foreach my $key ( keys(%conf) ) {
+ next if ( ref( $conf{$key} ) eq "HASH" );
if ( length $key > $max_len ) {
$max_len = length $key;
}
}
foreach my $key ( sort( keys(%conf) ) ) {
+ next if ( ref( $conf{$key} ) eq "HASH" );
my $name = $key;
$name =~ s/\_/\-/g;
if ( defined $conf{$key} ) {
@@ -3679,6 +3690,16 @@
printf( " %$max_len" . "s = unset\n", $name );
}
}
+
+ foreach my $mode ( sort( keys( %{ $conf{mode_options} } ) ) ) {
+ printf("Options for mode '$mode'\n");
+ foreach my $key ( sort( keys( %{ $conf{mode_options}{$mode} } ) )
) {
+ printf(
+ " %$max_len" . "s = '$conf{mode_options}{$mode}{$key}'\n",
+ $key
+ );
+ }
+ }
}
sub outer_main {
@@ -3724,7 +3745,9 @@
exit(1);
}
- if ( !exists $conf{$name} ) {
+ if ( !exists $conf{$name}
+ and !exists $conf{mode_options_reverse}{$name} )
+ {
printf("Error, unknown config option '$name'\n");
config_help();
exit(1);
@@ -3960,7 +3983,7 @@
$allfns{$mode}{pre_out_handler}();
}
- push_command($mode);
+ push_command( $mode, $conf{mode_options}{$mode} );
go_job($jobid);
}
}
@@ -4534,7 +4557,7 @@
}
sub fetch_mpi_queue {
- my ( $vp, $pid ) = @_;
+ my ( $carg, $vp, $pid ) = @_;
my $g = gdb_start();
kill( "CONT", $pid );
my $p = gdb_attach( $g, $pid );
@@ -4543,8 +4566,8 @@
return;
}
- if ( $confInner{"mpi-dll"} ne "auto" ) {
- $ENV{MPINFO_DLL} = $confInner{"mpi-dll"};
+ if ( $carg->{"mpi-dll"} ne "auto" ) {
+ $ENV{MPINFO_DLL} = $carg->{"mpi-dll"};
} else {
my $base = gdb_var_addr( $g, "MPIR_dll_name" );
if ( !defined $base ) {
@@ -4562,10 +4585,10 @@
# As above but take a gdb handle
sub fetch_mpi_queue_gdb {
- my ( $vp, $pid, $g ) = @_;
+ my ( $carg, $vp, $pid, $g ) = @_;
- if ( $confInner{"mpi-dll"} ne "auto" ) {
- $ENV{MPINFO_DLL} = $confInner{"mpi-dll"};
+ if ( $carg->{"mpi-dll"} ne "auto" ) {
+ $ENV{MPINFO_DLL} = $carg->{"mpi-dll"};
} else {
my $base = gdb_var_addr( $g, "MPIR_dll_name" );
if ( !defined $base ) {
@@ -4580,23 +4603,12 @@
sub show_mpi_queue {
my ( $carg, $vp, $pid ) = @_;
- my @mq = fetch_mpi_queue( $vp, $pid );
+ my @mq = fetch_mpi_queue( $carg, $vp, $pid );
foreach my $o (@mq) {
output( $vp, $o );
}
}
-# Should do something clever here with handler_all so we get a single
-# consistent sample from the individual nodes, the handler_all code
-# doesn't do anything with output_dtype() yet however so give that
-# a miss for the time being.
-sub show_mpi_queue_for_deadlock {
- my ( $vp, $pid ) = @_;
-
- my @mq = fetch_mpi_queue( $vp, $pid );
- return \@mq;
-}
-
# Ideally handle all this at a higher level...
sub show_mpi_queue_for_deadlock_all {
my ( $carg, $list ) = @_;
@@ -4629,7 +4641,7 @@
my $pid = $proc->{pid};
my $gdb = $proc->{gdb};
- my @mq = fetch_mpi_queue_gdb( $vp, $pid, $gdb );
+ my @mq = fetch_mpi_queue_gdb( $carg, $vp, $pid, $gdb );
$ret->{$vp} = \@mq;
#output_dtype( $vp, \@mq );
@@ -5214,9 +5226,9 @@
}
sub show_task_dir {
- my ( $vp, $pid, $dir ) = @_;
+ my ( $carg, $vp, $pid, $dir ) = @_;
- if ( $confInner{"proc-shows-proc"} ) {
+ if ( $carg->{"proc-shows-proc"} ) {
my $exe = readlink "$dir/exe";
if ( defined $exe ) {
proc_output( $vp, "exe", $exe );
@@ -5225,9 +5237,7 @@
show_task_file( $vp, "$dir/status" );
show_task_file( $vp, "$dir/wchan", "wchan" );
show_task_file( $vp, "$dir/stat", "stat" );
- if ( $confInner{"proc-shows-stat"}
- or $confInner{mode} eq "proc-summary" )
- {
+ if ( $carg->{"proc-shows-stat"} ) {
show_task_stat_file( $vp, "$dir/stat" );
}
@@ -5258,7 +5268,7 @@
}
}
}
- if ( $confInner{"proc-shows-fds"} ) {
+ if ( $carg->{"proc-shows-fds"} ) {
opendir( FDS, "$dir/fd" );
my @fds = readdir(FDS);
closedir(FDS);
@@ -5274,7 +5284,7 @@
# New fdinfo data, it's verbose so only enable it
# if requested by -O proc-shows-fds=full
- if ( $confInner{"proc-shows-fds"} eq "full" ) {
+ if ( $carg->{"proc-shows-fds"} eq "full" ) {
if ( -f "$dir/fdinfo/$fd" ) {
open( FDI, "$dir/fdinfo/$fd" );
my @fdi = ();
@@ -5298,7 +5308,7 @@
}
}
}
- if ( $confInner{"proc-shows-maps"} ) {
+ if ( $carg->{"proc-shows-maps"} ) {
show_task_file( $vp, "$dir/maps", "maps" );
}
}
@@ -5353,82 +5363,85 @@
sub show_proc_all {
my ( $carg, $list ) = @_;
- if ( $confInner{mode} eq "proc-summary" ) {
- $proc_info = undef;
- }
+ $proc_info = undef;
my @all;
- foreach my $proc ( @{$list} ) {
- my $pid = $proc->{pid};
- open( $proc->{handle}, "/proc/$pid/stat" );
- }
+ my $jiffies_start;
+ if ( $carg->{"proc-shows-proc"} ) {
+ foreach my $proc ( @{$list} ) {
+ my $pid = $proc->{pid};
+ open( $proc->{handle}, "/proc/$pid/stat" );
+ }
- open( SFD, "/proc/stat\n" );
+ open( SFD, "/proc/stat\n" );
- # Begin critical path.
- my $stat = ;
+ # Begin critical path.
+ my $stat = ;
- foreach my $proc ( @{$list} ) {
- my $pid = $proc->{pid};
- my $h = $proc->{handle};
- $proc->{stat_start} = <$h>;
- seek( $proc->{handle}, 0, 0 );
- }
+ foreach my $proc ( @{$list} ) {
+ my $pid = $proc->{pid};
+ my $h = $proc->{handle};
+ $proc->{stat_start} = <$h>;
+ seek( $proc->{handle}, 0, 0 );
+ }
- seek( SFD, 0, 0 );
- my $stat2 = ;
+ seek( SFD, 0, 0 );
+ my $stat2 = ;
- # End critical path.
+ # End critical path.
- my $jiffies_start = add_and_divide_jiffies( $stat, $stat2 );
+ $jiffies_start = add_and_divide_jiffies( $stat, $stat2 );
+ }
foreach my $proc ( @{$list} ) {
my $vp = $proc->{vp};
my $pid = $proc->{pid};
- show_proc( $vp, $pid );
+ show_proc( $carg, $vp, $pid );
}
- sleep(1);
+ if ( $carg->{"proc-shows-proc"} ) {
+ sleep(1);
- seek( SFD, 0, 0 );
+ seek( SFD, 0, 0 );
- # Begin critical path.
- $stat = ;
+ # Begin critical path.
+ my $stat = ;
- foreach my $proc ( @{$list} ) {
- my $pid = $proc->{pid};
- my $h = $proc->{handle};
- $proc->{stat_end} = <$h>;
- close( $proc->{handle} );
- }
+ foreach my $proc ( @{$list} ) {
+ my $pid = $proc->{pid};
+ my $h = $proc->{handle};
+ $proc->{stat_end} = <$h>;
+ close( $proc->{handle} );
+ }
- seek( SFD, 0, 0 );
- $stat2 = ;
+ seek( SFD, 0, 0 );
+ my $stat2 = ;
- # End critical path.
+ # End critical path.
- my $cpucount = 0;
- while () {
- if ( $_ =~ /^cpu\d/ ) {
- $cpucount++;
+ my $cpucount = 0;
+ while () {
+ if ( $_ =~ /^cpu\d/ ) {
+ $cpucount++;
+ }
}
- }
- close(SFD);
+ close(SFD);
- my $jiffies_end = add_and_divide_jiffies( $stat, $stat2 );
+ my $jiffies_end = add_and_divide_jiffies( $stat, $stat2 );
- my $elapsed = $jiffies_end - $jiffies_start;
+ my $elapsed = $jiffies_end - $jiffies_start;
- foreach my $proc ( @{$list} ) {
- my $vp = $proc->{vp};
- my $jpre = stat_to_jiffies( $proc->{stat_start} );
- my $jpost = stat_to_jiffies( $proc->{stat_end} );
- my $jused = $jpost - $jpre;
- my $used = ( $jused / $elapsed ) * $cpucount * 100;
- my $used_str = sprintf( "%d", $used );
+ foreach my $proc ( @{$list} ) {
+ my $vp = $proc->{vp};
+ my $jpre = stat_to_jiffies( $proc->{stat_start} );
+ my $jpost = stat_to_jiffies( $proc->{stat_end} );
+ my $jused = $jpost - $jpre;
+ my $used = ( $jused / $elapsed ) * $cpucount * 100;
+ my $used_str = sprintf( "%d", $used );
- proc_output( $vp, "pcpu", $used_str );
+ proc_output( $vp, "pcpu", $used_str );
+ }
}
if ( $confInner{mode} eq "proc-summary" ) {
@@ -5437,13 +5450,13 @@
}
sub show_proc {
- my ( $vp, $pid ) = @_;
+ my ( $carg, $vp, $pid ) = @_;
- if ( $confInner{"proc-shows-proc"} ) {
+ if ( $carg->{"proc-shows-proc"} ) {
proc_output( $vp, "hostname", $confInner{hostname} );
}
- if ( -d "/proc/$pid/task" and $confInner{"proc-shows-proc"} ) {
+ if ( -d "/proc/$pid/task" and $carg->{"proc-shows-proc"} ) {
# 2.6 kernel. (ntpl)
opendir( DIR, "/proc/$pid/task" );
@@ -5452,10 +5465,10 @@
foreach my $task (@tasks) {
next if ( $task eq "." );
next if ( $task eq ".." );
- show_task_dir( $vp, $pid, "/proc/$pid/task/$task" );
+ show_task_dir( $carg, $vp, $pid, "/proc/$pid/task/$task" );
}
} else {
- show_task_dir( $vp, $pid, "/proc/$pid" );
+ show_task_dir( $carg, $vp, $pid, "/proc/$pid" );
}
}
@@ -5530,7 +5543,9 @@
$ok = 0;
if ( defined $gdb ) {
- if ( $confInner{"stack-shows-params"} ) {
+ if ( $carg->{"stack-shows-params"}
+ or $carg->{"stack-shows-locals"} )
+ {
@threads = gdb_dump_frames_per_thread( $gdb, 1 );
} else {
@threads = gdb_dump_frames_per_thread($gdb);
@@ -5551,7 +5566,7 @@
}
$tries++;
} while ( ( $ok != 1 )
- and ( $tries < $confInner{"gdb-retry-count"} ) );
+ and ( $tries < $carg->{"gdb-retry-count"} ) );
if ( not defined $threads[0]{id} ) {
output( $vp, "Could not extract stack trace from application"
);
@@ -5578,7 +5593,7 @@
next unless exists $$frame{level};
next unless exists $$frame{func};
- if ( $confInner{"stack-shows-params"} ) {
+ if ( $carg->{"stack-shows-params"} ) {
my @a;
foreach my $arg ( @{ $frame->{params} } ) {
if ( defined $frame->{vals}{$arg} ) {
@@ -5591,16 +5606,6 @@
my $file = $frame->{file} || "?";
my $line = $frame->{line} || "?";
output( $vp, "$frame->{func}($a) at $file:$line" );
-
- if ( $confInner{"stack-shows-locals"} ) {
- foreach my $arg ( @{ $frame->{locals} } ) {
- if ( defined $frame->{vals}{$arg} ) {
- output( $vp, " $arg =
$frame->{vals}{$arg}" );
- } else {
- output( $vp, " $arg = ??" );
- }
- }
- }
} else {
output( $vp,
( $$frame{func} || "?" )
@@ -5608,6 +5613,16 @@
. ( $$frame{file} || "?" ) . ":"
. ( $$frame{line} || "?" ) );
}
+ if ( $carg->{"stack-shows-locals"} ) {
+ foreach my $arg ( @{ $frame->{locals} } ) {
+ if ( defined $frame->{vals}{$arg} ) {
+ output( $vp, " $arg = $frame->{vals}{$arg}" );
+ } else {
+ output( $vp, " $arg = ??" );
+ }
+ }
+ }
+
}
}
}
@@ -5732,14 +5747,14 @@
# Load a file for use in MPI_Watch.
sub mpi_watch_load {
- my $file = shift;
+ my ($carg) = @_;
# File is a csv file,
# Name,c,function1,function2
- if ( defined $confInner{"mpi-watch-file"} ) {
+ if ( defined $carg->{"mpi-watch-file"} ) {
my %fns;
- my $f = $confInner{"mpi-watch-file"};
+ my $f = $carg->{"mpi-watch-file"};
open( MW, $f ) or return;
my @d = ();
close(MW);
@@ -5780,9 +5795,9 @@
# * - error.
sub mpi_watch {
- my ( $vp, $pid ) = @_;
+ my ( $carg, $vp, $pid ) = @_;
- my @mq = fetch_mpi_queue( $vp, $pid );
+ my @mq = fetch_mpi_queue( $carg, $vp, $pid );
my $sm = 0;
my $rm = 0;
my $um = 0;
@@ -5790,7 +5805,7 @@
my %res;
- my $fns = mpi_watch_load();
+ my $fns = mpi_watch_load($carg);
my $fnmode;
my $fnreal;
@@ -5874,7 +5889,7 @@
foreach my $proc ( @{$list} ) {
my $vp = $proc->{vp};
my $pid = $proc->{pid};
- $res{$vp} = mpi_watch( $vp, $pid );
+ $res{$vp} = mpi_watch( $carg, $vp, $pid );
}
return \%res;
}
@@ -6292,8 +6307,6 @@
# data in $netdata->{target_responce} and $netdata->??
if ( $cmd->{jobconfig}{rmgr} eq "orte" ) {
- $confInner{"orte-data"}{ $cmd->{jobconfig}{jobid} } =
- $cmd->{jobconfig}{"orte-data"};
$confInner{"orte-data"} = $cmd->{jobconfig}{"orte-data"};
}
@@ -6314,7 +6327,6 @@
my $vp = $proc->{vp};
my $name = readlink("/proc/$pid/exe");
my $state = find_from_status( $pid, "State" );
- $netdata->{target_responce}{$vp}->{pid} = $pid;
$netdata->{target_responce}{$vp}->{name} = $name;
$netdata->{target_responce}{$vp}->{state} = $state;
}
@@ -6582,41 +6594,10 @@
$confInner{"minfo"} = find_minfo();
$confInner{"hostname"} = hostname();
- # The different options this script can perform. One (and only one) of
- # these must be set.
- my $stats;
-
# Local vars to help with command line parsing
- my @config_options;
- my $jobid;
- my $full_duplex;
my $outerloc;
- my %optionhash = (
- "config-option|O=s" => \@config_options,
- "jobid=i" => \$jobid,
- "rank=i" => \@ranks,
- "stats-full" => \$stats,
- "verbose|v+" => \$confInner{"verbose"},
- "full-duplex" => \$full_duplex,
- "outer=s" => \$outerloc,
- );
-
- my %config_hash;
-
- foreach my $arg ( keys %allfns ) {
- $optionhash{ $allfns{$arg}{arg} } = \$config_hash{$arg};
- foreach my $sec ( @{ $allfns{$arg}{secondary} } ) {
- $sec->{value} = $sec->{default};
- $optionhash{ $sec->{arg} } = \$sec->{value};
- }
-
- if ( defined $allfns{$arg}{options_i} ) {
- foreach my $o ( keys( %{ $allfns{$arg}{options_i} } ) ) {
- $confInner{$o} = $allfns{$arg}{options_i}{$o};
- }
- }
- }
+ my %optionhash = ( "outer=s" => \$outerloc, );
Getopt::Long::Configure("bundling");
@@ -6629,53 +6610,6 @@
inner_loop_for_comms($outerloc);
exit(0);
- my $mode;
-
- # $rjobid is used for accessing the stats on slurm
- # systems, on rms it's just the jobId but on combined
- # slurm/rms systems it's modifed to be the rms id
- # and the jobid is left as the slurm job id.
- my $rjobid = $jobid;
- if ( exists $ENV{"SLURM_PROCID"} ) {
- $rjobid = get_rms_jobid($jobid);
- }
-
- if ( defined $rjobid ) {
- $confInner{"key"} = ( $rjobid << 9 ) - 1;
- }
-
- if ($stats) {
-
- # Takes a RMS job id.
- inner_show_stats($rjobid);
- exit(0);
- }
-
- # Handle resource managers better, simply call a callback
- # as the outer does.
- # As usual there is a special case, on Slurm systems
- # running QsNet you can have the RMS kernel module loaded
- # and these need to be handled differently so deal with
- # them first and then go to the standard callback.
-
- if ( ( $confInner{rmgr} eq "slurm" ) and ( -d "/proc/rms" ) ) {
-
- # Takes a RMS job id.
- rms_find_pids($rjobid);
- } else {
- if ( not defined $rmgr{ $confInner{rmgr} }{find_pids} ) {
- printf("Error, rmgr $confInner{rmgr} has no find_pids
callback\n");
- exit(1);
- }
- $rmgr{ $confInner{rmgr} }{find_pids}($jobid);
- }
-
- if ( defined $allfns{$mode}{handler_all} ) {
-
- $allfns{$mode}{handler_all}( $confInner{"all-pids"} );
- }
-
- exit(0);
}
###############################################################################
@@ -6735,6 +6669,8 @@
'arg_short' => "q",
'handler' => \&show_queue,
'help' => "Show the message queues",
+ 'options_i' => { "mpi-dll" => "auto", }
+
};
$allfns{kill} = {
@@ -6754,7 +6690,8 @@
'handler' => \&show_mpi_queue,
'arg_long' => 'mpi-queue',
'arg_short' => 'Q',
- 'help' => "Show MPI message queues"
+ 'help' => "Show MPI message queues",
+ 'options_i' => { "mpi-dll" => "auto", }
};
$allfns{deadlock} = {
@@ -6763,6 +6700,8 @@
'arg_short' => 'j',
'help' => "Run deadlock detection algorithm",
'out_handler' => \&deadlock_detect,
+ 'options_i' => { "mpi-dll" => "auto", }
+
};
$allfns{pinfo} = {
@@ -6783,7 +6722,21 @@
'out_handler' => \&show_proc_format,
'arg_long' => 'proc-summary',
'help' => "Show process information in top format",
- 'options_i' => { "column-seperator" => " ", }
+ 'options_i' => {
+ "column-seperator" => " ",
+ "proc-shows-proc" => 1,
+ "proc-shows-fds" => 0,
+ "proc-shows-maps" => 0,
+ "proc-shows-stat" => 1
+ },
+ 'secondary' => [
+ {
+ 'arg_long' => 'proc-format',
+ 'type' => 's',
+ 'default' =>
+'vp=vpid,hostname,pid,vmsize,vmrss,stat.state=S,pcpu=%cpu,name=command'
+ }
+ ]
};
@@ -6795,7 +6748,10 @@
'options_i' => {
"stack-shows-params" => 0,
"stack-shows-locals" => 0,
- "gdb-retry-count" => 3
+ "gdb-retry-count" => 3,
+ "stack-strip-above" =>
+"elan_waitWord,elan_pollWord,elan_deviceCheck,opal_condition_wait",
+ "stack-strip-below" => "main",
}
};
From codesite-noreply at google.com Mon Jun 29 12:36:35 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Mon, 29 Jun 2009 11:36:35 +0000
Subject: [padb-devel] [padb commit] r73 - Make the deadlock code use proper
mode specific options
Message-ID: <0016364ed0d0437f29046d7b1ac6@google.com>
Author: apittman
Date: Mon Jun 29 04:20:06 2009
New Revision: 73
Modified:
branches/full-duplex/src/padb
Log:
Make the deadlock code use proper mode specific options
rather than using the general conf hash. Update the
output slightly to show group members even if no
collectives are in operation.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Mon Jun 29 04:20:06 2009
@@ -364,9 +364,7 @@
$conf{"stats-sort-key"} = "vp";
$conf{"stats-reverse"} = 0;
$conf{"stats-short"} = 0;
-$conf{"show-group-members"} = 0;
$conf{"show-all-stats"} = 0;
-$conf{"show-all-groups"} = 0;
$conf{"interval"} = 10;
$conf{"watch-clears-screen"} = 1;
$conf{"stats-name"} = undef;
@@ -377,6 +375,12 @@
$conf{"full-duplex"} = 1;
$conf{"inner-callback"} = 0;
+# These two are used by deadlock and QsNet group
+# code, they need migrating in the group code
+# when I have access to a test system again.
+#$conf{"show-group-members"} = 0;
+#$conf{"show-all-groups"} = 0;
+
# Tuning options.
$conf{"prun-timeout"} = 120;
$conf{"prun-exittimeout"} = 120;
@@ -4569,7 +4573,7 @@
return;
}
- if ( $carg->{"mpi-dll"} ne "auto" ) {
+ if ( defined $carg->{"mpi-dll"} ) {
$ENV{MPINFO_DLL} = $carg->{"mpi-dll"};
} else {
my $base = gdb_var_addr( $g, "MPIR_dll_name" );
@@ -4590,7 +4594,7 @@
sub fetch_mpi_queue_gdb {
my ( $carg, $vp, $pid, $g ) = @_;
- if ( $carg->{"mpi-dll"} ne "auto" ) {
+ if ( defined $carg->{"mpi-dll"} ) {
$ENV{MPINFO_DLL} = $carg->{"mpi-dll"};
} else {
my $base = gdb_var_addr( $g, "MPIR_dll_name" );
@@ -4659,9 +4663,8 @@
}
sub go_deadlock_detect {
- my ($cd) = @_;
+ my ( $carg, $cd ) = @_;
- # print Dumper $cd;
my %ad;
my @tg;
@@ -4678,11 +4681,18 @@
my $gd = $rd->{$g};
my $gid = $gd->{id};
+ if ( $gd->{size} == 1 ) {
+ $gid = "$gd->{id}($process)";
+ }
+ if ( defined $gd->{ranks}{0} ) {
+ $gid = "$gd->{id}($gd->{ranks}{0})";
+ }
+
if ( $#target_groups != -1 ) {
next unless defined $tg[$gid];
}
- if ( $gd->{size} > 1 ) {
+ if ( $gd->{size} > 0 ) {
$ad{$gid}{map}[ $gd->{rank} ] = $process;
}
$ad{$gid}{size} = $gd->{size};
@@ -4703,6 +4713,7 @@
my $ret = "";
my $i_count = 0; # Interesting groups.
#foreach my $gid ( sort { $a <=> $b } keys %ad ) {
+
foreach my $gid ( sort keys %ad ) {
if ( $#target_groups != -1 ) {
@@ -4713,12 +4724,12 @@
# Maybe show the group members, hope that the user doesn't turn
# this on unless also setting target_groups!
- if ( $conf{"show-group-members"} ) {
+ if ( $carg->{"show-group-members"} ) {
$gstr .= "group has $ad{$gid}{size} members\n";
- if ( defined $ad{$gid}{size} and $gid != 1 ) {
+ if ( defined $ad{$gid}{size} ) {
for ( my $ident = 0 ; $ident < $ad{$gid}{size} ; $ident++
) {
$gstr .=
- "group member[$ident] =>
vp[$ad{$gid}{map}[$ident]]\n";
+ "group member[$ident] =>
grank[$ad{$gid}{map}[$ident]]\n";
}
}
}
@@ -4756,15 +4767,14 @@
}
}
} else {
- next unless ( $conf{"show-all-groups"} );
+ next unless ( $carg->{"show-all-groups"} );
+ $ret .= $gstr;
+ $gstr = "";
}
{
my @inactive;
foreach my $ident ( sort keys %{ $ad{$gid}{'idents'} } ) {
-
- # if ( $ad{$gid}{'idents'}{$ident}{'statistics'}
- # and not defined
$ad{$gid}{'idents'}{$ident}{'active'} )
if ( not defined $ad{$gid}{'idents'}{$ident}{'active'} ) {
push( @inactive, $ident );
}
@@ -4794,7 +4804,7 @@
}
sub deadlock_detect {
- my ( $handle, $lines ) = @_;
+ my ( $carg, $lines ) = @_;
my $data;
# XXX This is a bit of a hack to make the deadlock
@@ -4836,7 +4846,7 @@
# print Dumper \%coll_data;
- my $r = go_deadlock_detect \%coll_data;
+ my $r = go_deadlock_detect( $carg, \%coll_data );
print $r;
}
@@ -6671,7 +6681,7 @@
'arg_short' => "q",
'handler' => \&show_queue,
'help' => "Show the message queues",
- 'options_i' => { "mpi-dll" => "auto", }
+ 'options_i' => { "mpi-dll" => undef, }
};
@@ -6693,7 +6703,7 @@
'arg_long' => 'mpi-queue',
'arg_short' => 'Q',
'help' => "Show MPI message queues",
- 'options_i' => { "mpi-dll" => "auto", }
+ 'options_i' => { "mpi-dll" => undef, }
};
$allfns{deadlock} = {
@@ -6702,7 +6712,11 @@
'arg_short' => 'j',
'help' => "Run deadlock detection algorithm",
'out_handler' => \&deadlock_detect,
- 'options_i' => { "mpi-dll" => "auto", }
+ 'options_i' => {
+ "mpi-dll" => undef,
+ "show-group-members" => 0,
+ "show-all-groups" => 0,
+ }
};
@@ -6773,7 +6787,7 @@
'pre_out_handler' => \&pre_mpi_watch,
'out_handler' => \&show_mpi_watch,
'options_i' => {
- "mpi-dll" => "auto",
+ "mpi-dll" => undef,
"mpi-watch-file" => undef
}
};
From codesite-noreply at google.com Mon Jun 29 14:18:50 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Mon, 29 Jun 2009 13:18:50 +0000
Subject: [padb-devel] [padb commit] r71 - Move two more config options from
the global namespace into
Message-ID: <0016368e2056e850ff046d7c8757@google.com>
Author: apittman
Date: Mon Jun 29 03:15:39 2009
New Revision: 71
Modified:
branches/full-duplex/src/padb
Log:
Move two more config options from the global namespace into
the proc-summary mode namespace.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Mon Jun 29 03:15:39 2009
@@ -362,8 +362,6 @@
# Output options.
$conf{"stats-sort-key"} = "vp";
-$conf{"proc-sort-key"} = "vp";
-$conf{"proc-show-header"} = 1;
$conf{"stats-reverse"} = 0;
$conf{"stats-short"} = 0;
$conf{"show-group-members"} = 0;
@@ -2823,9 +2821,9 @@
push @all, \%hash;
}
- @all = sort_proc_hashes( $conf{"proc-sort-key"}, @all );
+ @all = sort_proc_hashes( $carg->{"proc-sort-key"}, @all );
- if ( $conf{"proc-show-header"} ) {
+ if ( $carg->{"proc-show-header"} ) {
my @res;
foreach my $key (@proc_format_array) {
my $l .= sprintf( "%-$proc_format_lengths{$key}s",
@@ -3694,10 +3692,15 @@
foreach my $mode ( sort( keys( %{ $conf{mode_options} } ) ) ) {
printf("Options for mode '$mode'\n");
foreach my $key ( sort( keys( %{ $conf{mode_options}{$mode} } ) )
) {
- printf(
- " %$max_len" . "s = '$conf{mode_options}{$mode}{$key}'\n",
- $key
- );
+ if ( defined $conf{mode_options}{$mode}{$key} ) {
+ printf(
+ " %$max_len" . "s
= '$conf{mode_options}{$mode}{$key}'\n",
+ $key
+ );
+ } else {
+ printf( " %$max_len" . "s = undef\n", $key );
+
+ }
}
}
}
@@ -5729,8 +5732,7 @@
sub set_debug {
my ( $carg, $vp, $pid ) = @_;
run_command( $vp,
-"edb --key $confInner{key} --debug=$confInner{args}{dflag} --target-vp=$vp"
- );
+ "edb --key $confInner{key} --debug=$carg->{dflag} --target-vp=$vp"
);
}
my $mpi_watch_data = < 1,
"proc-shows-fds" => 0,
"proc-shows-maps" => 0,
- "proc-shows-stat" => 1
+ "proc-shows-stat" => 1,
+ "proc-sort-key" => "vp",
+ "proc-show-header" => 1,
},
'secondary' => [
{
From codesite-noreply at google.com Mon Jun 29 14:24:01 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Mon, 29 Jun 2009 13:24:01 +0000
Subject: [padb-devel] [padb commit] r72 - Fix a perl warning about reading
unitialied variable.
Message-ID: <0016364ed6507b612a046d7c9ac9@google.com>
Author: apittman
Date: Mon Jun 29 03:31:26 2009
New Revision: 72
Modified:
branches/full-duplex/src/padb
Log:
Fix a perl warning about reading an uninitialized variable.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Mon Jun 29 03:31:26 2009
@@ -6808,7 +6808,7 @@
common_main();
-if ( $ARGV[0] eq "--inner" ) {
+if ( $#ARGV >= 0 and $ARGV[0] eq "--inner" ) {
shift @ARGV;
inner_main();
} else {
From codesite-noreply at google.com Mon Jun 29 14:28:14 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Mon, 29 Jun 2009 13:28:14 +0000
Subject: [padb-devel] [padb commit] r74 - Do what I can with QsNet stats
modes, write skeleton
Message-ID: <0016e643486a8cee7a046d7ca958@google.com>
Author: apittman
Date: Mon Jun 29 04:59:58 2009
New Revision: 74
Modified:
branches/full-duplex/src/padb
Log:
Do what I can with QsNet stats modes: write skeleton
allfns options for them and move the config options from
the global conf to the local ones. I need access to a QsNet
system to resurrect this code, however.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Mon Jun 29 04:59:58 2009
@@ -361,14 +361,8 @@
$conf{slurm_job_step} = "0";
# Output options.
-$conf{"stats-sort-key"} = "vp";
-$conf{"stats-reverse"} = 0;
-$conf{"stats-short"} = 0;
-$conf{"show-all-stats"} = 0;
$conf{"interval"} = 10;
$conf{"watch-clears-screen"} = 1;
-$conf{"stats-name"} = undef;
-$conf{"stats-raw"} = 0;
$conf{"scripts"} = "bash,sh,dash,ash,perl,xterm";
$conf{"lsf-job-offset"} = 1;
$conf{"local-fd-name"} = "/dev/null";
@@ -530,9 +524,6 @@
# Number of functions provided on the command line from the allfns hash.
my $have_allfns_option = 0;
-my $stats_total;
-my $group;
-
my $full_report;
my $core_stack;
my $list_rmgrs;
@@ -564,33 +555,31 @@
my $mode;
my %optionhash = (
- "verbose|v+" => \$conf{verbose},
- "user|u=s" => \$user,
- "rank|r=i" => \@ranks,
- "group-id=i" => \@target_groups,
- "help|h" => \&usage,
- "all|a" => \$all,
- "any|A" => \$any,
- "statistics-total|stat|sta|st|s" => \$stats_total,
- "version|V" => \&show_version,
- "compress|c" => \$compress,
- "compress-long|C" => \$compress_C,
- "group|g" => \$group,
- "tree|t" => \$tree,
- "input-file|file|i=s" => \$input_file,
- "config-option|O=s" => \@config_options,
- "full-report=s" => \$full_report,
- "core-stack" => \$core_stack,
- "core=s" => \$core_name,
- "exe=s" => \$exe_name,
- "list-rmgrs" => \$list_rmgrs,
- "strip-below-main!" => \$strip_below_main,
- "strip-above-wait!" => \$strip_above_wait,
- "watch!" => \$watch,
- "local-stats" => \$local_stats,
- "show-jobs" => \$show_jobs,
- "norc" => \$norc,
- "config-file=s" => \$configfile
+ "verbose|v+" => \$conf{verbose},
+ "user|u=s" => \$user,
+ "rank|r=i" => \@ranks,
+ "group-id=i" => \@target_groups,
+ "help|h" => \&usage,
+ "all|a" => \$all,
+ "any|A" => \$any,
+ "version|V" => \&show_version,
+ "compress|c" => \$compress,
+ "compress-long|C" => \$compress_C,
+ "tree|t" => \$tree,
+ "input-file|file|i=s" => \$input_file,
+ "config-option|O=s" => \@config_options,
+ "full-report=s" => \$full_report,
+ "core-stack" => \$core_stack,
+ "core=s" => \$core_name,
+ "exe=s" => \$exe_name,
+ "list-rmgrs" => \$list_rmgrs,
+ "strip-below-main!" => \$strip_below_main,
+ "strip-above-wait!" => \$strip_above_wait,
+ "watch!" => \$watch,
+ "local-stats" => \$local_stats,
+ "show-jobs" => \$show_jobs,
+ "norc" => \$norc,
+ "config-file=s" => \$configfile
);
my %config_hash;
@@ -1414,6 +1403,9 @@
return;
}
+ my $stats_total = 0;
+ my $group = 0;
+
if ($stats_total) {
if ( $conf{"stats-short"} ) {
@@ -1730,7 +1722,7 @@
my $s = read_stats(@data);
- $stats_total = 1;
+ # $stats_total = 1;
show_stats($s);
}
@@ -3001,21 +2993,18 @@
my $file = shift;
my $mode = shift;
- if ( $stats_total or $group ) {
- my @data;
-
- open( PCMD, "$file" ) or die "$prog: cant open file $file: $!\n";
- local $/ = "\n\n";
- while (<PCMD>) {
- s/\n//g;
- push @data, $_;
- }
- my $s = read_stats(@data);
-
- show_stats($s);
-
- return;
- }
+ #if ( $stats_total or $group ) {
+ # my @data;
+ # open( PCMD, "$file" ) or die "$prog: cant open file $file: $!\n";
+ # local $/ = "\n\n";
+ # while (<PCMD>) {
+ # s/\n//g;
+ # push @data, $_;
+ # }
+ # my $s = read_stats(@data);
+ # show_stats($s);
+ # return;
+ #}
open( PCMD, "$file" ) or die "$prog: cant open file $file: $!\n";
my @data = <PCMD>;
@@ -3914,9 +3903,7 @@
}
if ( not $input_file
- and
- ( ( grep { $_ } ( $stats_total, $group, $have_allfns_option ) ) !=
1 )
- or ( $have_allfns_option > 1 ) )
+ and ( $have_allfns_option > 1 ) )
{
cmdline_error(
"$prog: Error: you must specify only one of -x, -S, -s, -g, -q, -X or
--kill\n"
@@ -6804,6 +6791,39 @@
'default' => '0'
}
]
+ };
+
+ # These next two don't work currently pending access to a QsNet system
+ # for testing. In the new full-duplex world startup is a little
different
+ # and these functions need updating.
+ # In particular the following need to be addressed.
+ # the callback paramaters are probably wrong.
+ # The shared memory key needs to be calculated.
+ # Config options need to be read locally rather than globally
+ $allfns{qsnet_stats} = {
+ 'handler_all' => \&inner_show_stats,
+ 'out_handler' => \&show_stats,
+ 'arg_long' => 'statistics-total',
+ 'arg_short' => 's',
+ 'help' => "Show the job-wide statistics.",
+ 'options_i' => {
+ "stats-name" => undef,
+ "stats-sort-key" => "vp",
+ "stats-reverse" => 0,
+ "stats-short" => 0,
+ "show-all-stats" => 0,
+ }
+ };
+ $allfns{qsnet_groups} = {
+ 'handler_all' => \&inner_show_stats,
+ 'out_handler' => \&group_status,
+ 'arg_long' => 'group',
+ 'arg_short' => 'g',
+ 'help' => "Show the state of collective operations
(groups).",
+ 'options_i' => {
+ "show-group-members" => 0,
+ "show-all-groups" => 0,
+ }
};
# Make a getopt string out of each of the optional options.
From codesite-noreply at google.com Mon Jun 29 15:03:31 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Mon, 29 Jun 2009 14:03:31 +0000
Subject: [padb-devel] [padb commit] r75 - Add a output-mode paramater to
push_command() and pass
Message-ID: <0016368e1c2fbcc211046d7d27b0@google.com>
Author: apittman
Date: Mon Jun 29 06:43:42 2009
New Revision: 75
Modified:
branches/full-duplex/src/padb
Log:
Add an output-mode parameter to push_command() and pass
this value back to default_output_handler. Provide a
sensible value when running in a mode, and also do
the right thing with respect to the full-report mode.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Mon Jun 29 06:43:42 2009
@@ -2931,11 +2931,15 @@
}
sub default_output_handler {
- my ( $mode, $d ) = @_;
+ my ( $req, $d ) = @_;
# Could warn on missing output here...
- my $lines = $d->{target_output};
+ my $lines = $d->{target_output};
+ my $mode = $req->{mode};
+ my $output = "raw";
+
+ $output = $req->{out_format} if defined $req->{out_format};
if ( $mode eq "stack" or $input_file ) {
if ( $strip_below_main or $strip_above_wait ) {
@@ -2943,11 +2947,11 @@
}
}
- if ($tree) {
+ if ( $output eq "tree" ) {
print show_tree go_p( 0, $lines,
( sort { $a <=> $b } ( keys %$lines ) ) );
- } elsif ($compress) {
+ } elsif ( $output eq "compress" ) {
foreach my $tag ( sort { $a <=> $b } ( keys %$lines ) ) {
next if ( !defined( $lines->{$tag} ) );
@@ -2966,7 +2970,7 @@
print("$data\n");
}
}
- } elsif ($compress_C) {
+ } elsif ( $output eq "compress_c" ) {
foreach my $tag ( sort { $a <=> $b } ( keys %$lines ) ) {
print("----------------\n");
print("$tag\n");
@@ -3173,13 +3177,14 @@
# Push a command onto the list of commands to be executed.
sub push_command {
- my ( $mode, $args ) = @_;
+ my ( $mode, $out_format, $args ) = @_;
# $args = $conf{mode_options}{$mode};
my %cmd;
- $cmd{mode} = $mode;
- $cmd{args} = $args if defined($args);
+ $cmd{mode} = $mode;
+ $cmd{out_format} = $out_format if defined($out_format);
+ $cmd{args} = $args if defined($args);
push @commands, \%cmd;
}
@@ -3207,6 +3212,10 @@
$req->{cargs} = $cmd->{args};
}
+ if ( defined $cmd->{out_format} ) {
+ $req->{out_format} = $cmd->{out_format};
+ }
+
# Send along the secondary args.
if ( defined $allfns{ $req->{mode} }{secondary} ) {
foreach my $sec ( @{ $allfns{ $req->{mode} }{secondary} } ) {
@@ -3271,7 +3280,7 @@
if ( defined( $allfns{$mode}{out_handler} ) ) {
$allfns{$mode}{out_handler}( $conf{mode_options}{$mode}, $d );
} else {
- default_output_handler( $mode, $d );
+ default_output_handler( $comm_data->{current_req}, $d );
}
$comm_data->{current_req} = $req;
@@ -3831,29 +3840,17 @@
# exit 1;
#}
- push_command("queue");
+ push_command( "queue", "compress" );
my %c;
$c{"strip-above-wait"} = 0;
- push_command( "stack", \%c );
- go_job($full_report);
- exit(0);
-
- printf("\n");
-
- $compress = 1;
-
- go_job($full_report);
- undef $compress;
-
- printf("\n");
+ push_command( "stack", "tree", \%c );
+ # This option is still a global rather than being just a mode
option.
$strip_above_wait = 0;
- $tree = 1;
- go_job($full_report);
- undef $tree;
- exit 0;
+ go_job($full_report);
+ exit(0);
}
if ($show_jobs) {
@@ -3977,7 +3974,11 @@
$allfns{$mode}{pre_out_handler}();
}
- push_command( $mode, $conf{mode_options}{$mode} );
+ my $of;
+ $of = "tree" if $tree;
+ $of = "compress" if $compress;
+ $of = "compress_c" if $compress_C;
+ push_command( $mode, $of, $conf{mode_options}{$mode} );
go_job($jobid);
}
}
From codesite-noreply at google.com Mon Jun 29 17:55:43 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Mon, 29 Jun 2009 16:55:43 +0000
Subject: [padb-devel] [padb commit] r77 - Update the release notes and bump
the version number
Message-ID: <0016368e1e618a08d9046d7f8f3a@google.com>
Author: apittman
Date: Mon Jun 29 07:49:47 2009
New Revision: 77
Modified:
branches/full-duplex/src/padb
Log:
Update the release notes and bump the version number
to 3.0
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Mon Jun 29 07:49:47 2009
@@ -1,6 +1,6 @@
#!/usr/bin/perl -w
-# padb. a simple parallel debugging aid from Quadrics.
+# padb. a simple parallel debugging aid.
# For help and support visit http://padb.pittman.org.uk
# or email padb-users at pittman.org.uk
@@ -23,8 +23,21 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301 USA
# Revision history
+# Version 3.0
+# * Full-duplex communication between inner and outer processes, padb
+# no longer simply sends request on the command line and processes
+# the response but is truly interactive between the inner and outer
+# processes. This avails greater flexibility in what can be achieved
+# and hopefully helps with scalability as well.
+# * Enabled warnings (-w) by default. Fixed lots of warnings, mostly
+# about comparing with undef
+# * Much more complete separation into "modes" of operation, most options
+# are now mode specific rather than simply using global variables or
+# global conf options.
+# * Overhaul of the allfns (mode) callbacks and in particular their
parameters
+# * Performance improvements.
#
-# Version 2.5 (Beta)
+# Version 2.5
# * First Non-Quadrics version
# * Various stability/bug fixes.
# * Deadlock detect at the MPI Layer rather than the Elan layer
@@ -239,7 +252,7 @@
# Main.
my $prog = basename $0;
-my $version = "2.5-beta";
+my $version = "3.0-beta";
my %conf;
@@ -413,8 +426,7 @@
sub show_version {
printf("$prog version $version\n\n");
printf("Written by Ashley Pittman\n");
-
- #ashley at quadrics.com
+ printf("http://padb.pittman.org.uk\n");
exit 0;
}
From codesite-noreply at google.com Mon Jun 29 18:15:05 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Mon, 29 Jun 2009 17:15:05 +0000
Subject: [padb-devel] [padb commit] r78 - Split the usage into modes which
are QsNet specific
Message-ID: <0016e640cdd0d63f31046d7fd4e6@google.com>
Author: apittman
Date: Mon Jun 29 09:13:18 2009
New Revision: 78
Modified:
branches/full-duplex/src/padb
Log:
Split the usage into modes which are QsNet specific
and ones which are more general.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Mon Jun 29 09:13:18 2009
@@ -437,15 +437,13 @@
-a --all report on all running jobs for user.
-A --any report on a running job for user.
--u --user=USER report on jobs for username=.
+-u --user= report on jobs for username=.
--r --rank=RANK report only on processes .
- --group-id=ID report only on group .
+-r --rank= report only on processes .
+ --group-id= report only on group .
--s --statistics Show the job-wide statistics.
--g --group Show the state of collective operations (groups).
XXXX
- --full-report=JOBID All of the above.
+ --full-report= Generate a full report of job state.
--nostrip-below-main Don\'t strip stack traces below main.
--nostrip-above-wait Don\'t strip stack traces about elan_waitWord.
@@ -501,8 +499,24 @@
chomp $usage;
my $extra = "";
+ $extra .= "Modes of operation\n";
foreach my $arg ( sort( keys %allfns ) ) {
next unless ( defined $allfns{$arg}{help} );
+ next if ( defined $allfns{$arg}{qsnet} );
+ if ( defined $allfns{$arg}{arg_short} ) {
+ $extra .= "-$allfns{$arg}{arg_short}";
+ } else {
+ $extra .= " ";
+ }
+ $extra .= sprintf( " --%-18s%s.\n",
+ $allfns{$arg}{arg_long},
+ $allfns{$arg}{help} );
+ }
+
+ $extra .= "\nQsNet specific modes\n";
+ foreach my $arg ( sort( keys %allfns ) ) {
+ next unless ( defined $allfns{$arg}{help} );
+ next unless ( defined $allfns{$arg}{qsnet} );
if ( defined $allfns{$arg}{arg_short} ) {
$extra .= "-$allfns{$arg}{arg_short}";
} else {
@@ -3852,7 +3866,9 @@
# exit 1;
#}
- push_command( "queue", "compress" );
+ push_command( "mqueue", "compress" );
+
+ push_command("deadlock");
my %c;
$c{"strip-above-wait"} = 0;
@@ -6718,6 +6734,7 @@
$allfns{queue} = {
'arg_long' => "message-queue",
+ 'qsnet' => 1,
'arg_short' => "q",
'handler' => \&show_queue,
'help' => "Show the message queues",
@@ -6834,6 +6851,7 @@
$allfns{set_debug} = {
'handler' => \&set_debug,
+ 'qsnet' => 1,
'arg_long' => 'set-debug',
'arg_short' => 'D',
'help' => "Set debug flags (use --dflag=value)",
@@ -6856,6 +6874,7 @@
$allfns{qsnet_stats} = {
'handler_all' => \&inner_show_stats,
'out_handler' => \&show_stats,
+ 'qsnet' => 1,
'arg_long' => 'statistics-total',
'arg_short' => 's',
'help' => "Show the job-wide statistics.",
@@ -6870,6 +6889,7 @@
$allfns{qsnet_groups} = {
'handler_all' => \&inner_show_stats,
'out_handler' => \&group_status,
+ 'qsnet' => 1,
'arg_long' => 'group',
'arg_short' => 'g',
'help' => "Show the state of collective operations
(groups).",
From codesite-noreply at google.com Mon Jun 29 18:26:55 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Mon, 29 Jun 2009 17:26:55 +0000
Subject: [padb-devel] [padb commit] r79 - Change mpi_watch_all() to attach
to each process
Message-ID: <001485f91cb2256d3a046d7fff6a@google.com>
Author: apittman
Date: Mon Jun 29 10:19:39 2009
New Revision: 79
Modified:
branches/full-duplex/src/padb
Log:
Change mpi_watch_all() to attach to each process
at the start, then query each one and finally detach
from each process. Reduced contention on the CPU means
this leads to a performance improvement.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Mon Jun 29 10:19:39 2009
@@ -5862,28 +5862,71 @@
#
# * - error.
-sub mpi_watch {
- my ( $carg, $vp, $pid ) = @_;
-
- my @mq = fetch_mpi_queue( $carg, $vp, $pid );
- my $sm = 0;
- my $rm = 0;
- my $um = 0;
- my $good = ".";
+sub mpi_watch_all {
+ my ( $carg, $list ) = @_;
my %res;
-
my $fns = mpi_watch_load($carg);
- my $fnmode;
- my $fnreal;
- my $gdb = gdb_start();
- kill( "CONT", $pid );
- if ( gdb_attach( $gdb, $pid ) ) {
- my @threads = gdb_dump_frames_per_thread($gdb);
- gdb_detach($gdb);
- gdb_quit($gdb);
+ foreach my $proc ( @{$list} ) {
+ my $vp = $proc->{vp};
+ my $pid = $proc->{pid};
+ my $gdb = gdb_start();
kill( "CONT", $pid );
+ if ( gdb_attach( $gdb, $pid ) ) {
+ $proc->{gdb} = $gdb;
+ } else {
+ output $vp, "Failed to attach to to process";
+ }
+ }
+
+ foreach my $proc ( @{$list} ) {
+ my $vp = $proc->{vp};
+ my $pid = $proc->{pid};
+ my $gdb = $proc->{gdb};
+
+ my @mq;
+ my $sm = 0;
+ my $rm = 0;
+ my $um = 0;
+ my $good = ".";
+ my $fnmode;
+
+ @mq = fetch_mpi_queue_gdb( $carg, $vp, $pid, $gdb );
+
+ if ( $#mq == 0 ) {
+ $good = ",";
+ } else {
+ foreach my $o (@mq) {
+ if ( $o =~ /Operation (\d)/ ) {
+ my $type = $1;
+ $sm++ if ( $type == 0 );
+ $rm++ if ( $type == 1 );
+ $um++ if ( $type == 2 );
+ }
+ }
+ }
+
+ my $mt = ( grep { $_ } ( $sm, $rm, $um ) );
+ if ( $mt != 0 ) {
+ my $mode = "*";
+
+ if ($um) {
+ $mode = "u";
+ $mode = "U" if ( $mt != 1 );
+ } else {
+ if ( $mt == 1 ) {
+ $mode = "s" if ($sm);
+ $mode = "r" if ($rm);
+ } else {
+ $mode = "m";
+ }
+ }
+ $res{$vp}{state} = $mode;
+ next;
+ }
+
+ my @threads = gdb_dump_frames_per_thread($gdb);
foreach my $thread ( sort { $a->{id} <=> $b->{id} } @threads ) {
my @frames = @{ $thread->{frames} };
@@ -5891,74 +5934,35 @@
my $frame = $frames[$i];
if ( defined $fns->{fns}{ $frame->{func} } ) {
$fnmode = $fns->{fns}{ $frame->{func} };
- $fnreal = $frame->{func};
last;
}
}
}
- }
- # $res{mq} = \@mq;
- if ( $#mq == 0 ) {
- $good = ",";
- } else {
- foreach my $o (@mq) {
- if ( $o =~ /Operation (\d)/ ) {
- my $type = $1;
- $sm++ if ( $type == 0 );
- $rm++ if ( $type == 1 );
- $um++ if ( $type == 2 );
- }
+ if ( defined $fnmode ) {
+ $res{$vp}{state} = $fns->{names}{$fnmode};
+ next;
+
}
- }
- my $mt = ( grep { $_ } ( $sm, $rm, $um ) );
- if ( $mt != 0 ) {
- my $mode = "*";
-
- if ($um) {
- $mode = "u";
- $mode = "U" if ( $mt != 1 );
+ my $m = find_from_status( $pid, "State" );
+ if ( $m eq "R" ) {
+ $m = $good;
+ } elsif ( $m eq "S" ) {
+ $m = "-";
} else {
- if ( $mt == 1 ) {
- $mode = "s" if ($sm);
- $mode = "r" if ($rm);
- } else {
- $mode = "m";
- }
+ $m = "*";
}
- $res{state} = $mode;
- output( $vp, $mode );
- return \%res;
- }
-
- if ( defined $fnmode ) {
- $res{state} = "$fns->{names}{$fnmode} $fnreal ";
- $res{state} = $fns->{names}{$fnmode};
- return \%res;
- }
- my $m = find_from_status( $pid, "State" );
- if ( $m eq "R" ) {
- $m = $good;
- } elsif ( $m eq "S" ) {
- $m = "-";
- } else {
- $m = "*";
+ $res{$vp}{state} = $m;
}
- output $vp, $m;
- $res{state} = $m;
- return \%res;
-}
-sub mpi_watch_all {
- my ( $carg, $list ) = @_;
- my %res;
foreach my $proc ( @{$list} ) {
- my $vp = $proc->{vp};
- my $pid = $proc->{pid};
- $res{$vp} = mpi_watch( $carg, $vp, $pid );
+ my $gdb = $proc->{gdb};
+ gdb_detach($gdb);
+ gdb_quit($gdb);
}
+
return \%res;
}
From codesite-noreply at google.com Mon Jun 29 18:33:30 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Mon, 29 Jun 2009 17:33:30 +0000
Subject: [padb-devel] [padb commit] r76 - Attach to all processes at the
same time when reading
Message-ID: <0016e644cb28acdeeb046d801671@google.com>
Author: apittman
Date: Mon Jun 29 07:14:20 2009
New Revision: 76
Modified:
branches/full-duplex/src/padb
Log:
Attach to all processes at the same time when reading
MPI message queues. This leads to much quicker
run-times and probably results in better quality data
due to all processes being stopped when the queues are
read.
Modified: branches/full-duplex/src/padb
==============================================================================
--- branches/full-duplex/src/padb (original)
+++ branches/full-duplex/src/padb Mon Jun 29 07:14:20 2009
@@ -4604,6 +4604,46 @@
}
}
+sub show_mpi_queue_all {
+ my ( $carg, $list ) = @_;
+
+ my @all;
+
+ foreach my $proc ( @{$list} ) {
+ my $vp = $proc->{vp};
+ my $pid = $proc->{pid};
+
+ debug $vp, "Attaching to $pid";
+ my $gdb = gdb_start();
+ kill( "CONT", $pid );
+ if ( gdb_attach( $gdb, $pid ) ) {
+ $proc->{gdb} = $gdb;
+ push( @all, $proc );
+ } else {
+ output $vp, "Failed to attach to to process";
+ }
+
+ }
+
+ foreach my $proc (@all) {
+
+ my $vp = $proc->{vp};
+ my $pid = $proc->{pid};
+ my $gdb = $proc->{gdb};
+
+ my @mq = fetch_mpi_queue_gdb( $carg, $vp, $pid, $gdb );
+ foreach my $o (@mq) {
+ output( $vp, $o );
+ }
+ }
+
+ foreach my $proc (@all) {
+ my $gdb = $proc->{gdb};
+ gdb_detach($gdb);
+ gdb_quit($gdb);
+ }
+}
+
# Ideally handle all this at a higher level...
sub show_mpi_queue_for_deadlock_all {
my ( $carg, $list ) = @_;
@@ -6687,11 +6727,11 @@
};
$allfns{mqueue} = {
- 'handler' => \&show_mpi_queue,
- 'arg_long' => 'mpi-queue',
- 'arg_short' => 'Q',
- 'help' => "Show MPI message queues",
- 'options_i' => { "mpi-dll" => undef, }
+ 'handler_all' => \&show_mpi_queue_all,
+ 'arg_long' => 'mpi-queue',
+ 'arg_short' => 'Q',
+ 'help' => "Show MPI message queues",
+ 'options_i' => { "mpi-dll" => undef, }
};
$allfns{deadlock} = {
From codesite-noreply at google.com Mon Jun 29 22:11:37 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Mon, 29 Jun 2009 21:11:37 +0000
Subject: [padb-devel] [padb commit] r80 - Remove comment about ISC and make
not of the 2.5 release.
Message-ID: <0016361e8834bb4b08046d832225@google.com>
Author: apittman
Date: Mon Jun 29 13:54:04 2009
New Revision: 80
Modified:
trunk/doc/index.html
Log:
Remove comment about ISC and make note of the 2.5 release.
Modified: trunk/doc/index.html
==============================================================================
--- trunk/doc/index.html (original)
+++ trunk/doc/index.html Mon Jun 29 13:54:04 2009
@@ -11,12 +11,10 @@
Padb is currently maintained outside of Quadrics by
-Ashley Pittman,
-I will be attending ISC
-later on this month.
+Ashley Pittman.
News
-28-06-09 A 2.5-rc1 release candidate is avaliable to download from the
+25-06-09 A 2.5 stable release (version 2.5) is avaliable to download
from the
downloads page.
Features
From codesite-noreply at google.com Tue Jun 30 14:13:13 2009
From: codesite-noreply at google.com (codesite-noreply at google.com)
Date: Tue, 30 Jun 2009 13:13:13 +0000
Subject: [padb-devel] [padb commit] r81 - Fix a formatting typo in the web
page.
Message-ID: <0016e644d0a6ad770d046d9091ce@google.com>
Author: apittman
Date: Tue Jun 30 05:43:52 2009
New Revision: 81
Modified:
trunk/doc/modes.html
Log:
Fix a formatting typo in the web page.
Modified: trunk/doc/modes.html
==============================================================================
--- trunk/doc/modes.html (original)
+++ trunk/doc/modes.html Tue Jun 30 05:43:52 2009
@@ -280,7 +280,7 @@
Signal delivery
To deliver signals to processes in a job use the --kill mode
together with the optional
---signal=<name>> option. No output is produced by this mode.
+--signal=<name> option. No output is produced by this mode.
Quadrics specific modes
The --set-debug, --group and --statistics modes are
QsNet specific. The --deadlock mode