[padb] r309 committed - Add release notes for code added since the 3.0 branch was created.
padb at googlecode.com
padb at googlecode.com
Sun Nov 1 19:09:59 GMT 2009
Revision: 309
Author: apittman
Date: Sun Nov 1 11:09:15 2009
Log: Add release notes for code added since the 3.0 branch was created.
http://code.google.com/p/padb/source/detail?r=309
Modified:
/trunk/src/padb
=======================================
--- /trunk/src/padb Sun Nov 1 09:49:32 2009
+++ /trunk/src/padb Sun Nov 1 11:09:15 2009
@@ -29,10 +29,40 @@
# Revision history
# Version 3.?
+# * Add variables to tree based stack traces.
+# * Solaris port. Limited functionality compared to running on Linux
+# however stack trace mode works fully.
# * Add "mpirun" as a resource manager, this causes it walk the local
-# process list looking for processes called mpirun and to get the pid
-# and hostlist by reading data from Mpir_Proctable as specified in the
-# origional paper. Padb then launches itself via pdsh.
+# process list looking for processes called mpirun and to get the
+# pid and hostlist by reading data from Mpir_Proctable as specified
+# in the original paper. Padb then launches itself via pdsh.
+# * For Open-MPI magically dip inside a number of datatypes to print
+# the name as Open-MPI sees them rather than just the struct
+# contents. This allows us to report communicators, datatypes and
+# reduction operations by name.
+# * Add a --lstopo option to run the lstopo command for each rank.
+# http://www.open-mpi.org/projects/hwloc/
+# * Enhance the integration with gdb, use sequence numbers when
+# talking to gdb and check that we get back what we give it.
+# Correctly notice and raise an appropriate error if gdb dies
+# unexpectedly.
+# * Intercept pointer values for variables and instead of showing the
+# pointer value show a description of what it points to using
+# /proc/pid/maps
+# * Nicely indent variables when showing stack traces. Indent no
+# more than necessary based on the length of the variable name and
+# the length of the type name.
+# * Fix an error where error strings were being passed through
+# sprintf, we now correctly handle errors when the error strings
+# contain % characters
+# * Overhauled the minfo code and the way it interacts with padb.
+# Add significantly better error handling to this code.
+# * Allow tracing of gdb and minfo interaction to a log file. These are
+# debug options so require enabling at the source level.
+# * Add a --create-secret-file option to generate the secret file
+# automatically
+# * Add SVN tags to the source file and the revision id to the
+# output of --version
#
# Version 3.0
# * Full-duplex communication between inner and outer processes, padb no
@@ -51,9 +81,9 @@
# * Simplify the slurm_find_pids() function to just return the output of
# scontrol listpids
# * Take the old process-tree walking code from slurm_find_pids() and make
-# it independant and call it for all resource managers. This allows
+# it independent and call it for all resource managers. This allows
# scripts which call parallel applications to be bypassed and the
-# applications themselves targetted.
+# applications themselves targeted.
# * Added "port-range" option to limit port usage in case people try and
# use padb with firewalls enabled.
#
@@ -219,7 +249,7 @@
# * Multi-pass argument handling, --kill also accepts --signal for example,
# this should really be done at the getopt layer. Also proper usage info
# for these secondary args.
-# * Paramater checking of secondary args, signal has a hacky implementation
+# * Parameter checking of secondary args, signal has a hacky implementation
# and port-range doesn't have any checking currently.
# * libunwind support? lighter weight than gdb and possibly more reliable.
# * Maybe PMI would help?
@@ -725,7 +755,7 @@
my %ic_names;
my %ic_names_cmd;
-# Debugging: this function is called periodically with a mode, an abritary
+# Debugging: this function is called periodically with a mode, an arbitrary
# ref and a string, it can either print simply the string or call dumper on
# the ref as well. Enable with --debug=type1,type2=all
my %debug_modes;
@@ -804,7 +834,7 @@
next unless ( $pid =~ m{\A\d+\z}xms );
my ( undef, undef, undef, undef, $owner ) = stat "/proc/$pid";
- # Check the stat worked, it's possible for processes to dissapear
+ # Check the stat worked, it's possible for processes to disappear
# Take care to check for defined rather than true as root has a uid
# of zero.
next unless defined $owner;
@@ -841,7 +871,7 @@
next unless ( $pid =~ m{\A\d+\z}xms );
my ( undef, undef, undef, undef, $owner ) = stat "/proc/$pid";
- # Check the stat worked, it's possible for processes to dissapear
+ # Check the stat worked, it's possible for processes to disappear
# Take care to check for defined rather than true as root has a uid
# of zero.
next unless defined $owner;
@@ -2928,7 +2958,7 @@
return;
}
- # Multiple resource managers are installed and have jobs, bouce back to
+ # Multiple resource managers are installed and have jobs, bounce back to
# the user to specify which one they want.
print
"Error, multiple active resource managers detected, use -Ormgr=<resource manager>\n";
@@ -3120,8 +3150,8 @@
}
# Nicely format process information. XXX: proc-sort-key should probably
-# sort on column headers as well as keys. Idealy we'd know what format we
-# wanted and only ask the nodes to report relevent info, for now they still
+# sort on column headers as well as keys. Ideally we'd know what format we
+# wanted and only ask the nodes to report relevant info, for now they still
# report everything.
sub show_proc_format {
my ( $carg, $nlines ) = @_;
@@ -3811,7 +3841,7 @@
}
# XXX: Should only send this list over if it makes sense, for example
- # the deadlock code only works when targetting all ranks.
+ # the deadlock code only works when targeting all ranks.
if ( defined $rank_rng ) {
$req->{ranks} = $rank_rng;
}
@@ -4228,7 +4258,7 @@
}
# Note the performance of this function is much higher when adding values
-# at the top of the range than at the start, persumably it's easier to make
+# at the top of the range than at the start, presumably it's easier to make
# an array longer than it is to unshift something onto the start. Quietly
# return if the value is already in the range.
sub rng_add_value {
@@ -5470,7 +5500,7 @@
# datatype. The format gdb uses is documented here:
# http://sources.redhat.com/gdb/current/onlinedocs/gdb_26.html#SEC275
#
-# The options $collapse argunment here is for element names that should be
+# The options $collapse argument here is for element names that should be
# collapsed into an array, for example in the following example each
# instance of thread-id would over-write the previous one with its own
# value so to avoid this thread-ids (note the extra "s" here is passed as
@@ -6468,7 +6498,7 @@
sub gdb_expand_var {
my ( $gdb, $arg ) = @_;
- # If you try and read a value which claims to be optimized away it
+ # If you try and read a value which claims to be optimised away it
# will return a value of zero, hard to know how to handle this but
# not reporting it is probably the better of the two options.
return
@@ -7159,7 +7189,7 @@
# finding main, with code to detach and try again if we don't. This served
# us well on ia64 where gdb isn't very good however as on most machines
# gdb gives you results below main (__libc_start_main()) this test fails
-# which causes padb to loop a number of times for each procees on a node.
+# which causes padb to loop a number of times for each process on a node.
# We still sometimes get garbage (due to hand-rolled memcpy()) so leave the
# loop in but don't sleep every iteration. This could be handled better by
# checking for the presence of one of the stack_strip_below functions in
@@ -7277,7 +7307,7 @@
my $strip_below;
- # Find a funtion to strip above. Only actually enable this if
+ # Find a function to strip above. Only actually enable this if
# there is a function present which we are targeting or else no
# output will be generated! Do this in reverse order so we
# strip as much as possible from the stack trace.
@@ -7860,7 +7890,7 @@
# Merge this reply into the local one.
$handle->{child_replys}++;
- # Combine the host responces.
+ # Combine the host responses.
foreach my $status ( keys %{ $r->{host_responce} } ) {
foreach my $host ( keys %{ $r->{host_responce}{$status} } ) {
$handle->{all_replys}->{host_responce}{$status}{$host} =
@@ -7868,7 +7898,7 @@
}
}
- # Combine the target process responces.
+ # Combine the target process responses.
if ( exists $r->{target_responce} ) {
foreach my $tp ( keys %{ $r->{target_responce} } ) {
$handle->{all_replys}->{target_responce}{$tp} =
@@ -7876,7 +7906,7 @@
}
}
- # Combine the target process responces from child.
+ # Combine the target process responses from child.
if ( exists $r->{target_output} ) {
foreach my $tp ( keys %{ $r->{target_output} } ) {
$handle->{all_replys}->{target_output}{$tp} =
@@ -7884,7 +7914,7 @@
}
}
- # Copy the target local responces.
+ # Copy the target local responses.
if ( exists $handle->{target_responce} ) {
foreach my $tp ( keys %{ $handle->{target_responce} } ) {
$handle->{all_replys}->{target_responce}{$tp} =
@@ -7899,7 +7929,7 @@
%inner_output = ();
- # Copy the network target errors into responce.
+ # Copy the network target errors into response.
if ( exists $r->{target_data} ) {
if ( exists $handle->{all_replys}->{target_data} ) {
foreach my $key ( keys %{ $r->{target_data} } ) {
@@ -7923,7 +7953,7 @@
}
}
- # Merge in local target responces.
+ # Merge in local target responses.
foreach my $key ( keys %local_target_data ) {
foreach my $value ( keys %{ $local_target_data{$key} } ) {
if ( defined
$handle->{all_replys}->{target_data}{$key}{$value} ) {
@@ -7960,7 +7990,7 @@
# Convert from a pid to a command name and do it in a safe manner to avoid
# warnings. suid programs tend to have the exe link which is un-readable
-# so if that yeilds nothing then load the name from the status file.
+# so if that yields nothing then load the name from the status file.
sub pid_to_name {
my $pid = shift;
my $exe = readlink "/proc/$pid/exe";
@@ -8039,7 +8069,7 @@
# The process might have died and we simply didn't find anything,
# if this is the case then just skip it, the outer will notice the
- # missing signon and report an approtiate error.
+ # missing signon and report an appropriate error.
next unless defined $newpid;
my $status = hash_from_status($newpid);
@@ -8499,9 +8529,9 @@
sub common_main {
- # The quasi-authorative list of modes padb can operate in.
-
- # Notes on the callback functions and paramaters.
+ # The quasi-authoritative list of modes padb can operate in.
+
+ # Notes on the callback functions and parameters.
# handler Called in the inner for each target process.
# param: ??, $vp, $pid
@@ -8511,7 +8541,7 @@
#
# ??, $vp, $pid
- # These two functions can eitehr return a value, and have it passed to
+ # These two functions can either return a value, and have it passed to
# the output handler or call output() and use the
# default_output_handler().
@@ -8692,7 +8722,7 @@
# These next two don't work currently pending access to a QsNet system
# for testing. In the new full-duplex world startup is a little
# different and these functions need updating. In particular the
- # following need to be addressed. the callback paramaters are probably
+ # following need to be addressed. the callback parameters are probably
# wrong. The shared memory key needs to be calculated. Config options
# need to be read locally rather than globally
$allfns{qsnet_stats} = {
More information about the padb-devel
mailing list