From padb at googlecode.com Sun Oct 24 00:16:05 2010 From: padb at googlecode.com (padb at googlecode.com) Date: Sat, 23 Oct 2010 23:16:05 +0000 Subject: [padb] r395 committed - Accept a patch to work with more recent versions of pbs Message-ID: <000e0cd149b67e73b5049350f156@google.com> Revision: 395 Author: apittman at gmail.com Date: Sat Oct 23 16:15:51 2010 Log: Accept a patch to work with more recent versions of pbs http://code.google.com/p/padb/source/detail?r=395 Modified: /trunk/src/padb ======================================= --- /trunk/src/padb Fri Feb 26 12:08:10 2010 +++ /trunk/src/padb Sat Oct 23 16:15:51 2010 @@ -9065,6 +9065,64 @@ # # PBS support +# +# Find if parent is resource manager or not +# there's two cases: +# 1- parent immediately is pbs_attach, Ok it's true +# 2- no parent immediately is pbs_attach, so find until mpd & parent is root +# +# take care of no existing process or immediate parent is 1 or 0 +# or proc itself is 'pbs_attach' +# +# reason to write this: +# in new version of pbspro there's no more pbs_attach +# and in this case padb fails to find any rank procs +# +# example: +#UID PID PPID C STIME TTY TIME CMD +#thipa 23562 1 0 13:28 ? 00:00:00 python /opt/mpi/mpibull2-1.3.7-1.t/bin/mpd.py +#thipa 23563 23562 0 13:28 ? 00:00:00 python /opt/mpi/mpibull2-1.3.7-1.t/bin/mpd.py +#thipa 23564 23562 0 13:28 ? 00:00:00 python /opt/mpi/mpibull2-1.3.7-1.t/bin/mpd.py +#thipa 23565 23563 0 13:28 ? 00:00:00 ./pp_sndrcv_spbl +#thipa 23566 23564 0 13:28 ? 
00:00:00 ./pp_sndrcv_spbl +# +# +sub is_parent_resmgr_pbs { + my $input_pid = shift; + my $result; + my $parent_pid = find_from_status( $input_pid, 'PPid' ); + my $name_pid = find_from_status( $input_pid, 'Name'); + return if (!defined ($parent_pid) || $parent_pid == 1 || $parent_pid == 0 || $name_pid eq 'pbs_attach'); + # loop to find its parents + my $pid = $parent_pid; + my $loop = 0; + for (;;) { + # find PPid of this pid + my $ppid = find_from_status( $pid, 'PPid' ); + my @name = slurp_file("/proc/$pid/cmdline"); + $loop++; + my $line = $name[0]; + my @champs = split(/\0+/,$line); + if ($loop == 1 ) { + if ($champs[0] eq 'pbs_attach') { + $result = 1; # OK parent is resm + last; + } + if ($ppid == 1 || $ppid == 0) { + last; # parent is root last anyway + } + } elsif ($loop >= 2) { + if ($ppid == 1 || $ppid == 0) { + if (defined($champs[1]) && $champs[1] =~ /mpd.py/) { + $result = 1 # OK parent is resm + } + last; # parent is root last anyway + } + } + $pid=$ppid; + } + return ($result); +} # sub pbs_find_pids { my $job = shift; @@ -9082,7 +9140,7 @@ next if ( is_resmgr_process($pid) ); # Skip over ones which aren't direct descendants of a resource manager - next unless is_parent_resmgr($pid); + next unless is_parent_resmgr_pbs($pid); my $vp; my %env = get_remote_env($pid); @@ -9090,7 +9148,7 @@ %env = get_remote_env_bygdb($pid); } - if ( $env{PBS_JOBID} eq $job ) { + if ( defined( $env{PBS_JOBID} ) && $env{PBS_JOBID} eq $job ) { $vp = $env{PMI_RANK}; } if ( defined $vp ) { From padb at googlecode.com Sun Oct 24 00:34:15 2010 From: padb at googlecode.com (padb at googlecode.com) Date: Sat, 23 Oct 2010 23:34:15 +0000 Subject: [padb] r396 committed - Pass the previous commit through perltidy to ensure consistency. Message-ID: <00504502cc8c7c3f3404935132d8@google.com> Revision: 396 Author: apittman at gmail.com Date: Sat Oct 23 16:33:23 2010 Log: Pass the previous commit through perltidy to ensure consistency. 
http://code.google.com/p/padb/source/detail?r=396 Modified: /trunk/src/padb ======================================= --- /trunk/src/padb Sat Oct 23 16:15:51 2010 +++ /trunk/src/padb Sat Oct 23 16:33:23 2010 @@ -9066,18 +9066,18 @@ # # PBS support # -# Find if parent is resource manager or not +# Find if parent is resource manager or not # there's two cases: # 1- parent immediately is pbs_attach, Ok it's true # 2- no parent immediately is pbs_attach, so find until mpd & parent is root # # take care of no existing process or immediate parent is 1 or 0 # or proc itself is 'pbs_attach' -# +# # reason to write this: # in new version of pbspro there's no more pbs_attach # and in this case padb fails to find any rank procs -# +# # example: #UID PID PPID C STIME TTY TIME CMD #thipa 23562 1 0 13:28 ? 00:00:00 python /opt/mpi/mpibull2-1.3.7-1.t/bin/mpd.py @@ -9091,38 +9091,45 @@ my $input_pid = shift; my $result; my $parent_pid = find_from_status( $input_pid, 'PPid' ); - my $name_pid = find_from_status( $input_pid, 'Name'); - return if (!defined ($parent_pid) || $parent_pid == 1 || $parent_pid == 0 || $name_pid eq 'pbs_attach'); + my $name_pid = find_from_status( $input_pid, 'Name' ); + return + if ( !defined($parent_pid) + || $parent_pid == 1 + || $parent_pid == 0 + || $name_pid eq 'pbs_attach' ); + # loop to find its parents - my $pid = $parent_pid; + my $pid = $parent_pid; my $loop = 0; - for (;;) { - # find PPid of this pid - my $ppid = find_from_status( $pid, 'PPid' ); - my @name = slurp_file("/proc/$pid/cmdline"); - $loop++; - my $line = $name[0]; - my @champs = split(/\0+/,$line); - if ($loop == 1 ) { - if ($champs[0] eq 'pbs_attach') { - $result = 1; # OK parent is resm - last; - } - if ($ppid == 1 || $ppid == 0) { - last; # parent is root last anyway - } - } elsif ($loop >= 2) { - if ($ppid == 1 || $ppid == 0) { - if (defined($champs[1]) && $champs[1] =~ /mpd.py/) { - $result = 1 # OK parent is resm - } - last; # parent is root last anyway - } - } - $pid=$ppid; + for 
( ; ; ) { + + # find PPid of this pid + my $ppid = find_from_status( $pid, 'PPid' ); + my @name = slurp_file("/proc/$pid/cmdline"); + $loop++; + my $line = $name[0]; + my @champs = split( /\0+/, $line ); + if ( $loop == 1 ) { + if ( $champs[0] eq 'pbs_attach' ) { + $result = 1; # OK parent is resm + last; + } + if ( $ppid == 1 || $ppid == 0 ) { + last; # parent is root last anyway + } + } elsif ( $loop >= 2 ) { + if ( $ppid == 1 || $ppid == 0 ) { + if ( defined( $champs[1] ) && $champs[1] =~ /mpd.py/ ) { + $result = 1 # OK parent is resm + } + last; # parent is root last anyway + } + } + $pid = $ppid; } return ($result); } + # sub pbs_find_pids { my $job = shift; From padb at googlecode.com Sun Oct 24 00:38:16 2010 From: padb at googlecode.com (padb at googlecode.com) Date: Sat, 23 Oct 2010 23:38:16 +0000 Subject: [padb] r397 committed - Be careful when interacting with gdb, don't try and write to it... Message-ID: <000e0cd33276dd128204935140f0@google.com> Revision: 397 Author: apittman at gmail.com Date: Sat Oct 23 16:34:51 2010 Log: Be careful when interacting with gdb, don't try and write to it if we haven't had any communication, it's probably dead - most likely because it never ran in the first place. 
http://code.google.com/p/padb/source/detail?r=397 Modified: /trunk/src/padb ======================================= --- /trunk/src/padb Sat Oct 23 16:33:23 2010 +++ /trunk/src/padb Sat Oct 23 16:34:51 2010 @@ -5984,6 +5984,16 @@ sub gdb_quit { my ($gdb) = @_; + if ( $gdb->{seq} == 1 ) { + foreach my $fdname (qw(rdr wtr err)) { + next unless exists $gdb->{$fdname}; + close $gdb->{$fdname}; + } + if ( defined $gdb->{debugfd} ) { + close $gdb->{debugfd}; + } + return; + } gdb_send( $gdb, 'quit' ); waitpid $gdb->{gdbpid}, 0; foreach my $fdname (qw(rdr wtr err)) { From padb at googlecode.com Sun Oct 24 01:35:44 2010 From: padb at googlecode.com (padb at googlecode.com) Date: Sun, 24 Oct 2010 00:35:44 +0000 Subject: [padb] r398 committed - Re-instate the behaviour of ignoring SIGPIPE to avoid GDB errors... Message-ID: <000e0cd32e205f83610493520e2a@google.com> Revision: 398 Author: apittman at gmail.com Date: Sat Oct 23 17:35:27 2010 Log: Re-instate the behaviour of ignoring SIGPIPE to avoid GDB errors being fatal. This allows padb to correctly detect that the attach has failed and report a sensible error message. fixes #5 http://code.google.com/p/padb/source/detail?r=398 Modified: /trunk/src/padb ======================================= --- /trunk/src/padb Sat Oct 23 16:34:51 2010 +++ /trunk/src/padb Sat Oct 23 17:35:27 2010 @@ -5953,7 +5953,14 @@ # number of years however so lets try not doing it for a while and see # where that gets us. -# $SIG{PIPE} = 'IGNORE'; +# Enabling this again until I can fix things properly as it's causing problems. +# if gdb isn't installed then we get a SIGPIPE (which is normally fatal) when +# starting it, previously this would happen inside an eval block but now with +# the global attach it doesn't. Ideally we should catch the case where gdb isn't +# there and pass the error back up but that is tricky to do so for now simply +# catch the signal, we won't then be able to attach and will report a sensible +# error message to the user. 
+$SIG{PIPE} = 'IGNORE'; sub gdb_start { my ( $exe, $core ) = @_; @@ -6024,7 +6031,7 @@ if ( not defined $p{status} ) { $gdb->{error} = 'Failed to attach to process'; if ( not find_exe('gdb') ) { - $gdb->{error} = 'Failed to attach to process (gdb not installed?)'; + $gdb->{error} = 'Failed to attach to process (is gdb installed?)'; } return; } @@ -6095,7 +6102,7 @@ if ( not defined $p{status} ) { $gdb->{error} = 'Failed to attach to process'; if ( not find_exe('gdb') ) { - $gdb->{error} = 'Failed to attach to process (gdb not installed?)'; + $gdb->{error} = 'Failed to attach to process (is gdb installed?)'; } return; } From padb at googlecode.com Sun Oct 24 01:53:52 2010 From: padb at googlecode.com (padb at googlecode.com) Date: Sun, 24 Oct 2010 00:53:52 +0000 Subject: [padb] r399 committed - Fail better in the light of new kernel security features preventing... Message-ID: <000e0cd152d43a097d0493524f9c@google.com> Revision: 399 Author: apittman at gmail.com Date: Sat Oct 23 17:52:52 2010 Log: Fail better in the light of new kernel security features preventing the use of ptrace on non-child processes. It seems new kernels (Ubuntu 10.10 included) have a file /proc/sys/kernel/yama/ptrace_scope which controls which processes can be ptraced, with this param at its default value programs can only ptrace their children which stops gdb from attaching to programs. If we fail to attach ever then check if this file exists and if its value is non-zero then tell the user it's a likely cause of the problem. 
http://code.google.com/p/padb/source/detail?r=399 Modified: /trunk/src/padb ======================================= --- /trunk/src/padb Sat Oct 23 17:35:27 2010 +++ /trunk/src/padb Sat Oct 23 17:52:52 2010 @@ -3414,7 +3414,7 @@ my $gdb = gdb_start(); if ( not gdb_attach( $gdb, $job ) ) { if ( defined $gdb->{error} ) { - print "Failed to attach to process: $gdb->{error}\n"; + print "$gdb->{error}\n"; } else { print "Failed to attach to process\n"; } @@ -6040,6 +6040,14 @@ my $r = gdb_parse_reason( $p{reason} ); if ( defined $r->{msg} ) { $gdb->{error} = "Failed to attach to process: $r->{msg}"; + my $yama_file = "/proc/sys/kernel/yama/ptrace_scope"; + if ( -f $yama_file ) { + my $yama = slurp_file($yama_file); + if ( $yama != '0' ) { + $gdb->{error} = +"Failed to attach to process: $r->{msg} (try echo 0 > $yama_file)"; + } + } } else { $gdb->{error} = 'Failed to attach to process'; } @@ -6111,6 +6119,15 @@ my $r = gdb_parse_reason( $p{reason} ); if ( defined $r->{msg} ) { $gdb->{error} = "Failed to attach to process: $r->{msg}"; + my $yama_file = "/proc/sys/kernel/yama/ptrace_scope"; + if ( -f $yama_file ) { + my $yama = slurp_file($yama_file); + if ( $yama != '0' ) { + $gdb->{error} = +"Failed to attach to process!: $r->{msg} (try echo 0 > $yama_file)"; + } + } + } else { $gdb->{error} = 'Failed to attach to process'; } From padb at googlecode.com Sun Oct 24 02:07:00 2010 From: padb at googlecode.com (padb at googlecode.com) Date: Sun, 24 Oct 2010 01:07:00 +0000 Subject: [padb] r400 committed - Use passive mode to upload to the website. Message-ID: <000e0cd32fa830e85a0493527e05@google.com> Revision: 400 Author: apittman at gmail.com Date: Sat Oct 23 18:05:53 2010 Log: Use passive mode to upload to the website. 
http://code.google.com/p/padb/source/detail?r=400 Modified: /trunk/doc/upload_website ======================================= --- /trunk/doc/upload_website Mon Dec 21 15:11:50 2009 +++ /trunk/doc/upload_website Sat Oct 23 18:05:53 2010 @@ -13,7 +13,7 @@ FILES="index usage download email extensions modes full-report configuration" -ftp-upload --host padb.pittman.org.uk -u padb at pittman.co.uk --password $PASSWORD layout.css +ftp-upload --passive --host padb.pittman.org.uk -u padb at pittman.co.uk --password $PASSWORD layout.css for FILE in $FILES do @@ -22,13 +22,13 @@ cat header.html > $TFILE cat $FILE.html >> $TFILE cat footer.html >> $TFILE - ftp-upload --host padb.pittman.org.uk -u padb at pittman.co.uk --password $PASSWORD --as $FILE.html $TFILE + ftp-upload --passive --host padb.pittman.org.uk -u padb at pittman.co.uk --password $PASSWORD --as $FILE.html $TFILE # ftp-upload --host padb.pittman.org.uk -u padb at pittman.co.uk --password $PASSWORD --as $FILE/index.html $TFILE #ftp-upload --host padb.pittman.org.uk -u padb at pittman.co.uk --password $PASSWORD --as $FILE/layout.css layout.css rm $TFILE done -ftp-upload --host padb.pittman.org.uk -u padb at pittman.co.uk --password $PASSWORD OpenMPI-padb-groups.patch +ftp-upload --passive --host padb.pittman.org.uk -u padb at pittman.co.uk --password $PASSWORD OpenMPI-padb-groups.patch #ftp-upload --host padb.pittman.org.uk -u padb at pittman.co.uk --password $PASSWORD --as extensions/OpenMPI-padb-groups.patch OpenMPI-padb-groups.patch echo All done. 
From padb at googlecode.com Sun Oct 24 02:11:01 2010 From: padb at googlecode.com (padb at googlecode.com) Date: Sun, 24 Oct 2010 01:11:01 +0000 Subject: [padb] r401 committed - Bump the version number of the rc Message-ID: <000e0cd2e0828d2c380493528c12@google.com> Revision: 401 Author: apittman at gmail.com Date: Sat Oct 23 18:06:44 2010 Log: Bump the version number of the rc http://code.google.com/p/padb/source/detail?r=401 Modified: /trunk/configure.in ======================================= --- /trunk/configure.in Mon Feb 15 10:24:12 2010 +++ /trunk/configure.in Sat Oct 23 18:06:44 2010 @@ -1,5 +1,5 @@ AC_INIT(src/padb) -AM_INIT_AUTOMAKE(padb,3.2-beta0) +AM_INIT_AUTOMAKE(padb,3.2-beta1) AC_PROG_CC AC_PROG_INSTALL AM_PROG_CC_C_O From padb at googlecode.com Sun Oct 24 02:15:01 2010 From: padb at googlecode.com (padb at googlecode.com) Date: Sun, 24 Oct 2010 01:15:01 +0000 Subject: [padb] r402 committed - Update the news for a 3.2 beta release. Message-ID: <00504502c43fdfacea0493529abb@google.com> Revision: 402 Author: apittman at gmail.com Date: Sat Oct 23 18:13:32 2010 Log: Update the news for a 3.2 beta release. http://code.google.com/p/padb/source/detail?r=402 Modified: /trunk/doc/index.html ======================================= --- /trunk/doc/index.html Mon Dec 21 15:11:50 2009 +++ /trunk/doc/index.html Sat Oct 23 18:13:32 2010 @@ -18,8 +18,11 @@

Recent News