From codesite-noreply at google.com Wed Jul 1 10:13:22 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 01 Jul 2009 09:13:22 +0000 Subject: [padb-devel] [padb commit] r82 - Tidy up the *_is_installed functions, if a rmgr code needs Message-ID: <0016e644de68c307b0046da1559f@google.com> Author: apittman Date: Wed Jul 1 01:51:13 2009 New Revision: 82 Modified: branches/full-duplex/src/padb Log: Tidy up the *_is_installed functions, if a rmgr code needs more than one executable to run then test for the presence of all executables. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 1 01:51:13 2009 @@ -2021,7 +2021,7 @@ } sub rms_is_installed { - return find_exe("prun"); + return ( find_exe("prun") and find_exe("rmsquery") ); } sub rms_get_jobs { @@ -2128,7 +2128,7 @@ ############################################################################### sub slurm_is_installed { - return find_exe("srun"); + return ( find_exe("srun") and find_exe("squeue") and find_exe("scontrol") ); } sub slurm_get_jobs { @@ -2266,7 +2266,7 @@ ############################################################################### sub mpd_is_installed { - return find_exe("mpdlistjobs"); + return ( find_exe("mpdlistjobs") and find_exe("mpdrun") ); } sub mpd_get_data { @@ -2362,7 +2362,7 @@ } sub open_is_installed { - return find_exe("ompi-ps"); + return ( find_exe("ompi-ps") and find_exe("orterun") ); } my %open_jobs; @@ -2475,9 +2475,7 @@ # Check for both LSF and RMS, I know LSF works in other ways but I don't # know how to launch jobs then... - my $rms = find_exe("rinfo"); - return 0 unless $rms; - return find_exe("bjobs"); + return ( find_exe("bjobs") and rms_is_installed() ); } sub lsf_get_jobs { From codesite-noreply at google.com Wed Jul 1 10:26:38 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 01 Jul 2009 09:26:38 +0000 Subject: [padb-devel] [padb commit] r83 - Make the proc-summary option more configurable, add options for Message-ID: <001485f91c002bdb7d046da18538@google.com> Author: apittman Date: Wed Jul 1 02:26:01 2009 New Revision: 83 Modified: branches/full-duplex/src/padb Log: Make the proc-summary option more configurable, add options for reverse-sort-order and nprocs-output to fully control what is displayed to the user. Also allow columns to be sorted on column name as well as key. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 1 02:26:01 2009 @@ -2753,11 +2753,15 @@ } sub sort_proc_hashes { - my $key = shift; - my @all = @_; + my $carg = shift; + my $key = shift; + my @all = @_; - #print Dumper $all; - return ( reverse( sort { $a->{$key} <=> $b->{$key} } @all ) ); + if ( $carg->{"reverse-sort-order"} ) { + return ( sort { $a->{$key} <=> $b->{$key} } @all ); + } else { + return ( reverse( sort { $a->{$key} <=> $b->{$key} } @all ) ); + } } sub pre_mpi_watch { @@ -2796,6 +2800,7 @@ my $show_fields = 0; my %proc_format_lengths; + my %proc_header_reverse; my $separator = $carg->{"column-seperator"}; @@ -2809,6 +2814,7 @@ push @proc_format_array, lc($name); $proc_format_header{ lc($name) } = $desc; $proc_format_lengths{ lc($name) } = length($desc); + $proc_header_reverse{ lc($desc) } = lc($name); } else { push @proc_format_array, lc($column); $proc_format_header{ lc($column) } = $column; @@ -2841,7 +2847,12 @@ push @all, \%hash; } - @all = sort_proc_hashes( $carg->{"proc-sort-key"}, @all ); + # Allow sort keys to be based on column names as well as real keys. + my $key = lc( $carg->{"proc-sort-key"} ); + if ( defined $proc_header_reverse{$key} ) { + $key = $proc_header_reverse{$key}; + } + @all = sort_proc_hashes( $carg, $key, @all ); if ( $carg->{"proc-show-header"} ) { my @res; @@ -2853,6 +2864,7 @@ my $line = join( $separator, @res ); print "$line\n"; } + my $count = $carg->{"nprocs-output"}; foreach my $hash (@all) { my @res; foreach my $key (@proc_format_array) { @@ -2864,6 +2876,9 @@ } my $line = join( $separator, @res ); print "$line\n"; + if ( $count and ( --$count == 0 ) ) { + return; + } } } @@ -6798,13 +6813,15 @@ 'arg_long' => 'proc-summary', 'help' => "Show process information in top format", 'options_i' => { - "column-seperator" => " ", - "proc-shows-proc" => 1, - "proc-shows-fds" => 0, - "proc-shows-maps" => 0, - "proc-shows-stat" => 1, - "proc-sort-key" => "vp", - "proc-show-header" => 1, + "column-seperator" => " ", + "proc-shows-proc" => 1, + "proc-shows-fds" => 0, + "proc-shows-maps" => 0, + "proc-shows-stat" => 1, + "proc-sort-key" => "vp", + "proc-show-header" => 1, + "reverse-sort-order" => 0, + "nprocs-output" => 0, }, 'secondary' => [ { From codesite-noreply at google.com Wed Jul 1 12:39:24 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 01 Jul 2009 11:39:24 +0000 Subject: [padb-devel] [padb commit] r84 - Add load1, load5 and load15 outputs to proc-info and proc-format. Message-ID: <0016364ecdfcfa6ab5046da35fa6@google.com> Author: apittman Date: Wed Jul 1 02:45:51 2009 New Revision: 84 Modified: branches/full-duplex/src/padb Log: Add load1, load5 and load15 outputs to proc-info and proc-format. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 1 02:45:51 2009 @@ -5450,6 +5450,7 @@ my @all; my $jiffies_start; + my $load_avg; if ( $carg->{"proc-shows-proc"} ) { foreach my $proc ( @{$list} ) { my $pid = $proc->{pid}; @@ -5474,6 +5475,9 @@ # End critical path. $jiffies_start = add_and_divide_jiffies( $stat, $stat2 ); + open( LFD, "/proc/loadavg" ); + $load_avg = ; + close LFD; } foreach my $proc ( @{$list} ) { @@ -5514,6 +5518,8 @@ my $elapsed = $jiffies_end - $jiffies_start; + my ( $l1, $l5, $l15 ) = split( " ", $load_avg ); + foreach my $proc ( @{$list} ) { my $vp = $proc->{vp}; my $jpre = stat_to_jiffies( $proc->{stat_start} ); @@ -5522,7 +5528,10 @@ my $used = ( $jused / $elapsed ) * $cpucount * 100; my $used_str = sprintf( "%d", $used ); - proc_output( $vp, "pcpu", $used_str ); + proc_output( $vp, "pcpu", $used_str ); + proc_output( $vp, "load1", $l1 ); + proc_output( $vp, "load5", $l15 ); + proc_output( $vp, "load15", $l15 ); } } From codesite-noreply at google.com Wed Jul 1 12:49:53 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 01 Jul 2009 11:49:53 +0000 Subject: [padb-devel] [padb commit] r85 - Clean up verbose mode, show per-mode options with their current Message-ID: <0016361e8960834dd0046da385f3@google.com> Author: apittman Date: Wed Jul 1 03:06:16 2009 New Revision: 85 Modified: branches/full-duplex/src/padb Log: Clean up verbose mode, show per-mode options with their current values when verbose mode is enabled. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 1 03:06:16 2009 @@ -3262,6 +3262,17 @@ } } + if ( $conf{verbose} and defined $req->{cargs} ) { + printf("Mode '$req->{mode}' mode specific flags:\n"); + foreach my $arg ( sort( keys( %{ $req->{cargs} } ) ) ) { + if ( defined $req->{cargs}{$arg} ) { + printf( "%20s : '%s'\n", $arg, $req->{cargs}{$arg} ); + } else { + printf( "%20s : undef\n", $arg ); + } + } + } + return $req; } @@ -3572,7 +3583,7 @@ $secret = find_padb_secret(); if ( not defined $secret ) { - printf("Error: No secret\n"); + printf("Error: Could not load secret file on this node\n"); exit(1); } @@ -3594,8 +3605,8 @@ my $ncpus = $res[1]; my $hosts = $res[2]; - $conf{"verbose"} && defined $ncpus && print "Job has $ncpus cpus\n"; - $conf{"verbose"} && defined $hosts && print "Job has $hosts hosts\n"; + $conf{"verbose"} && defined $ncpus && print "Job has $ncpus processe(s)\n"; + $conf{"verbose"} && defined $hosts && print "Job spans $hosts host(s)\n"; # Maybe do it this way, edb works best when run with the same LD_LIBRARY_PATH # as the application. It's very important when running the message queue @@ -3775,34 +3786,31 @@ # -Oedbopt="--pagesize=8192 --pagesize-header=4096" # -Oedbopt="--pagesize 8192 --pagesize-header 4096" foreach my $config_option (@config_options) { - my @pairs = split( ",", $config_option ); - foreach my $pair (@pairs) { - my ( $name, $val ) = split( "=", $pair ); + my ( $name, $val ) = split( "=", $config_option ); - # $name =~ s/\-/\_/g; + # $name =~ s/\-/\_/g; - if ( $name eq "scriptDir" ) { - printf( + if ( $name eq "scriptDir" ) { + printf( "$prog: -OscriptDir deprecated, use -Oedb=/path/to/edb instead\n" - ); - exit(1); - } + ); + exit(1); + } - if ( !exists $conf{$name} - and !exists $conf{mode_options_reverse}{$name} ) - { - printf("Error, unknown config option '$name'\n"); - config_help(); - exit(1); - } - if ( !defined $val ) { - printf("Error, config option '$name' requires value\n"); - config_help(); - exit(1); - } - config_set( $name, $val ); + if ( !exists $conf{$name} + and !exists $conf{mode_options_reverse}{$name} ) + { + printf("Error, unknown config option '$name'\n"); + config_help(); + exit(1); + } + if ( !defined $val ) { + printf("Error, config option '$name' requires value\n"); + config_help(); + exit(1); } + config_set( $name, $val ); } if ($list_rmgrs) { From codesite-noreply at google.com Wed Jul 1 12:53:59 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 01 Jul 2009 11:53:59 +0000 Subject: [padb-devel] [padb commit] r86 - Re-instate orte support across multiple nodes, I accidentally disabled Message-ID: <0016e644b95027eb9b046da394aa@google.com> Author: apittman Date: Wed Jul 1 03:31:28 2009 New Revision: 86 Modified: branches/full-duplex/src/padb Log: Re-instate orte support across multiple nodes, I accidentally disabled this last week when changing the way it reads orte-ps Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 1 03:31:28 2009 @@ -2374,7 +2374,7 @@ return; } - my $hostname = hostname(); + my $job; open( OPEN, "ompi-ps|" ) or return; @@ -2408,16 +2408,14 @@ next if $host eq "Node"; $open_jobs{$job}{hosts}{$host}++; - if ( $host eq $hostname ) { - my $name = $elems[1]; - $name =~ /\[\[(\d+)\,(\d+)\]\,(\d+)\]/; - my $rank = $3; - - my $pid = $elems[3]; - $rank =~ s/ //g; - $pid =~ s/ //g; - $open_jobs{$job}{ranks}{$host}{$rank} = $pid; - } + my $name = $elems[1]; + $name =~ /\[\[(\d+)\,(\d+)\]\,(\d+)\]/; + my $rank = $3; + + my $pid = $elems[3]; + $rank =~ s/ //g; + $pid =~ s/ //g; + $open_jobs{$job}{ranks}{$host}{$rank} = $pid; } } From codesite-noreply at google.com Wed Jul 1 16:56:07 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 01 Jul 2009 15:56:07 +0000 Subject: [padb-devel] [padb commit] r87 - Clean up the comms loop on the outer, give every fd a callback to Message-ID: <00163630f62b15ae00046da6f653@google.com> Author: apittman Date: Wed Jul 1 06:34:20 2009 New Revision: 87 Modified: branches/full-duplex/src/padb Log: Clean up the comms loop on the outer, give every fd a callback to handle output/eof or new connections. This cleans up the code and also means that sysread() is called instead of getline() on fd's from the inner process which prevents output going astray. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 1 06:34:20 2009 @@ -3111,17 +3111,11 @@ # Do this to allow telnet sessions to work. $str =~ s/\r//g; - # printf("Testing $str\n"); - - if ( $str =~ /^([^\n]+)\n(.*)$/ ) { - - # printf("Calling callback for \"$1\"\n"); + # Allow multi-line output here, making sure we process each line. + while ( $str =~ /^([^\n]+)\n(.*)$/ ) { $sd->{line_cb}( $handle, $sd, $1 ); $sd->{str} = $2; - } else { - - # Likely just truncated input, wait for more to arrive. - # printf("No match ()\n"); + $str = $2; } return; @@ -3161,15 +3155,15 @@ $comm_data->{remote}{$td}{port}, $comm_data->{remote}{$td}{key} ); - $cdata->{active} = 1; - $cdata->{str} = ""; - $cdata->{line_cb} = \&command_from_inner; - $cdata->{eof_cb} = \&eof_from_inner; + $cdata->{active} = 1; + $cdata->{str} = ""; + $cdata->{fd_desc} = "child socket"; + $cdata->{line_cb} = \&command_from_inner; + $cdata->{eof_cb} = \&eof_from_fd; + $cdata->{event_cb} = \&handle_event_from_socket; $comm_data->{sockets}{ $cdata->{socket} } = $cdata; $comm_data->{sel}->add( $cdata->{socket} ); - - #print Dumper $comm_data; } sub issue_command_to_inner { @@ -3374,14 +3368,65 @@ } } -sub eof_from_inner { +sub inner_stdout_cb { + my ( $comm_data, $cdata, $line ) = @_; + my @words = split( " ", $line ); + if ( $#words == 3 and $words[0] eq "connect" ) { + + handle_signon( $comm_data, $words[1], $words[2], $words[3] ); + return; + } elsif ( $words[0] eq "debug" ) { + my $count = $comm_data->{sel}->count(); + print("There are $count sockets\n"); + return; + } + print("inner: $line"); +} + +sub inner_sterr_cb { + my ( $comm_data, $cdata, $line ) = @_; + print("einner: $line"); +} + +sub eof_from_fd { my ( $comm_data, $cdata ) = @_; if ( $comm_data->{state} ne "shutdown" ) { - printf("Unexpected EOF from child socket ($comm_data->{state})\n"); + printf("Unexpected EOF from $cdata->{fd_desc} ($comm_data->{state})\n"); } } +sub handle_event_from_socket { + my ( $comm_data, $h ) = @_; + my $cdata = $comm_data->{sockets}{$h}; + + my $data; + my $nb = sysread( $h, $data, 1024 ); + + if ( $nb == 0 ) { + if ( defined( $cdata->{eof_cb} ) ) { + $cdata->{eof_cb}( $comm_data, $cdata ); + } + $comm_data->{sel}->remove($h); + $h->close(); + } else { + $cdata->{str} .= $data; + extract_line( $comm_data, $cdata ); + } +} + +sub handle_event_from_port { + my ( $comm_data, $h ) = @_; + + my $new = $h->accept(); + $comm_data->{sel}->add($new); + my %cdata; + $cdata{str} = ""; + $cdata{line_cb} = \&hello_from_inner; + $cdata{event_cb} = \&handle_event_from_socket; + $comm_data->{sockets}{$new} = \%cdata; +} + sub go_parallel { my $jobid = shift; my $cmd = shift; @@ -3403,6 +3448,10 @@ my $hostname = hostname(); $cmd .= " --outer=$hostname:$port"; $sel->add($sl); + + my %cdata; + $cdata{event_cb} = \&handle_event_from_port; + $comm_data->{sockets}{$sl} = \%cdata; } my $errors = 0; @@ -3435,77 +3484,33 @@ $comm_data->{sel} = $sel; my $start = time(); + my %op; + $op{str} = ""; + $op{line_cb} = \&inner_stdout_cb; + $op{eof_cb} = \&eof_from_fd; + $op{fd_desc} = "Inner stdout"; + $op{event_cb} = \&handle_event_from_socket; + $comm_data->{sockets}{ $pcmd->{out} } = \%op; + + my %ep; + $ep{str} = ""; + $ep{line_cb} = \&inner_stderr_cb; + $ep{eof_cb} = \&eof_from_fd; + $ep{fd_desc} = "Inner stderr"; + $ep{event_cb} = \&handle_event_from_socket; + $comm_data->{sockets}{ $pcmd->{err} } = \%ep; + while ( $sel->count() > 1 ) { while ( my @live = $sel->can_read(5) ) { foreach my $h (@live) { - if ( $h eq $pcmd->{out} ) { - my $line = $h->getline(); - if ( not defined $line ) { - if ( not $comm_data->{state} eq "shutdown" ) { - printf("Warning, EOF from ofd\n"); - } - $sel->remove($h); - $h->close(); - next; - } - my @words = split( " ", $line ); - if ( $#words == 3 and $words[0] eq "connect" ) { - - handle_signon( $comm_data, $words[1], $words[2], - $words[3] ); - next; - } elsif ( $words[0] eq "debug" ) { - my $count = $sel->count(); - print("There are $count sockets\n"); - next; - } - print("inner: $line"); - } elsif ( $h eq $pcmd->{err} ) { - my $line = $h->getline(); - - if ( not defined $line ) { - if ( not $comm_data->{state} eq "shutdown" ) { - printf("Warning, EOF from efd\n"); - } - $sel->remove($h); - $h->close(); - next; - } - printf("einner:$line"); - } elsif ( defined $comm_data->{sockets}{$h} ) { + if ( defined $comm_data->{sockets}{$h} ) { my $cdata = $comm_data->{sockets}{$h}; - - my $data; - my $nb = sysread( $h, $data, 1024 ); - - if ( $nb == 0 ) { - if ( defined( $cdata->{eof_cb} ) ) { - $cdata->{eof_cb}( $comm_data, $cdata ); - } - $sel->remove($h); - $h->close(); - } else { - $cdata->{str} .= $data; - extract_line( $comm_data, $cdata ); - } - } elsif ( exists( $comm_data->{listen} ) - and $h eq $comm_data->{listen} ) - { - - # It's a new socket on our listening port. - my $new = $h->accept(); - $sel->add($new); - my %cdata; - $cdata{str} = ""; - $cdata{line_cb} = \&hello_from_inner; - - $comm_data->{sockets}{$new} = \%cdata; - + $cdata->{event_cb}( $comm_data, $h ); } else { printf("Responce from unknown fd $h\n"); exit(1); } - } #for... + } } my $t2 = time() - $start; my $count = $sel->count(); From codesite-noreply at google.com Thu Jul 2 11:26:50 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Thu, 02 Jul 2009 10:26:50 +0000 Subject: [padb-devel] [padb commit] r89 - Add code to check all remote processes are discovered and are Message-ID: <0016e6409b344e9985046db67a22@google.com> Author: apittman Date: Thu Jul 2 02:33:22 2009 New Revision: 89 Modified: branches/full-duplex/src/padb Log: Add code to check all remote processes are discovered and are vaguely in the same state (executable name). It's enabled by default but can be controlled by setting "check-signon" to one of "none", "missing" or "all". Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Thu Jul 2 02:33:22 2009 @@ -371,6 +371,10 @@ $conf{"dump-raw"} = 0; $conf{"showcmd"} = 0; +# Valid values are "none" "missing" or "all". Anything not recognised +# is treated as "all". +$conf{"check-signon"} = "all"; + $conf{slurm_job_step} = "0"; # Output options. @@ -3268,6 +3272,50 @@ return $req; } +sub report_failed_signon { + my ( $key, $data ) = @_; + my %values; + foreach my $proc ( keys( %{$data} ) ) { + push( @{ $values{ $data->{$proc}{$key} } }, $proc ); + } + my %c; + $c{i} = length($key); + printf("$key : ranks\n"); + foreach my $value ( sort( keys(%values) ) ) { + printf( "%$c{i}s : %s\n", $value, compress( @{ $values{$value} } ) ); + } +} + +sub check_signon { + my ( $comm_data, $data ) = @_; + return if ( $conf{"check-signon"} eq "none" ); + my @missing; + my %target_state; + my %target_exe; + for ( my $proc = 0 ; $proc < $comm_data->{nprocesses} ; $proc++ ) { + if ( not defined $data->{target_responce}{$proc} ) { + push( @missing, $proc ); + next; + } + $target_exe{ $data->{target_responce}{$proc}{name} }++; + $target_state{ $data->{target_responce}{$proc}{state} }++; + } + if ( $#missing != -1 ) { + printf( "Warning, failed to locates ranks %s\n", compress(@missing) ); + } + return if ( $conf{"check-signon"} eq "missing" ); + my $exe_count = keys(%target_exe); + if ( $exe_count != 1 ) { + printf("Warning, remote process name differs across ranks\n"); + report_failed_signon( "name", $data->{target_responce} ); + } + my $state_count = keys(%target_state); + if ( $state_count != 1 ) { + printf("Warning, remote process state differs across ranks\n"); + report_failed_signon( "state", $data->{target_responce} ); + } +} + sub command_from_inner { my ( $comm_data, $cdata, $line ) = @_; @@ -3292,9 +3340,7 @@ $comm_data->{current_req} = next_command($comm_data); issue_command_to_inner( $cdata, $comm_data->{current_req} ); $comm_data->{state} = "live"; - - #XXX: Check all target_processes are here. - # print Dumper $d; + check_signon( $comm_data, $d ); return; } @@ -3342,7 +3388,7 @@ $comm_data->{remote}{$host}{key} = $key; $comm_data->{signons}++; - if ( $comm_data->{signons} == $comm_data->{hosts} ) { + if ( $comm_data->{signons} == $comm_data->{nhosts} ) { connect_to_children($comm_data); } } @@ -3359,7 +3405,7 @@ handle_signon( $comm_data, $words[2], $words[3], $words[4] ); - if ( $comm_data->{signons} == $comm_data->{hosts} ) { + if ( $comm_data->{signons} == $comm_data->{nhosts} ) { # Don't listen on this port any more; $comm_data->{sel}->remove( $comm_data->{listen} ); @@ -3383,7 +3429,7 @@ print("inner: $line\n"); } -sub inner_sterr_cb { +sub inner_stderr_cb { my ( $comm_data, $cdata, $line ) = @_; print("einner: $line\n"); } @@ -3428,10 +3474,10 @@ } sub go_parallel { - my $jobid = shift; - my $cmd = shift; - my $ncpus = shift; - my $hosts = shift; + my $jobid = shift; + my $cmd = shift; + my $nprocesses = shift; + my $nhosts = shift; my $comm_data; @@ -3470,10 +3516,11 @@ close $pcmd->{in}; - $comm_data->{hosts} = $hosts; - $comm_data->{cmd} = $cmd; - $comm_data->{jobid} = $jobid; - $comm_data->{signons} = 0; + $comm_data->{nhosts} = $nhosts; + $comm_data->{nprocesses} = $nprocesses; + $comm_data->{cmd} = $cmd; + $comm_data->{jobid} = $jobid; + $comm_data->{signons} = 0; # State, one of "connecting" "live" and "shutdown"; $comm_data->{state} = "connecting"; @@ -3517,8 +3564,8 @@ if ( $count > 0 ) { #printf("Still here, time:$t2 comm_count:$count\n"); - if ( $comm_data->{signons} != $comm_data->{hosts} ) { - my $missing = $comm_data->{hosts} - $comm_data->{signons}; + if ( $comm_data->{signons} != $comm_data->{nhosts} ) { + my $missing = $comm_data->{nhosts} - $comm_data->{signons}; print("Waiting for signon from $missing hosts.\n"); } } @@ -6435,7 +6482,6 @@ $netdata->{target_responce}{$vp}->{name} = $name; $netdata->{target_responce}{$vp}->{state} = $state; } - } # Receive a command (perl reference) from our parent. From codesite-noreply at google.com Thu Jul 2 12:45:16 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Thu, 02 Jul 2009 11:45:16 +0000 Subject: [padb-devel] [padb commit] r88 - Change the default sort order in proc-summary, make mprocs-output Message-ID: <0016e644b950d5ec43046db79279@google.com> Author: apittman Date: Thu Jul 2 01:41:15 2009 New Revision: 88 Modified: branches/full-duplex/src/padb Log: Change the default sort order in proc-summary, make mprocs-output default to undef rather than 0. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Thu Jul 2 01:41:15 2009 @@ -2756,9 +2756,9 @@ my @all = @_; if ( $carg->{"reverse-sort-order"} ) { - return ( sort { $a->{$key} <=> $b->{$key} } @all ); - } else { return ( reverse( sort { $a->{$key} <=> $b->{$key} } @all ) ); + } else { + return ( sort { $a->{$key} <=> $b->{$key} } @all ); } } @@ -2874,7 +2874,7 @@ } my $line = join( $separator, @res ); print "$line\n"; - if ( $count and ( --$count == 0 ) ) { + if ( defined($count) and ( --$count == 0 ) ) { return; } } @@ -3380,12 +3380,12 @@ print("There are $count sockets\n"); return; } - print("inner: $line"); + print("inner: $line\n"); } sub inner_sterr_cb { my ( $comm_data, $cdata, $line ) = @_; - print("einner: $line"); + print("einner: $line\n"); } sub eof_from_fd { @@ -6841,14 +6841,14 @@ "proc-sort-key" => "vp", "proc-show-header" => 1, "reverse-sort-order" => 0, - "nprocs-output" => 0, + "nprocs-output" => undef, }, 'secondary' => [ { 'arg_long' => 'proc-format', 'type' => 's', 'default' => -'vp=vpid,hostname,pid,vmsize,vmrss,stat.state=S,pcpu=%cpu,name=command' +'vp=vpid,hostname,pid,vmsize,vmrss,stat.state=S,load1=uptime,pcpu=%cpu,name=command' } ] From codesite-noreply at google.com Thu Jul 2 12:49:19 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Thu, 02 Jul 2009 11:49:19 +0000 Subject: [padb-devel] [padb commit] r90 - When working in --proc-summary mode only send data back from the inner Message-ID: <0016e644b950491f9a046db7a189@google.com> Author: apittman Date: Thu Jul 2 03:44:23 2009 New Revision: 90 Modified: branches/full-duplex/src/padb Log: When working in --proc-summary mode only send data back from the inner processes to the outer if the outer is going to display it. This greatly reduces the amount of network traffic produced. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Thu Jul 2 03:44:23 2009 @@ -3440,6 +3440,8 @@ if ( $comm_data->{state} ne "shutdown" ) { printf("Unexpected EOF from $cdata->{fd_desc} ($comm_data->{state})\n"); } + + #printf("Expected EOF from $cdata->{fd_desc} ($comm_data->{state})\n"); } sub handle_event_from_socket { @@ -5490,11 +5492,14 @@ } my $proc_info; +my %proc_keys; sub proc_output { my ( $vp, $key, $value ) = @_; if ( $confInner{mode} eq "proc-summary" ) { - $proc_info->{$vp}{ lc($key) } = $value; + if ( defined $proc_keys{ lc($key) } ) { + $proc_info->{$vp}{ lc($key) } = $value; + } } else { output( $vp, "$key: $value" ); } @@ -5502,6 +5507,16 @@ sub show_proc_all { my ( $carg, $list ) = @_; + + %proc_keys = (); + + if ( defined $carg->{"proc-format"} ) { + my @columns = split( ",", $carg->{"proc-format"} ); + foreach my $column (@columns) { + my ( $name, $desc ) = split( "=", $column ); + $proc_keys{ lc($name) } = 1; + } + } $proc_info = undef; From codesite-noreply at google.com Thu Jul 2 13:45:55 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Thu, 02 Jul 2009 12:45:55 +0000 Subject: [padb-devel] [padb commit] r91 - Send the rank list (if supplied) over with individual requests rather than Message-ID: <0016e644de5ebd313b046db86b18@google.com> Author: apittman Date: Thu Jul 2 04:54:46 2009 New Revision: 91 Modified: branches/full-duplex/src/padb Log: Send the rank list (if supplied) over with individual requests rather than as part of the signon function. This is where it belongs really going forward and is needed for the initial signon tests to work properly. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Thu Jul 2 04:54:46 2009 @@ -3195,10 +3195,6 @@ $req->{jobconfig}{jobid} = $comm_data->{jobid}; $req->{jobconfig}{rmgr} = $conf{rmgr}; - if ( $#ranks != -1 ) { - @{ $req->{ranks} } = @ranks; - } - if ( $conf{rmgr} eq "orte" ) { $req->{jobconfig}{"orte-data"} = $open_jobs{ $comm_data->{jobid} }; } @@ -3247,6 +3243,12 @@ $req->{cargs} = $cmd->{args}; } + # XXX: Should only send this list over if it makes sense, for example + # the deadlock code only works when targetting all ranks. + if ( $#ranks != -1 ) { + @{ $req->{ranks} } = @ranks; + } + if ( defined $cmd->{out_format} ) { $req->{out_format} = $cmd->{out_format}; } @@ -3301,7 +3303,7 @@ $target_state{ $data->{target_responce}{$proc}{state} }++; } if ( $#missing != -1 ) { - printf( "Warning, failed to locates ranks %s\n", compress(@missing) ); + printf( "Warning, failed to locate ranks %s\n", compress(@missing) ); } return if ( $conf{"check-signon"} eq "missing" ); my $exe_count = keys(%target_exe); @@ -6061,32 +6063,16 @@ return \%res; } -sub show_pid { +sub maybe_show_pid { my ( $vp, $pid ) = @_; - debug( $vp, "Looking at $vp, pid: $pid" ); + debug( $vp, "maybe_show_pid vp $vp, pid: $pid" ); my %d; - $d{pid} = $pid; $d{vp} = $vp; + $d{pid} = $pid; push( @{ $confInner{"all-pids"} }, \%d ); -} - -sub maybe_show_pid { - my ( $vp, $pid ) = @_; - - debug( $vp, "maybe_show_pid vp $vp, pid: $pid" ); - if ( $#ranks == -1 ) { - show_pid( $vp, $pid ); - } - - foreach my $rank (@ranks) { - if ( $rank eq $vp ) { - show_pid( $vp, $pid ); - return; - } - } } sub find_from_status { @@ -6477,15 +6463,6 @@ $confInner{"orte-data"} = $cmd->{jobconfig}{"orte-data"}; } -# Save the rank list if supplied, if there is no list then assume all, should probably -# be sent over as part of the request rather than the header ready for when padb -# can handle multiple commands over one run. -# XXX: We also need to handle the case where modes don't accept partial input, for example -# deadlock detect where this shouldn't be passed. - if ( exists( $cmd->{ranks} ) ) { - @ranks = @{ $cmd->{ranks} }; - } - # Query the resource manager to find the pids, they'll be added to the "all-pids" field. $rmgr{ $cmd->{jobconfig}{rmgr} }{find_pids}( $cmd->{jobconfig}{jobid} ); @@ -6565,14 +6542,31 @@ $confInner{mode} = $cmd->{mode}; + my $pid_list; + + # If supplied with a rank list then use it now to generate a list of + # processes to inspect. + if ( exists( $cmd->{ranks} ) ) { + my @ranks = @{ $cmd->{ranks} }; + foreach my $proc ( @{ $confInner{"all-pids"} } ) { + my $vp = $proc->{vp}; + my $pid = $proc->{pid}; + foreach my $rank (@ranks) { + if ( $vp == $rank ) { + push @{$pid_list}, $proc; + } + } + } + } else { + $pid_list = $confInner{"all-pids"}; + } + # Now do the work. my $res; if ( defined $allfns{ $cmd->{mode} }{handler_all} ) { - $res = - $allfns{ $cmd->{mode} }{handler_all}( $cmd->{cargs}, - $confInner{"all-pids"} ); + $res = $allfns{ $cmd->{mode} }{handler_all}( $cmd->{cargs}, $pid_list ); } else { - $res = default_handler_all( $cmd, $confInner{"all-pids"} ); + $res = default_handler_all( $cmd, $pid_list ); } $netdata->{target_responce} = $res; $netdata->{all_replys}{target_responce} = $res; From codesite-noreply at google.com Wed Jul 8 18:44:58 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 08 Jul 2009 17:44:58 +0000 Subject: [padb-devel] [padb commit] r92 - Warn if a specific resource manager is seleceted but it Message-ID: <0016368e1e61425a22046e354c3c@google.com> Author: apittman Date: Wed Jul 8 10:41:26 2009 New Revision: 92 Modified: branches/full-duplex/src/padb Log: Warn if a specific resource manager is seleceted but it doesn't appear to be installed. The program will almost certainly bomb out but for now print a warning and then keep going. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 8 10:41:26 2009 @@ -2557,6 +2557,11 @@ # Don't do any sanity checks here to cope with non-default installs. if ( defined $rmgr{ $conf{"rmgr"} } ) { + if ( not $rmgr{ $conf{"rmgr"} }{is_installed}() ) { + printf( +"Warning: Selected resource manager $conf{rmgr} does not appear to be installed\n" + ); + } setup_rmgr( $conf{"rmgr"} ); return; } @@ -2597,6 +2602,11 @@ # Don't do any sanity checks here to cope with non-default installs. if ( defined $rmgr{ $conf{"rmgr"} } ) { + if ( not $rmgr{ $conf{"rmgr"} }{is_installed}() ) { + printf( +"Warning: Selected resource manager $conf{rmgr} does not appear to be installed\n" + ); + } setup_rmgr( $conf{"rmgr"} ); return; } From codesite-noreply at google.com Wed Jul 8 20:03:35 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 08 Jul 2009 19:03:35 +0000 Subject: [padb-devel] [padb commit] r93 - Parse multi-line output over sockets better, the previous Message-ID: <000e0cd1af8c63926e046e3665dd@google.com> Author: apittman Date: Wed Jul 8 12:02:34 2009 New Revision: 93 Modified: branches/full-duplex/src/padb Log: Parse multi-line output over sockets better, the previous regexp was loosing data if there was more than one line to be read at a time. Not a problem for most things as the comms protocol is line based but for debugging the inner processes it made it harder. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 8 12:02:34 2009 @@ -3126,10 +3126,17 @@ $str =~ s/\r//g; # Allow multi-line output here, making sure we process each line. - while ( $str =~ /^([^\n]+)\n(.*)$/ ) { + while ( $str =~ /^([^\n]+)\n/ ) { $sd->{line_cb}( $handle, $sd, $1 ); - $sd->{str} = $2; - $str = $2; + my $len = length($1); + my $flen = length($str); + if ( ( $len + 1 ) != $flen ) { + $str = substr( $str, $len + 1, $flen - $len ); + } else { + $str = ""; + + } + $sd->{str} = $str; } return; From codesite-noreply at google.com Wed Jul 8 20:07:39 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 08 Jul 2009 19:07:39 +0000 Subject: [padb-devel] [padb commit] r94 - Read in 64k chunks rather than paltry 1k chunks. Message-ID: <00163646d5f0f3167e046e36734f@google.com> Author: apittman Date: Wed Jul 8 12:05:30 2009 New Revision: 94 Modified: branches/full-duplex/src/padb Log: Read in 64k chunks rather than paltry 1k chunks. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 8 12:05:30 2009 @@ -3468,7 +3468,7 @@ my $cdata = $comm_data->{sockets}{$h}; my $data; - my $nb = sysread( $h, $data, 1024 ); + my $nb = sysread( $h, $data, 65536 ); if ( $nb == 0 ) { if ( defined( $cdata->{eof_cb} ) ) { @@ -6735,7 +6735,7 @@ my $sinfo = $netdata->{connections}{$s}; my $d; - my $count = sysread( $s, $d, 1024 ); + my $count = sysread( $s, $d, 65536 ); # Dead connection. if ( not defined $d or $count eq 0 ) { From codesite-noreply at google.com Wed Jul 8 20:11:41 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 08 Jul 2009 19:11:41 +0000 Subject: [padb-devel] [padb commit] r95 - Re-instate the group-id= option for deadlock detection, it now works Message-ID: <00163646c04a63e4ec046e36821d@google.com> Author: apittman Date: Wed Jul 8 12:10:24 2009 New Revision: 95 Modified: branches/full-duplex/src/padb Log: Re-instate the group-id= option for deadlock detection, it now works on strings rather than integers so works with MPI communicators as well as QsNet groups. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 8 12:10:24 2009 @@ -588,7 +588,7 @@ "verbose|v+" => \$conf{verbose}, "user|u=s" => \$user, "rank|r=i" => \@ranks, - "group-id=i" => \@target_groups, + "group-id=s" => \@target_groups, "help|h" => \&usage, "all|a" => \$all, "any|A" => \$any, @@ -1518,11 +1518,11 @@ my %ad; - my @tg; + my %tg; if ( $#target_groups != -1 ) { foreach my $gid (@target_groups) { - $tg[$gid]++; + $tg{$gid}++; } } @@ -1533,7 +1533,7 @@ foreach my $gid ( keys %{ $dataset->{'subsystems'}{'Group'} } ) { if ( $#target_groups != -1 ) { - next unless defined $tg[$gid]; + next unless defined $tg{$gid}; } my $str; @@ -1584,7 +1584,7 @@ foreach my $gid ( sort { $a <=> $b } keys %ad ) { if ( $#target_groups != -1 ) { - next unless defined $tg[$gid]; + next unless defined $tg{$gid}; } my $gstr = "Information for group '$gid'\n"; @@ -4815,11 +4815,11 @@ my %ad; - my @tg; + my %tg; if ( $#target_groups != -1 ) { foreach my $gid (@target_groups) { - $tg[$gid]++; + $tg{$gid}++; } } @@ -4837,7 +4837,7 @@ } if ( $#target_groups != -1 ) { - next unless defined $tg[$gid]; + next unless defined $tg{$gid}; } if ( $gd->{size} > 0 ) { @@ -4865,7 +4865,7 @@ foreach my $gid ( sort keys %ad ) { if ( $#target_groups != -1 ) { - next unless defined $tg[$gid]; + next unless defined $tg{$gid}; } my $gstr = "Information for group '$gid' ($ad{$gid}{name})\n"; From codesite-noreply at google.com Thu Jul 9 02:08:10 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Thu, 09 Jul 2009 01:08:10 +0000 Subject: [padb-devel] [padb commit] r96 - Add user and cpu time (as percentages) to the information gathered Message-ID: <001636e0b15943e94b046e3b7d99@google.com> Author: apittman Date: Wed Jul 8 18:01:48 2009 New Revision: 96 Modified: branches/full-duplex/src/padb Log: Add user and cpu time (as percentages) to the information gathered from /proc. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 8 18:01:48 2009 @@ -5500,14 +5500,31 @@ return ( ( string_to_jiffies($pre) + string_to_jiffies($post) ) / 2 ); } -# Convert /proc/self/stat into used jiffies. -sub stat_to_jiffies { - my $stat = shift; - my @values = split( " ", $stat ); - my $jiffies = 0; - $jiffies += $values[13]; # utime - $jiffies += $values[14]; # stime - return $jiffies; +sub pcpu_user { + my ( $cpucount, $elapsed, $start, $end ) = @_; + my @pre = split( " ", $start ); + my @post = split( " ", $end ); + my $jused = $post[13] - $pre[13]; + my $used = ( $jused / $elapsed ) * $cpucount * 100; + return sprintf( "%d", $used ); +} + +sub pcpu_sys { + my ( $cpucount, $elapsed, $start, $end ) = @_; + my @pre = split( " ", $start ); + my @post = split( " ", $end ); + my $jused = $post[14] - $pre[14]; + my $used = ( $jused / $elapsed ) * $cpucount * 100; + return sprintf( "%d", $used ); +} + +sub pcpu_total { + my ( $cpucount, $elapsed, $start, $end ) = @_; + my @pre = split( " ", $start ); + my @post = split( " ", $end ); + my $jused = $post[13] - $pre[13] + $post[14] - $pre[14]; + my $used = ( $jused / $elapsed ) * $cpucount * 100; + return sprintf( "%d", $used ); } my $proc_info; @@ -5613,14 +5630,29 @@ my ( $l1, $l5, $l15 ) = split( " ", $load_avg ); foreach my $proc ( @{$list} ) { - my $vp = $proc->{vp}; - my $jpre = stat_to_jiffies( $proc->{stat_start} ); - my $jpost = stat_to_jiffies( $proc->{stat_end} ); - my $jused = $jpost - $jpre; - my $used = ( $jused / $elapsed ) * $cpucount * 100; - my $used_str = sprintf( "%d", $used ); + my $vp = $proc->{vp}; - proc_output( $vp, "pcpu", $used_str ); + proc_output( + $vp, "pcpu", + pcpu_total( + $cpucount, $elapsed, + $proc->{stat_start}, $proc->{stat_end} + ) + ); + proc_output( + $vp, "pucpu", + pcpu_user( + $cpucount, $elapsed, + $proc->{stat_start}, $proc->{stat_end} + ) + ); + proc_output( + $vp, "pscpu", + pcpu_sys( + $cpucount, $elapsed, + $proc->{stat_start}, $proc->{stat_end} + ) + ); proc_output( $vp, "load1", $l1 ); proc_output( $vp, "load5", $l15 ); proc_output( $vp, "load15", $l15 ); From codesite-noreply at google.com Thu Jul 9 21:49:13 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Thu, 09 Jul 2009 20:49:13 +0000 Subject: [padb-devel] [padb] r97 commited - Move the --no-strip-below-main and --no-strip-above-wait options... Message-ID: <00163646d11a0adef6046e4bfd78@google.com> Revision: 97 Author: apittman Date: Thu Jul 9 13:48:44 2009 Log: Move the --no-strip-below-main and --no-strip-above-wait options from being global variables to mode options. http://code.google.com/p/padb/source/detail?r=97 Modified: /branches/full-duplex/src/padb ======================================= --- /branches/full-duplex/src/padb Wed Jul 8 18:01:48 2009 +++ /branches/full-duplex/src/padb Thu Jul 9 13:48:44 2009 @@ -569,9 +569,6 @@ my $compress_C; my $tree; -my $strip_below_main = 1; -my $strip_above_wait = 1; - my @config_options; my %ic_names; @@ -603,9 +600,7 @@ "core=s" => \$core_name, "exe=s" => \$exe_name, "list-rmgrs" => \$list_rmgrs, - "strip-below-main!" => \$strip_below_main, - "strip-above-wait!" => \$strip_above_wait, - "watch!" => \$watch, + "watch" => \$watch, "local-stats" => \$local_stats, "show-jobs" => \$show_jobs, "norc" => \$norc, @@ -2714,7 +2709,7 @@ ############################################################################### sub strip_stack_traces { - my $lines = shift; + my ( $cargs, $lines ) = @_; my %above; my %below; @@ -2747,10 +2742,11 @@ $main_idx = 0 if not defined $main_idx; if ( $main_idx != 0 or $wait_idx != 0 ) { my $end = - ( $strip_above_wait and $wait_idx ) + ( $cargs->{"strip-above-wait"} and $wait_idx ) ? $wait_idx : $#{ $lines->{$tag} }; - my $start = ( $strip_below_main and $main_idx ) ? $main_idx : 0; + my $start = + ( $cargs->{"strip-below-main"} and $main_idx ) ? $main_idx : 0; printf( "Stripping 0.." . $#{ $lines->{$tag} } @@ -2907,11 +2903,11 @@ return; } - if ( $mode eq "stack" or $input_file ) { - if ( $strip_below_main or $strip_above_wait ) { - strip_stack_traces($lines); - } - } + #if ( $mode eq "stack" or $input_file ) { + # if ( $strip_below_main or $strip_above_wait ) { + # strip_stack_traces(undef,$lines); + # } + #} if ($tree) { print show_tree go_p( 0, $lines, @@ -2984,6 +2980,8 @@ sub default_output_handler { my ( $req, $d ) = @_; + my $cargs = $req->{cargs}; + # Could warn on missing output here... my $lines = $d->{target_output}; @@ -2993,8 +2991,9 @@ $output = $req->{out_format} if defined $req->{out_format}; if ( $mode eq "stack" or $input_file ) { - if ( $strip_below_main or $strip_above_wait ) { - strip_stack_traces($lines); + if ( $cargs->{"strip-below-main"} or $cargs->{"strip-above-wait"} ) { + printf("Striping stack traces\n"); + strip_stack_traces( $cargs, $lines ); } } @@ -3270,10 +3269,13 @@ $req->{out_format} = $cmd->{out_format}; } - # Send along the secondary args. + # Send along the secondary args, taking care not to override any that + # are already defined. if ( defined $allfns{ $req->{mode} }{secondary} ) { foreach my $sec ( @{ $allfns{ $req->{mode} }{secondary} } ) { - $req->{cargs}{ $sec->{arg_long} } = $sec->{value}; + if ( not defined( $req->{cargs}{ $sec->{arg_long} } ) ) { + $req->{cargs}{ $sec->{arg_long} } = $sec->{value}; + } } } @@ -3966,9 +3968,6 @@ $c{"strip-above-wait"} = 0; push_command( "stack", "tree", \%c ); - # This option is still a global rather than being just a mode option. - $strip_above_wait = 0; - go_job($full_report); exit(0); } @@ -6846,7 +6845,7 @@ $res .= "|$arg->{arg_short}"; } if ( defined $arg->{type} ) { - $res .= "=$arg->{type}"; + $res .= $arg->{type}; } return $res; } @@ -6890,7 +6889,7 @@ 'secondary' => [ { 'arg_long' => 'signal', - 'type' => 's', + 'type' => '=s', 'default' => 'TERM' } ] @@ -6950,7 +6949,7 @@ 'secondary' => [ { 'arg_long' => 'proc-format', - 'type' => 's', + 'type' => '=s', 'default' => 'vp=vpid,hostname,pid,vmsize,vmrss,stat.state=S,load1=uptime,pcpu=%cpu,name=command' } @@ -6970,7 +6969,20 @@ "stack-strip-above" => "elan_waitWord,elan_pollWord,elan_deviceCheck,opal_condition_wait", "stack-strip-below" => "main", - } + }, + 'secondary' => [ + { + 'arg_long' => 'strip-below-main', + 'type' => '!', + 'default' => 1, + }, + { + 'arg_long' => 'strip-above-wait', + 'type' => '!', + 'default' => 1, + }, + ] + }; $allfns{stack_long} = { @@ -7001,7 +7013,7 @@ 'secondary' => [ { 'arg_long' => 'dflag', - 'type' => 's', + 'type' => '=s', 'default' => '0' } ] From codesite-noreply at google.com Thu Jul 9 22:10:22 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Thu, 09 Jul 2009 21:10:22 +0000 Subject: [padb-devel] [padb] r98 commited - Whoop, remove a printf which had slipped in. Message-ID: <0016e64b9d08ae30ad046e4c48f8@google.com> Revision: 98 Author: apittman Date: Thu Jul 9 14:10:01 2009 Log: Whoop, remove a printf which had slipped in. http://code.google.com/p/padb/source/detail?r=98 Modified: /branches/full-duplex/src/padb ======================================= --- /branches/full-duplex/src/padb Thu Jul 9 13:48:44 2009 +++ /branches/full-duplex/src/padb Thu Jul 9 14:10:01 2009 @@ -2992,7 +2992,6 @@ if ( $mode eq "stack" or $input_file ) { if ( $cargs->{"strip-below-main"} or $cargs->{"strip-above-wait"} ) { - printf("Striping stack traces\n"); strip_stack_traces( $cargs, $lines ); } } From codesite-noreply at google.com Wed Jul 15 17:03:59 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 15 Jul 2009 16:03:59 +0000 Subject: [padb-devel] [padb commit] r99 - Add a extra newline to the help so it's clearer what goes with what. Message-ID: <001636e0aedb025a8b046ec0b4b3@google.com> Author: apittman Date: Wed Jul 15 09:02:50 2009 New Revision: 99 Modified: branches/full-duplex/src/padb Log: Add a extra newline to the help so it's clearer what goes with what. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 15 09:02:50 2009 @@ -3814,7 +3814,7 @@ } foreach my $mode ( sort( keys( %{ $conf{mode_options} } ) ) ) { - printf("Options for mode '$mode'\n"); + printf("\nOptions for mode '$allfns{$mode}{arg_long}'\n"); foreach my $key ( sort( keys( %{ $conf{mode_options}{$mode} } ) ) ) { if ( defined $conf{mode_options}{$mode}{$key} ) { printf( From codesite-noreply at google.com Wed Jul 15 17:08:05 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 15 Jul 2009 16:08:05 +0000 Subject: [padb-devel] [padb commit] r100 - Add an explicit return so this runction returns undef, otherwise Message-ID: <00163645923cb08e5a046ec0c284@google.com> Author: apittman Date: Wed Jul 15 09:04:04 2009 New Revision: 100 Modified: branches/full-duplex/src/padb Log: Add an explicit return so this runction returns undef, otherwise we return the result of the signal call which gets passed back up the tree only to be ignored at a later stage. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 15 09:04:04 2009 @@ -5845,6 +5845,7 @@ my ( $cargs, $vp, $pid ) = @_; my $signal = uc( $cargs->{signal} ); kill( $signal, $pid ); + return; } sub show_queue { From codesite-noreply at google.com Wed Jul 15 18:21:04 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 15 Jul 2009 17:21:04 +0000 Subject: [padb-devel] [padb commit] r101 - Combine the target_output and target_responce in the Message-ID: <001636457a9aae7700046ec1c725@google.com> Author: apittman Date: Wed Jul 15 10:20:25 2009 New Revision: 101 Modified: branches/full-duplex/src/padb Log: Combine the target_output and target_responce in the reply_from_child() funtion making sure we pass all responces back up the tree to where they belong. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 15 10:20:25 2009 @@ -4760,6 +4760,7 @@ gdb_detach($gdb); gdb_quit($gdb); } + return; } # Ideally handle all this at a higher level... @@ -5660,6 +5661,7 @@ if ( $confInner{mode} eq "proc-summary" ) { return $proc_info; } + return; } sub show_proc { @@ -5839,6 +5841,7 @@ } } } + return; } sub kill_proc { @@ -5938,6 +5941,7 @@ } unlink($file); + return; } sub set_debug { @@ -6421,6 +6425,7 @@ run_command( undef, "$confInner{edb} --stats-raw --parallel --key=$key $confInner{edbopt}" ); + return; } sub default_handler_all { @@ -6471,17 +6476,30 @@ } } + # Combine the target process responces. + if ( exists $r->{target_output} ) { + foreach my $tp ( keys( %{ $r->{target_output} } ) ) { + $handle->{all_replys}->{target_output}{$tp} = + $r->{target_output}{$tp}; + } + } + + if ( exists $handle->{target_responce} ) { + foreach my $tp ( keys( %{ $handle->{target_responce} } ) ) { + $handle->{all_replys}->{target_responce}{$tp} = + $handle->{target_responce}{$tp}; + } + } + # Save any output we've got from this node. foreach my $key ( keys(%inner_output) ) { - $r->{target_output}{$key} = $inner_output{$key}; + $handle->{all_replys}->{target_output}{$key} = $inner_output{$key}; } %inner_output = (); - # If this is the last reply from a child then report upstream. - # print Dumper $handle; + # If this isn't the last child to signon don't reply up-stream yet. if ( $handle->{child_replys} != $handle->{children} ) { - my $missing = $handle->{children} - $handle->{child_replys}; return; } From codesite-noreply at google.com Wed Jul 15 18:40:11 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 15 Jul 2009 17:40:11 +0000 Subject: [padb-devel] [padb commit] r102 - Handle the case where there is no reply from individual processes, Message-ID: <001636e0b5e30629eb046ec20c5a@google.com> Author: apittman Date: Wed Jul 15 10:38:54 2009 New Revision: 102 Modified: branches/full-duplex/src/padb Log: Handle the case where there is no reply from individual processes, don't pass undef back up the tree, instead return nothing. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 15 10:38:54 2009 @@ -6434,9 +6434,11 @@ foreach my $proc ( @{$list} ) { my $vp = $proc->{vp}; my $pid = $proc->{pid}; - $gres{$vp} = + my $res = $allfns{ $cmd->{mode} }{handler}( $cmd->{cargs}, $vp, $pid ); + $gres{$vp} = $res if ( defined $res ); } + return if not %gres; return \%gres; } @@ -6634,9 +6636,11 @@ } else { $res = default_handler_all( $cmd, $pid_list ); } - $netdata->{target_responce} = $res; - $netdata->{all_replys}{target_responce} = $res; - + if ( $res ) { + $netdata->{target_responce} = $res; + $netdata->{all_replys}{target_responce} = $res; + } + return; } From codesite-noreply at google.com Wed Jul 15 19:17:45 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 15 Jul 2009 18:17:45 +0000 Subject: [padb-devel] [padb commit] r103 - Include the last cpu executed on in the default proc-summary output. Message-ID: <0016364c63755db3e4046ec292f5@google.com> Author: apittman Date: Wed Jul 15 11:17:03 2009 New Revision: 103 Modified: branches/full-duplex/src/padb Log: Include the last cpu executed on in the default proc-summary output. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 15 11:17:03 2009 @@ -6973,7 +6973,7 @@ 'arg_long' => 'proc-format', 'type' => '=s', 'default' => -'vp=vpid,hostname,pid,vmsize,vmrss,stat.state=S,load1=uptime,pcpu=%cpu,name=command' +'vp=vpid,hostname,pid,vmsize,vmrss,stat.state=S,load1=uptime,pcpu=%cpu,stat.processor=lcorename=command' } ] From codesite-noreply at google.com Wed Jul 15 19:21:46 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 15 Jul 2009 18:21:46 +0000 Subject: [padb-devel] [padb commit] r104 - Fix typo in previous commit. Message-ID: <00163645825abf484e046ec2a068@google.com> Author: apittman Date: Wed Jul 15 11:18:36 2009 New Revision: 104 Modified: branches/full-duplex/src/padb Log: Fix typo in previous commit. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 15 11:18:36 2009 @@ -6973,7 +6973,7 @@ 'arg_long' => 'proc-format', 'type' => '=s', 'default' => -'vp=vpid,hostname,pid,vmsize,vmrss,stat.state=S,load1=uptime,pcpu=%cpu,stat.processor=lcorename=command' +'vp=vpid,hostname,pid,vmsize,vmrss,stat.state=S,load1=uptime,pcpu=%cpu,stat.processor=lcore,name=command' } ] From codesite-noreply at google.com Wed Jul 15 19:27:54 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 15 Jul 2009 18:27:54 +0000 Subject: [padb-devel] [padb commit] r105 - Return undef from show_queue to prevent unnecessairy data Message-ID: <000e0cd17e78b2b552046ec2b653@google.com> Author: apittman Date: Wed Jul 15 11:27:28 2009 New Revision: 105 Modified: branches/full-duplex/src/padb Log: Return undef from show_queue to prevent unnecessairy data being passed back up the tree. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 15 11:27:28 2009 @@ -5868,6 +5868,7 @@ return if ( $lines != 0 ); show_mpi_queue( $carg, $vp, $pid ); + return; } sub show_clever_full_stack { From codesite-noreply at google.com Wed Jul 15 20:55:36 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 15 Jul 2009 19:55:36 +0000 Subject: [padb-devel] [padb commit] r106 - Run through perltidy to indent r102 nicely. Message-ID: <001636e9116b5338fe046ec3f021@google.com> Author: apittman Date: Wed Jul 15 12:54:33 2009 New Revision: 106 Modified: branches/full-duplex/src/padb Log: Run through perltidy to indent r102 nicely. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 15 12:54:33 2009 @@ -6435,9 +6435,8 @@ foreach my $proc ( @{$list} ) { my $vp = $proc->{vp}; my $pid = $proc->{pid}; - my $res = - $allfns{ $cmd->{mode} }{handler}( $cmd->{cargs}, $vp, $pid ); - $gres{$vp} = $res if ( defined $res ); + my $res = $allfns{ $cmd->{mode} }{handler}( $cmd->{cargs}, $vp, $pid ); + $gres{$vp} = $res if ( defined $res ); } return if not %gres; return \%gres; @@ -6637,11 +6636,11 @@ } else { $res = default_handler_all( $cmd, $pid_list ); } - if ( $res ) { - $netdata->{target_responce} = $res; - $netdata->{all_replys}{target_responce} = $res; + if ($res) { + $netdata->{target_responce} = $res; + $netdata->{all_replys}{target_responce} = $res; } - + return; } From codesite-noreply at google.com Wed Jul 15 21:00:39 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 15 Jul 2009 20:00:39 +0000 Subject: [padb-devel] [padb commit] r107 - Tidy up the error messages when the inner command dies unexpectadly, Message-ID: <001636456fe65b25d5046ec402b8@google.com> Author: apittman Date: Wed Jul 15 13:00:25 2009 New Revision: 107 Modified: branches/full-duplex/src/padb Log: Tidy up the error messages when the inner command dies unexpectadly, firstly print a warning if we didn't expect it to exit and also print more meaningfull information when there is an error. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 15 13:00:25 2009 @@ -3522,10 +3522,6 @@ $comm_data->{sockets}{$sl} = \%cdata; } - my $errors = 0; - - my $report_errors = 1; - my $pcmd = { pid => -1, in => "", @@ -3596,15 +3592,20 @@ waitpid( $pcmd->{pid}, 0 ); my $res = $?; - printf("result from parallel command was $res\n") + if ( $comm_data->{state} ne "shutdown" ) { + printf( +"Unexpected exit from parallel command (state=$comm_data->{state})\n" + ); + } + printf("result from parallel command was $res ($comm_data->{state})\n") if ( $conf{"verbose"} ); if ( $res != 0 ) { my %status = rc_status($res); if ( job_is_running($jobid) ) { - if ($report_errors) { - printf("Failed to run parallel command (rc = $status{rc})\n"); - } + printf( + "Bad exit code from parallel command (exit_code=$status{rc})\n" + ); } else { printf("Job $jobid is no longer active\n"); return 1; @@ -4675,7 +4676,7 @@ kill( "CONT", $pid ); my $p = gdb_attach( $g, $pid ); if ( !$p ) { - debug( $vp, "Failed to attach to $pid\n" ); + debug( $vp, "Failed to attach to process\n" ); return; } From codesite-noreply at google.com Wed Jul 15 21:18:48 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 15 Jul 2009 20:18:48 +0000 Subject: [padb-devel] [padb commit] r108 - Check that the attach returned anything before checking Message-ID: <001636e909e94bc55a046ec4436d@google.com> Author: apittman Date: Wed Jul 15 13:18:14 2009 New Revision: 108 Modified: branches/full-duplex/src/padb Log: Check that the attach returned anything before checking that it returned an error, fixes issue #2 Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 15 13:18:14 2009 @@ -4198,6 +4198,7 @@ my $result = gdb_send( $gdb, "attach $pid" ); + return unless defined $result; return if ( $result eq "error" ); $gdb->{attached} = 1; From codesite-noreply at google.com Wed Jul 15 22:41:00 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Wed, 15 Jul 2009 21:41:00 +0000 Subject: [padb-devel] [padb commit] r109 - Add the number of threads to the process information. Message-ID: <000e0cd14eee3e9ce1046ec56969@google.com> Author: apittman Date: Wed Jul 15 14:39:54 2009 New Revision: 109 Modified: branches/full-duplex/src/padb Log: Add the number of threads to the process information. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Wed Jul 15 14:39:54 2009 @@ -3597,7 +3597,7 @@ "Unexpected exit from parallel command (state=$comm_data->{state})\n" ); } - printf("result from parallel command was $res ($comm_data->{state})\n") + printf("result from parallel command is $res (state=$comm_data->{state})\n") if ( $conf{"verbose"} ); if ( $res != 0 ) { @@ -5675,6 +5675,8 @@ if ( -d "/proc/$pid/task" and $carg->{"proc-shows-proc"} ) { + my $threads = 0; + # 2.6 kernel. (ntpl) opendir( DIR, "/proc/$pid/task" ); my @tasks = readdir(DIR); @@ -5683,7 +5685,9 @@ next if ( $task eq "." ); next if ( $task eq ".." ); show_task_dir( $carg, $vp, $pid, "/proc/$pid/task/$task" ); + $threads++; } + proc_output( $vp, "threads", $threads ); } else { show_task_dir( $carg, $vp, $pid, "/proc/$pid" ); } From codesite-noreply at google.com Fri Jul 17 16:11:44 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Fri, 17 Jul 2009 15:11:44 +0000 Subject: [padb-devel] [padb commit] r110 - Read not just the name and value for variables but also Message-ID: <00163645923cd80be4046ee834c3@google.com> Author: apittman Date: Fri Jul 17 08:11:13 2009 New Revision: 110 Modified: branches/full-duplex/src/padb Log: Read not just the name and value for variables but also the type. Tidy up the output slightly to put it into a (per function) table so it's easy to read. Move the paramaters from in-line in the function name to be in a similar table to the locals. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Fri Jul 17 08:11:13 2009 @@ -4999,9 +4999,16 @@ print $r; } -# For reference the other interesting options here are these two. -# "-stack-list-arguments 1" -# "-stack-list-locals 2" +sub gdb_read_value { + my ( $gdb, $name ) = @_; + my %t = gdb_n_send( $gdb, "-data-evaluate-expression $name" ); + if ( $t{status} eq "done" ) { + my $v = gdb_parse_reason( $t{reason} ); + return $v->{value}; + } + return; +} + sub gdb_dump_frames { my ( $gdb, $detail ) = @_; my %result = gdb_n_send( $gdb, "-stack-list-frames" ); @@ -5012,31 +5019,33 @@ if ( defined $detail ) { foreach my $frame ( @{ $data->{stack} } ) { my %r = gdb_n_send( $gdb, - "-stack-list-arguments 0 $frame->{level} $frame->{level}" ); + "-stack-list-arguments 2 $frame->{level} $frame->{level}" ); my $args = gdb_parse_reason( $r{reason}, "name" ); - my @all; if ( defined $args->{"stack-args"}[0]{frame}{args} ) { my @names = @{ $args->{"stack-args"}[0]{frame}{args} }; @{ $frame->{params} } = @names; - push @all, (@names); } gdb_send( $gdb, "-stack-select-frame $frame->{level}" ); - my %s = gdb_n_send( $gdb, "-stack-list-locals 0" ); + my %s = gdb_n_send( $gdb, "-stack-list-locals --simple-values" ); if ( $s{status} eq "done" ) { my $args = gdb_parse_reason( $s{reason}, "name" ); if ( defined $args->{locals} ) { - @{ $frame->{locals} } = @{ $args->{locals} }; - push @all, ( @{ $args->{locals} } ); - } - } - foreach my $name (@all) { - my %t = gdb_n_send( $gdb, "-data-evaluate-expression $name" ); - if ( $t{status} eq "done" ) { - my $v = gdb_parse_reason( $t{reason} ); - $frame->{vals}{$name} = $v->{value}; + # Some variables don't show up a value from list-locals, + # __FUNCION__ and array pointers are two examples. For + # vars where the value isn't given automatically read + # the value of them directly. + foreach my $arg ( @{ $args->{locals} } ) { + next if defined $arg->{value}; + + my $value = gdb_read_value( $gdb, $arg->{name} ); + if ( defined $value ) { + $arg->{value} = $value; + } + } + $frame->{locals} = $args->{locals}; } } } @@ -5709,6 +5718,26 @@ return $nvp; } +sub show_vars { + my ( $vp, $frame, $type ) = @_; + my %l; + $l{t} = 0; + $l{n} = 0; + return if ( @{ $frame->{$type} } == 0 ); + foreach my $arg ( @{ $frame->{$type} } ) { + $l{t} = length( $arg->{type} ) if ( length( $arg->{type} ) > $l{t} ); + $l{n} = length( $arg->{name} ) if ( length( $arg->{name} ) > $l{n} ); + } + my $header = sprintf(" $type:"); + output( $vp, $header ); + foreach my $arg ( @{ $frame->{$type} } ) { + my $value = ( defined $arg->{value} ? $arg->{value} : "??" ); + my $output = + sprintf( " %-$l{t}s %$l{n}s = $value", $arg->{type}, $arg->{name} ); + output( $vp, $output ); + } +} + # Try and be clever here, attach to each and every process on this node first, # then go back and query them each in turn, should mean that some processes are # not spinning whilst gdb is doing it's thing which will mean a quicker runtime @@ -5814,34 +5843,16 @@ next unless exists $$frame{level}; next unless exists $$frame{func}; + output( $vp, + ( $$frame{func} || "?" ) + . "() at " + . ( $$frame{file} || "?" ) . ":" + . ( $$frame{line} || "?" ) ); if ( $carg->{"stack-shows-params"} ) { - my @a; - foreach my $arg ( @{ $frame->{params} } ) { - if ( defined $frame->{vals}{$arg} ) { - push( @a, "$arg = $frame->{vals}{$arg}" ); - } else { - push( @a, "$arg = ??" ); - } - } - my $a = join( ", ", @a ); - my $file = $frame->{file} || "?"; - my $line = $frame->{line} || "?"; - output( $vp, "$frame->{func}($a) at $file:$line" ); - } else { - output( $vp, - ( $$frame{func} || "?" ) - . "() at " - . ( $$frame{file} || "?" ) . ":" - . ( $$frame{line} || "?" ) ); + show_vars( $vp, $frame, "params" ); } if ( $carg->{"stack-shows-locals"} ) { - foreach my $arg ( @{ $frame->{locals} } ) { - if ( defined $frame->{vals}{$arg} ) { - output( $vp, " $arg = $frame->{vals}{$arg}" ); - } else { - output( $vp, " $arg = ??" ); - } - } + show_vars( $vp, $frame, "locals" ); } } From codesite-noreply at google.com Fri Jul 17 17:07:12 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Fri, 17 Jul 2009 16:07:12 +0000 Subject: [padb-devel] [padb commit] r111 - Add but don't enable code for following pointers when found Message-ID: <001636e1fcf72e8686046ee8fb73@google.com> Author: apittman Date: Fri Jul 17 09:06:30 2009 New Revision: 111 Modified: branches/full-duplex/src/padb Log: Add but don't enable code for following pointers when found in stack traces. This can result is lots of data so it probably needs to be filtered down somehow, perhaps being enabled on a per type/variable/struct entry name? Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Fri Jul 17 09:06:30 2009 @@ -5001,7 +5001,8 @@ sub gdb_read_value { my ( $gdb, $name ) = @_; - my %t = gdb_n_send( $gdb, "-data-evaluate-expression $name" ); + # Quote the request in case it contains spaces. + my %t = gdb_n_send( $gdb, "-data-evaluate-expression \"$name\"" ); if ( $t{status} eq "done" ) { my $v = gdb_parse_reason( $t{reason} ); return $v->{value}; @@ -5009,6 +5010,36 @@ return; } +sub gdb_expand_vars { + my ( $gdb, $frame, $type ) = @_; + + foreach my $arg ( @{ $frame->{$type} } ) { + + # Detect simple pointers and deferefence then to show the underlying + # struct. Works quite well but is a problem with very large or complex + # data structures. More work is required to make this feature viable so + # leave it disabled for now. Perhaps have an option for enabling it + # in a per-type basis? + + if ( ( $arg->{type} =~ m/ \*$/ ) and $arg->{value} ne "0x0" and 0 ) { + my $value = gdb_read_value( $gdb, "* $arg->{name}" ); + if ( defined $value ) { + $arg->{value} .= " ($value)"; + } + } + + # Some variables don't show up a value from list-locals, + # __FUNCION__ and array pointers are two examples. For + # vars where the value isn't given automatically read + # the value of them directly. + next if defined $arg->{value}; + my $value = gdb_read_value( $gdb, $arg->{name} ); + if ( defined $value ) { + $arg->{value} = $value; + } + } +} + sub gdb_dump_frames { my ( $gdb, $detail ) = @_; my %result = gdb_n_send( $gdb, "-stack-list-frames" ); @@ -5018,6 +5049,8 @@ } if ( defined $detail ) { foreach my $frame ( @{ $data->{stack} } ) { + gdb_send( $gdb, "-stack-select-frame $frame->{level}" ); + my %r = gdb_n_send( $gdb, "-stack-list-arguments 2 $frame->{level} $frame->{level}" ); my $args = gdb_parse_reason( $r{reason}, "name" ); @@ -5025,27 +5058,16 @@ if ( defined $args->{"stack-args"}[0]{frame}{args} ) { my @names = @{ $args->{"stack-args"}[0]{frame}{args} }; @{ $frame->{params} } = @names; + + gdb_expand_vars( $gdb, $frame, "params" ); } - gdb_send( $gdb, "-stack-select-frame $frame->{level}" ); my %s = gdb_n_send( $gdb, "-stack-list-locals --simple-values" ); if ( $s{status} eq "done" ) { my $args = gdb_parse_reason( $s{reason}, "name" ); if ( defined $args->{locals} ) { - - # Some variables don't show up a value from list-locals, - # __FUNCION__ and array pointers are two examples. For - # vars where the value isn't given automatically read - # the value of them directly. - foreach my $arg ( @{ $args->{locals} } ) { - next if defined $arg->{value}; - - my $value = gdb_read_value( $gdb, $arg->{name} ); - if ( defined $value ) { - $arg->{value} = $value; - } - } $frame->{locals} = $args->{locals}; + gdb_expand_vars( $gdb, $frame, "locals" ); } } } From codesite-noreply at google.com Fri Jul 17 18:10:42 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Fri, 17 Jul 2009 17:10:42 +0000 Subject: [padb-devel] [padb commit] r112 - Collect together all the code which sends SIGCONT to a process Message-ID: <0016e64af60c45358a046ee9def1@google.com> Author: apittman Date: Fri Jul 17 10:09:46 2009 New Revision: 112 Modified: branches/full-duplex/src/padb Log: Collect together all the code which sends SIGCONT to a process before attaching into a send_cont_signal() function. Call this function inside gdb_attach and gdb_detach to ensure it always gets called. We should probably read the process state and check if it's T or not and warn the user if it is rather than just silently fixing things up. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Fri Jul 17 10:09:46 2009 @@ -4196,6 +4196,7 @@ sub gdb_attach { my ( $gdb, $pid ) = @_; + send_cont_signal($pid); my $result = gdb_send( $gdb, "attach $pid" ); return unless defined $result; @@ -4215,7 +4216,7 @@ $gdb->{attached} = 0; - return $gdb->{tracepid}; + send_cont_signal( $gdb->{tracepid} ); } sub gdb_wait_for_prompt { @@ -4671,10 +4672,19 @@ return @mq; } +# Send a CONT signal to a pid, there have been problems where a program +# is in "T" state which causes the attach to hang forever. Send the +# process a signal before attaching to wake it up in case this is the case. +# gdb crashing (yes it does happen) is a common case for processes to be +# stopped so always deliver this signal before and after attaching. +sub send_cont_signal { + my $pid = shift; + kill( "CONT", $pid ); +} + sub fetch_mpi_queue { my ( $carg, $vp, $pid ) = @_; my $g = gdb_start(); - kill( "CONT", $pid ); my $p = gdb_attach( $g, $pid ); if ( !$p ) { debug( $vp, "Failed to attach to process\n" ); @@ -4735,7 +4745,6 @@ debug $vp, "Attaching to $pid"; my $gdb = gdb_start(); - kill( "CONT", $pid ); if ( gdb_attach( $gdb, $pid ) ) { $proc->{gdb} = $gdb; push( @all, $proc ); @@ -4778,7 +4787,6 @@ debug $vp, "Attaching to $pid"; my $gdb = gdb_start(); - kill( "CONT", $pid ); if ( gdb_attach( $gdb, $pid ) ) { $proc->{gdb} = $gdb; push( @all, $proc ); @@ -5001,6 +5009,7 @@ sub gdb_read_value { my ( $gdb, $name ) = @_; + # Quote the request in case it contains spaces. my %t = gdb_n_send( $gdb, "-data-evaluate-expression \"$name\"" ); if ( $t{status} eq "done" ) { @@ -5237,7 +5246,7 @@ debug $vp, "running (p) $cmd"; my $lines = 0; - kill( "CONT", $pid ); + send_cont_signal($pid); open( CMD, "$cmd 2>/dev/null|" ) || p_die( $vp, "cant start command $cmd" ); while () { @@ -5245,7 +5254,7 @@ output $vp, $_; $lines++; } - kill( "CONT", $pid ); + send_cont_signal($pid); close CMD; return $lines; } @@ -5776,7 +5785,6 @@ debug $vp, "Attaching to $pid"; my $gdb = gdb_start(); - kill( "CONT", $pid ); if ( gdb_attach( $gdb, $pid ) ) { $proc->{gdb} = $gdb; push( @all, $proc ); @@ -5801,7 +5809,7 @@ if ($tries) { debug $vp, "Re-attaching to $pid, $tries"; - kill( "CONT", $pid ); + send_cont_signal($pid); sleep(1); my $g = gdb_start(); if ( gdb_attach( $g, $pid ) ) { @@ -5824,7 +5832,6 @@ } gdb_detach($gdb); gdb_quit($gdb); - kill( "CONT", $pid ); if ( defined $threads[0]->{frames} ) { my @frames = @{ $threads[0]->{frames} }; @@ -5914,12 +5921,11 @@ my ( $vp, $pid ) = @_; my $gdb = gdb_start(); - kill( "CONT", $pid ); + if ( gdb_attach( $gdb, $pid ) ) { my @threads = gdb_dump_frames_per_thread( $gdb, 1 ); gdb_detach($gdb); gdb_quit($gdb); - kill( "CONT", $pid ); foreach my $thread ( sort { $a->{id} <=> $b->{id} } @threads ) { my @frames = @{ $thread->{frames} }; @@ -6061,7 +6067,6 @@ my $vp = $proc->{vp}; my $pid = $proc->{pid}; my $gdb = gdb_start(); - kill( "CONT", $pid ); if ( gdb_attach( $gdb, $pid ) ) { $proc->{gdb} = $gdb; } else { From codesite-noreply at google.com Fri Jul 17 18:24:46 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Fri, 17 Jul 2009 17:24:46 +0000 Subject: [padb-devel] [padb commit] r113 - Test and re-instate local as a valid resource manager. For some Message-ID: <0016364582cc9a4549046eea1053@google.com> Author: apittman Date: Fri Jul 17 10:23:41 2009 New Revision: 113 Modified: branches/full-duplex/src/padb Log: Test and re-instate local as a valid resource manager. For some reason it requires inner-callback to be enabled but as it's only for one host this isn't a problem. I assume it's something todo with buffering as normal :( Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Fri Jul 17 10:23:41 2009 @@ -327,25 +327,28 @@ }; $rmgr{"local"} = { - 'get_active_jobs' => \&local_get_jobs, - 'job_is_running' => \&local_job_is_running, - 'setup_pcmd' => \&local_setup_pcmd, - 'find_pids' => \&local_find_pids, + 'get_active_jobs' => \&local_get_jobs, + 'job_is_running' => \&local_job_is_running, + 'setup_pcmd' => \&local_setup_pcmd, + 'find_pids' => \&local_find_pids, + 'require_inner_callback' => 1, }; $rmgr{"local-qsnet"} = { - 'is_installed' => \&local_q_is_installed, - 'get_active_jobs' => \&local_q_get_jobs, - 'job_is_running' => \&local_job_is_running, - 'setup_pcmd' => \&local_setup_pcmd, - 'inner_rmgr' => "local", + 'is_installed' => \&local_q_is_installed, + 'get_active_jobs' => \&local_q_get_jobs, + 'job_is_running' => \&local_job_is_running, + 'setup_pcmd' => \&local_setup_pcmd, + 'inner_rmgr' => "local", + 'require_inner_callback' => 1, }; $rmgr{"local-fd"} = { - 'get_active_jobs' => \&local_fd_get_jobs, - 'job_is_running' => \&local_job_is_running, - 'setup_pcmd' => \&local_setup_pcmd, - 'inner_rmgr' => "local", + 'get_active_jobs' => \&local_fd_get_jobs, + 'job_is_running' => \&local_job_is_running, + 'setup_pcmd' => \&local_setup_pcmd, + 'inner_rmgr' => "local", + 'require_inner_callback' => 1, }; ############################################################################### @@ -2255,7 +2258,7 @@ } sub local_setup_pcmd { - return ( "", undef ); + return ( "", 1, 1 ); } ############################################################################### @@ -2549,10 +2552,12 @@ sub find_rmgr { # If it's been set on the command line and it's valid then just use what we are given. -# Don't do any sanity checks here to cope with non-default installs. +# Do sanity checks here but only warn on the result to cope with non-default installs. if ( defined $rmgr{ $conf{"rmgr"} } ) { - if ( not $rmgr{ $conf{"rmgr"} }{is_installed}() ) { + if ( defined $rmgr{ $conf{"rmgr"} }{is_installed} + and not $rmgr{ $conf{"rmgr"} }{is_installed}() ) + { printf( "Warning: Selected resource manager $conf{rmgr} does not appear to be installed\n" ); @@ -2594,10 +2599,12 @@ sub find_any_rmgr { # If it's been set on the command line and it's valid then just use what we are given. -# Don't do any sanity checks here to cope with non-default installs. +# Do sanity checks here but only warn on the result to cope with non-default installs. if ( defined $rmgr{ $conf{"rmgr"} } ) { - if ( not $rmgr{ $conf{"rmgr"} }{is_installed}() ) { + if ( defined $rmgr{ $conf{"rmgr"} }{is_installed} + and not $rmgr{ $conf{"rmgr"} }{is_installed}() ) + { printf( "Warning: Selected resource manager $conf{rmgr} does not appear to be installed\n" ); From codesite-noreply at google.com Fri Jul 17 18:31:51 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Fri, 17 Jul 2009 17:31:51 +0000 Subject: [padb-devel] [padb commit] r114 - Don't try and read elan_base->state->vp when running with "local" Message-ID: <0016e649d4aee4a9f6046eea29ca@google.com> Author: apittman Date: Fri Jul 17 10:31:24 2009 New Revision: 114 Modified: branches/full-duplex/src/padb Log: Don't try and read elan_base->state->vp when running with "local" as the resource manger, it's ugly, unlikely to work and breaks things. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Fri Jul 17 10:31:24 2009 @@ -5150,13 +5150,6 @@ return ""; } -sub gdb_int_from_raw { - my $str = shift; - if ( $str =~ /\$\d+ \= (\d)+/ ) { - return $1; - } -} - sub stack_from_core { my $exe = shift; my $core = shift; @@ -5740,22 +5733,6 @@ } } -sub gdb_int_from_pid { - my $pid = shift; - my $var = shift; - my $gdb = gdb_start(); - if ( not gdb_attach( $gdb, $pid ) ) { - return; - } - - # use data-evaluate-expression here? - my %r = gdb_n_send( $gdb, "p $var" ); - my $nvp = gdb_int_from_raw( $r{raw} ); - gdb_detach($gdb); - gdb_quit($gdb); - return $nvp; -} - sub show_vars { my ( $vp, $frame, $type ) = @_; my %l; @@ -6361,18 +6338,12 @@ } } -# Local processes per node, i.e. no resource manager support. +# Local processes per node, i.e. no resource manager support, we only +# have one process in this case so call it process 0. sub local_find_pids { my $pid = shift; -# Hard-wire this to vp 0, probably not true but without the resource manager it's difficult -# to tell. We should really use elan_base->state->vp here. - my $vp = gdb_int_from_pid( $pid, "elan_base->state->vp" ); - if ( not defined $vp or $vp eq "" ) { - $vp = 0; - } - - maybe_show_pid( $vp, $pid ); + maybe_show_pid( 0, $pid ); } sub mpd_find_pids { From codesite-noreply at google.com Sun Jul 19 20:38:02 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Sun, 19 Jul 2009 19:38:02 +0000 Subject: [padb-devel] [padb commit] r115 - Quote eronous signal name when exiting with a problem. Message-ID: <001636457520e23138046f142821@google.com> Author: apittman Date: Sun Jul 19 12:37:20 2009 New Revision: 115 Modified: branches/full-duplex/src/padb Log: Quote eronous signal name when exiting with a problem. Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Sun Jul 19 12:37:20 2009 @@ -4040,7 +4040,7 @@ map { $sig_names{$_} = 1 } split( " ", $Config{"sig_name"} ); if ( not defined $sig_names{$signal} ) { - cmdline_error("$prog: Error: signal $signal is invalid\n"); + cmdline_error("$prog: Error: signal \"$signal\" is invalid\n"); } } From codesite-noreply at google.com Sun Jul 19 21:54:17 2009 From: codesite-noreply at google.com (codesite-noreply at google.com) Date: Sun, 19 Jul 2009 20:54:17 +0000 Subject: [padb-devel] [padb commit] r116 - Add a debug_log() function to allow better handling of debugging, Message-ID: <000e0cd509c2955eae046f15392d@google.com> Author: apittman Date: Sun Jul 19 13:53:38 2009 New Revision: 116 Modified: branches/full-duplex/src/padb Log: Add a debug_log() function to allow better handling of debugging, all debugging goes through a single function and can be enabled on a per "mode" basis so different parts of the code can be debugged. Add a timestamp to all debugging and also allows Dumper to be called on an arbitary ref for those hard-to-reach problems. Enable debugging with the --debug=type,type2,type2=all... Modified: branches/full-duplex/src/padb ============================================================================== --- branches/full-duplex/src/padb (original) +++ branches/full-duplex/src/padb Sun Jul 19 13:53:38 2009 @@ -369,10 +369,8 @@ my $rem_jobid; # Debug options. -$conf{"verbose"} = 0; -$conf{"tree-verbose"} = 0; -$conf{"dump-raw"} = 0; -$conf{"showcmd"} = 0; +$conf{"verbose"} = 0; +$conf{"dump-raw"} = 0; # Valid values are "none" "missing" or "all". Anything not recognised # is treated as "all". @@ -386,7 +384,6 @@ $conf{"scripts"} = "bash,sh,dash,ash,perl,xterm"; $conf{"lsf-job-offset"} = 1; $conf{"local-fd-name"} = "/dev/null"; -$conf{"full-duplex"} = 1; $conf{"inner-callback"} = 0; # These two are used by deadlock and QsNet group @@ -479,7 +476,6 @@ prun-timeout Timeout to use when launching parallel job. Stack trace options: - tree-verbose turn on debugging for the stack trace tree generation code. gdb-retry-count Number of times to try getting a 'good' stack trace from gdb. stack-show-params Show function parameters in stack traces. stack-show-locals Show locals in stack traces. @@ -579,10 +575,38 @@ # can access secondary comamnd line argunments by name. my %secondary_args; +# Debugging: this function is called periodically with +# a mode, an abritary ref and a string, it can either print simply +# the string or call dumper on the ref as well. +# Enable with --debug=type1,type2=all +my %debugModes; +my $start_time = time(); + +sub debug_log { + my ( $type, $handle, $str, @params ) = @_; + if ( not defined $debugModes{$type} ) { + printf("Unknown debug mode: $type\n"); + exit(1); + } + return unless $debugModes{$type}; + my $time = time() - $start_time; + printf( "DEBUG ($type): %3d: $str\n", $time, @params ); + return if $debugModes{$type} eq "basic"; + return unless defined $handle; + print Dumper $handle; +} + +# Valid debug modes, a full list is maintained here so using unexpected +# ones can generate warnings. +$debugModes{"full-duplex"} = 0; +$debugModes{"show-cmd"} = 0; +$debugModes{"all"} = 0; +$debugModes{"tree"} = 0; + sub parse_args_outer { Getopt::Long::Configure("bundling"); - my $mode; + my $debugflag; my %optionhash = ( "verbose|v+" => \$conf{verbose}, @@ -607,7 +631,8 @@ "local-stats" => \$local_stats, "show-jobs" => \$show_jobs, "norc" => \$norc, - "config-file=s" => \$configfile + "config-file=s" => \$configfile, + "debug=s" => \$debugflag, ); my %config_hash; @@ -629,6 +654,24 @@ GetOptions(%optionhash) or exit(1); + if ( defined $debugflag ) { + foreach my $f ( split( ",", $debugflag ) ) { + my ( $name, $v ) = split( "=", $f ); + if ( defined $debugModes{$name} ) { + $debugModes{$name} = defined($v) ? $v : "basic"; + } else { + printf("Attempt to set unknown debug flag \"$name\".\n"); + } + } + if ( $debugModes{all} ) { + foreach my $mode ( keys(%debugModes) ) { + $debugModes{$mode} = $debugModes{all}; + } + } + } + + my $mode; + foreach my $arg ( keys %config_hash ) { next unless defined $config_hash{$arg}; $mode = $arg; @@ -1808,8 +1851,7 @@ my $prev; my $tag = $tags[0]; - printf("called tag:$tag, level:$level tags:@tags\n") - if $conf{"tree-verbose"}; + debug_log( "tree", undef, "called tag:$tag, level:$level tags:@tags" ); return if ( !defined($tag) ); return if ( !defined( $lines->{$tag} ) ); @@ -1868,9 +1910,11 @@ $endlevel = ( $#{ $lines->{$tag} } ); } - printf( -"level $level, endlevel $endlevel, identical:@identical different:@different\n", - ) if $conf{"tree-verbose"}; + debug_log( + "tree", undef, + +"level $level, endlevel $endlevel, identical:@identical different:@different" + ); for ( my $l = $level ; $l <= $endlevel ; $l++ ) { @@ -1898,9 +1942,11 @@ $prev->{children} = go_p( $endlevel + 1, $lines, @identical ); } - printf( -"returning level:$level endlevel:$endlevel identical:@identical different:@different\n" - ) if $conf{"tree-verbose"}; + debug_log( + "tree", undef, + +"returning level:$level endlevel:$endlevel identical:@identical different:@different" + ); if (@different) { my $new = go_p( $level, $lines, @different ); @@ -1942,6 +1988,7 @@ sub show_tree { my $ref = shift; + debug_log( "tree", $ref, "Complete tree" ); return _show_tree( $ref, undef, "" ); } @@ -3195,11 +3242,8 @@ sub issue_command_to_inner { my ( $cdata, $cmd ) = @_; my $str = my_encode($cmd); - if ( $conf{"full-duplex"} eq "debug" ) { - printf( "Sending command to inner, %d bytes\n", length($str) ); - print( Dumper($cmd) ); - - } + debug_log( "full-duplex", $cmd, "Sending command to inner, %d bytes", + length($str) ); $cdata->{socket}->print("$str\n"); } @@ -3357,10 +3401,7 @@ # A reply from inner. my $d = my_decode($line); - if ( $conf{"full-duplex"} eq "debug" ) { - printf( "Reply from inner, %d bytes\n", length($line) ); - print( Dumper($d) ); - } + debug_log( "full-duplex", $d, "Reply from inner, %d bytes", length($line) ); # The inner process has signed on. if ( $comm_data->{current_req}->{mode} eq "signon" ) { @@ -3710,7 +3751,7 @@ #} - ( $conf{"verbose"} > 1 or $conf{"showcmd"} ) && print "$cmd\n"; + debug_log( "show-cmd", undef, $cmd ); if ( not defined $hosts ) { printf("Full duplex mode needs to know the host count\n");