Changeset 379

Show
Ignore:
Timestamp:
08/25/06 01:39:52 (2 years ago)
Author:
marksmith
Message:

* make still_alive do its own "last time I announced this" logic so we don't
  have to duplicate that everywhere

* make watchdog timeout on replicator be 30 seconds, as it's very possible it's
  doing a much longer query or something

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • branches/server-newrepl/lib/MogileFS/Worker.pm

    r353 r379  
    55            'readbuf',            # unparsed data from parent 
    66            'monitor_has_run',    # true once we've heard of the monitor job being alive 
     7            'last_ping',          # time we last said we're alive 
    78            ); 
    89 
     
    2122    $self->{last_bcast_state} = {}; 
    2223    $self->{monitor_has_run}  = 0; 
     24    $self->{last_ping}        = 0; 
    2325 
    2426    IO::Handle::blocking($psock, 0); 
     
    4345sub still_alive { 
    4446    my $self = shift; 
    45     $self->send_to_parent(":still_alive");  # a no-op, just for the watchdog 
     47    my $now = time(); 
     48    if ($now > $self->{last_ping}) { 
     49        $self->send_to_parent(":still_alive");  # a no-op, just for the watchdog 
     50        $self->{last_ping} = $now; 
     51    } 
    4652} 
    4753 
  • branches/server-newrepl/lib/MogileFS/Worker/Replicate.pm

    r378 r379  
    3838    return 0; 
    3939} 
     40 
     41# replicator wants  
     42sub watchdog_timeout { 30; } 
    4043 
    4144# { fid => lastcheck }; instructs us not to replicate this fid... we will clear 
     
    223226        while ($fixed < $LIMIT && $devcount < $min) { 
    224227            my $now = time(); 
     228            $self->still_alive; 
     229 
    225230            my $fids = $dbh->selectcol_arrayref("SELECT fid FROM file WHERE dmid=? AND classid=? ". 
    226231                                                "AND devcount = ? AND length IS NOT NULL ". 
     
    258263 
    259264                $self->read_from_parent; 
     265                $self->still_alive; 
    260266 
    261267                if (my $status = replicate($dbh, $fid, class => $mclass)) { 
     
    422428        my $rv = undef; 
    423429        if (MogileFS::Config->http_mode) { 
    424             my $lastping = time(); 
    425430            my $worker = MogileFS::ProcManager->is_child or die; 
    426431            $rv = http_copy( 
     
    430435                            expected_len => undef,  # FIXME: get this info to pass along 
    431436                            errref       => \$copy_err, 
    432                             callback     => sub { 
    433                                 my $now = time(); 
    434                                 return if $now == $lastping; # ping once per second 
    435                                 $worker->still_alive; 
    436                                 $lastping = $now; 
    437                             }, 
     437                            callback     => sub { $worker->still_alive; }, 
    438438                            ); 
    439439            die "Bogus error code" if !$rv && $copy_err !~ /^(?:src|dest)_error$/;