Changeset 1368

Show
Ignore:
Timestamp:
11/25/09 07:43:08 (4 months ago)
Author:
dormando
Message:

Rebalance to repair overreplication errors.

If a fid is "too happy" (has too many copies), randomly rebalance
the devs until it's happy. This may momentarily make a fid unhappy
with its replication policy, but it will fix itself eventually.

Location:
trunk/server
Files:
3 modified

Legend:

Unmodified
Added
Removed
  • trunk/server/CHANGES

    r1358 r1368  
     1 
     2        * Replication will now attempt to handle over-replication situations. 
     3          FSCK will catch over-replication as a policy violation, and 
     4          replicate will attempt to fix it. (dormando) 
     5          (in rare cases it might need multiple fsck runs) 
    16 
    27        * Since FIDSizes is no longer used, remove it and remove gearman 
  • trunk/server/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm

    r1198 r1368  
    5050    my $total_disks = scalar grep { $_->dstate->should_have_files } values %$all_devs; 
    5151 
    52     # if we have two copies and that's all the disks there are 
    53     # anywhere, be happy enough, even if mindevcount is higher.  in 
    54     # that case, when they add more disks later, they'll need to fsck 
    55     # to make files replicate more. 
    56     return ALL_GOOD if $already_on >= 2 && $already_on == $total_disks; 
    57  
    5852    # see which and how many unique hosts we're already on. 
    5953    my %on_dev; 
     
    6660    my $total_uniq_hosts = unique_hosts($all_devs); 
    6761 
     62    # if we are on two hosts but 10 devices, you want to weaken the number of 
     63    # devices you're on until you're on the right number of hosts with the 
     64    # right number of devices. 
    6865    return TOO_GOOD if $uniq_hosts_on >  $min; 
    69     return TOO_GOOD if $uniq_hosts_on == $min && $already_on > $min; 
     66    return TOO_GOOD if $already_on > $min; 
    7067    return ALL_GOOD if $uniq_hosts_on == $min; 
    7168    return ALL_GOOD if $uniq_hosts_on >= $total_uniq_hosts && $already_on >= $min; 
     69 
     70    # if we have two copies and that's all the disks there are 
     71    # anywhere, be happy enough, even if mindevcount is higher.  in 
     72    # that case, when they add more disks later, they'll need to fsck 
     73    # to make files replicate more. 
     74    # this check is here instead of above in case an over-replication error 
     75    # causes the file to be on all disks (more copies than necessary) 
     76    return ALL_GOOD if $already_on >= 2 && $already_on == $total_disks; 
    7277 
    7378    # if there are more hosts we're not on yet, we want to exclude devices we're already 
  • trunk/server/lib/MogileFS/Worker/Replicate.pm

    r1286 r1368  
    178178        # failed_getting_lock        => harmless.  skip.  somebody else probably doing. 
    179179        # 
     180        # -- ACTIONABLE -- 
     181        # too_happy                  => too many copies, attempt to rebalance. 
     182        # 
    180183        # -- TEMPORARY; DO EXPONENTIAL BACKOFF -- 
    181184        # source_down                => only source available is observed down. 
     
    220223        } 
    221224 
     225        # try to shake off extra copies. fall through to the backoff logic 
     226        # so we don't flood if it's impossible to properly weaken the fid. 
     227        # there's a race where the fid could be checked again, but the 
     228        # exclusive locking prevents replication clobbering. 
     229        if ($errcode eq 'too_happy') { 
     230            $unlock->() if $unlock; 
     231            $unlock = undef; 
     232            my $f = MogileFS::FID->new($fid); 
     233            my @devs = List::Util::shuffle($f->devids); 
     234            my $devfid; 
     235            # First one we can delete from, we try to rebalance away from. 
     236            for (@devs) { 
     237                my $dev = MogileFS::Device->of_devid($_); 
     238                # Not positive 'can_read_from' needs to be here. 
     239                # We must be able to delete off of this dev so the fid can 
     240                # move. 
     241                if ($dev->can_delete_from && $dev->can_read_from) { 
     242                    $devfid = MogileFS::DevFID->new($dev, $f); 
     243                    last; 
     244                } 
     245            } 
     246            $self->rebalance_devfid($devfid) if $devfid; 
     247        } 
     248 
    222249        # at this point, the rest of the errors require exponential backoff.  define what this means 
    223250        # as far as failcount -> delay to next try. 
     
    418445    my $del_reason; 
    419446 
    420     if ($ret eq "lost_race") { 
     447    if ($ret eq "too_happy" || $ret eq "lost_race") { 
    421448        # for some reason, we did no work. that could be because 
    422449        # either 1) we lost the race, as the error code implies, 
     
    693720    } 
    694721 
     722    # We are over replicated. Let caller decide if it should rebalance. 
     723    if ($rr->too_happy) { 
     724        return $retunlock->(0, "too_happy", "fid $fidid is on too many devices"); 
     725    } 
     726 
    695727    if ($rr->is_happy) { 
    696728        return $retunlock->(1) if $got_copy_request;