Changeset 1368
- Timestamp:
- 11/25/09 07:43:08 (4 months ago)
- Location:
- trunk/server
- Files:
-
- 3 modified
-
CHANGES (modified) (1 diff)
-
lib/MogileFS/ReplicationPolicy/MultipleHosts.pm (modified) (2 diffs)
-
lib/MogileFS/Worker/Replicate.pm (modified) (4 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/server/CHANGES
r1358 r1368 1 2 * Replication will now attempt to handle over replication situations. 3 FSCK will catch over replication as a policy violation, and 4 replicate will attempt to fix it. (dormando) 5 (rarely it might need multiple fscks) 1 6 2 7 * Since FIDSizes is no longer used, remove it and remove gearman -
trunk/server/lib/MogileFS/ReplicationPolicy/MultipleHosts.pm
r1198 r1368 50 50 my $total_disks = scalar grep { $_->dstate->should_have_files } values %$all_devs; 51 51 52 # if we have two copies and that's all the disks there are 53 # anywhere, be happy enough, even if mindevcount is higher. in 54 # that case, when they add more disks later, they'll need to fsck 55 # to make files replicate more. 56 return ALL_GOOD if $already_on >= 2 && $already_on == $total_disks; 57 58 52 # see which and how many unique hosts we're already on. 59 53 my %on_dev; … … 66 60 my $total_uniq_hosts = unique_hosts($all_devs); 67 61 62 # if we are on two hosts but 10 devices, you want to weaken the number of 63 # devices you're on until you're on the right number of hosts with the 64 # right number of devices. 68 65 return TOO_GOOD if $uniq_hosts_on > $min; 69 return TOO_GOOD if $uniq_hosts_on == $min && $already_on > $min; 66 return TOO_GOOD if $already_on > $min; 70 67 return ALL_GOOD if $uniq_hosts_on == $min; 71 68 return ALL_GOOD if $uniq_hosts_on >= $total_uniq_hosts && $already_on >= $min; 69 70 # if we have two copies and that's all the disks there are 71 # anywhere, be happy enough, even if mindevcount is higher. in 72 # that case, when they add more disks later, they'll need to fsck 73 # to make files replicate more. 74 # this is here instead of above in case an over replication error causes 75 # the file to be on all disks (where more than necessary) 76 return ALL_GOOD if $already_on >= 2 && $already_on == $total_disks; 72 77 73 78 # if there are more hosts we're not on yet, we want to exclude devices we're already -
trunk/server/lib/MogileFS/Worker/Replicate.pm
r1286 r1368 178 178 # failed_getting_lock => harmless. skip. somebody else probably doing. 179 179 # 180 # -- ACTIONABLE -- 181 # too_happy => too many copies, attempt to rebalance. 182 # 180 183 # -- TEMPORARY; DO EXPONENTIAL BACKOFF -- 181 184 # source_down => only source available is observed down. … … 220 223 } 221 224 225 # try to shake off extra copies. fall through to the backoff logic 226 # so we don't flood if it's impossible to properly weaken the fid. 227 # there's a race where the fid could be checked again, but the 228 # exclusive locking prevents replication clobbering. 229 if ($errcode eq 'too_happy') { 230 $unlock->() if $unlock; 231 $unlock = undef; 232 my $f = MogileFS::FID->new($fid); 233 my @devs = List::Util::shuffle($f->devids); 234 my $devfid; 235 # First one we can delete from, we try to rebalance away from. 236 for (@devs) { 237 my $dev = MogileFS::Device->of_devid($_); 238 # Not positive 'can_read_from' needs to be here. 239 # We must be able to delete off of this dev so the fid can 240 # move. 241 if ($dev->can_delete_from && $dev->can_read_from) { 242 $devfid = MogileFS::DevFID->new($dev, $f); 243 last; 244 } 245 } 246 $self->rebalance_devfid($devfid) if $devfid; 247 } 248 222 249 # at this point, the rest of the errors require exponential backoff. define what this means 223 250 # as far as failcount -> delay to next try. … … 418 445 my $del_reason; 419 446 420 if ($ret eq "lost_race") { 447 if ($ret eq "too_happy" || $ret eq "lost_race") { 421 448 # for some reason, we did no work. that could be because 422 449 # either 1) we lost the race, as the error code implies, … … 693 720 } 694 721 722 # We are over replicated. Let caller decide if it should rebalance. 723 if ($rr->too_happy) { 724 return $retunlock->(0, "too_happy", "fid $fidid is on too many devices"); 725 } 726 695 727 if ($rr->is_happy) { 696 728 return $retunlock->(1) if $got_copy_request;
