Changeset 856
- Timestamp:
- 04/19/07 19:26:17 (3 years ago)
- Location:
- trunk/server/lib/MogileFS
- Files:
-
- 2 modified
-
Store.pm (modified) (2 diffs)
-
Worker/Fsck.pm (modified) (9 diffs)
Legend:
- Unmodified
- Added
- Removed
-
trunk/server/lib/MogileFS/Store.pm
r842 r856 589 589 utime INT UNSIGNED NOT NULL, 590 590 fid INT UNSIGNED NULL, 591 err CHAR(4), 591 evcode CHAR(4), 592 592 devid MEDIUMINT UNSIGNED, 593 593 INDEX(utime) … … 1222 1222 } 1223 1223 1224 sub fsck_log { 1225 my ($self, %opts) = @_; 1226 $self->dbh->do("INSERT INTO fsck_log (utime, fid, evcode, devid) ". 1227 "VALUES (" . $self->unix_timestamp . ",?,?,?)", 1228 undef, 1229 delete $opts{fid}, 1230 delete $opts{code}, 1231 delete $opts{devid}); 1232 croak("Unknown opts") if %opts; 1233 return 1; 1234 } 1235 1236 1224 1237 # run before daemonizing. you can die from here if you see something's amiss. or emit 1225 1238 # warnings. -
trunk/server/lib/MogileFS/Worker/Fsck.pm
r851 r856 22 22 my $run_count = 0; 23 23 24 # <debug crap> 25 my $running = 0; # start time 26 my $n_check = 0; # items checked 27 my $start = sub { 28 return if $running; 29 $running = time(); 30 print "START @" . time() . "\n"; 31 }; 32 my $stats = sub { 33 return unless $running; 34 my $now = time(); 35 my $elap = $now - $running; 36 printf("In %d secs, %d fids, %0.02f fids/sec\n", $elap, $n_check, ($n_check / ($elap || 1))); 37 }; 38 my $last_beat = 0; 39 my $beat = sub { 40 my $now = time(); 41 return unless $now >= $last_beat + 5; 42 $stats->(); 43 $last_beat = $now; 44 }; 45 my $stop = sub { 46 return unless $running; 47 $stats->(); 48 print "DONE.\n"; 49 $running = 0; 50 }; 51 # </debug crap> 52 24 53 every(5.0, sub { 25 54 my $sleep_set = shift; … … 45 74 46 75 unless (@fids) { 47 warn "[fsck] no fids to check...\n";76 $stop->(); 48 77 return; 49 78 } 79 $start->(); 50 80 51 81 MogileFS::FID->mass_load_devids(@fids); … … 53 83 my $new_max; 54 84 foreach my $fid (@fids) { 85 $n_check++; 55 86 $self->still_alive; 56 87 last if $self->should_stop_running; 57 88 last unless $self->check_fid($fid, no_stat => $opt_nostat); 58 89 $new_max = $fid->id; 90 $beat->(); 59 91 } 60 92 … … 87 119 # if no problems, no action. 88 120 # if problems, log & enqueue fixes 121 use constant CANT_CHECK => 0; 89 122 sub check_fid { 90 123 my ($self, $fid, %opts) = @_; … … 93 126 94 127 # first obvious fucked-up case: no devids even presumed to exist. 95 unless ( @{ $fid->devids }) {128 unless ($fid->devids) { 96 129 # first, log this weird condition. 
(N = NO PAths) 97 Mgd::get_store ->fsck_log(code => "NOPA", fid => $fid->id);130 Mgd::get_store()->fsck_log(code => "NOPA", fid => $fid->id); 98 131 # weird, schedule a fix (which will do a search over all 99 132 # devices as a last-ditch effort to locate it) … … 106 139 unless ($fid->devids_meet_policy) { 107 140 # log a policy violation 108 Mgd::get_store ->fsck_log(code => "POVI", fid => $fid->id);141 Mgd::get_store()->fsck_log(code => "POVI", fid => $fid->id); 109 142 $self->fix_fid($fid); 110 143 return 1; … … 115 148 # check the replication policy, which is already done, so finish. 116 149 return 1 if $opt_no_stat; 117 118 # **********************************************************************119 # FIXME: temporary, until statting is done...120 # **********************************************************************121 122 # printf("fid %d is good.\n", $fid->id);123 return 1;124 150 125 151 # iterate and do HEAD requests to determine some basic information about the file … … 132 158 my $path = $dfid->get_url 133 159 or die "FIXME"; 134 # TODO: use side-channel? eh, why? LWP + ConnCache good enough for now. 135 my $ua = LWP::UserAgent->new(timeout => 3) 136 or die "FIXME"; 137 my $resp = $ua->head($path); 138 139 # at this point we're going to assume that any error is based on the device alone 140 # so we want to store the status and not return 141 if ($resp->is_success) { 142 # great, check the size against what's in the database 143 if ($resp->header('Content-Length') == $fid->length) { 144 #yay 145 } else { 146 #shit. 147 } 148 160 161 my $disk_size = $dfid->size_on_disk; 162 163 if (! 
defined $disk_size) { 164 warn("Connectivity problem reaching XXXFIXME\n"); 165 return CANT_CHECK; 166 } 167 168 # great, check the size against what's in the database 169 if ($disk_size == $fid->length) { 170 #yay 149 171 } else { 150 # easy one, the request failed for some reason, 500 would tend to imply that the 151 # mogstored is having issues so we should try again later, whereas a 404 is a 152 # total and permanent failure 153 #error("check_fid($fid, $level): " . $resp->code . " on device $devid"); 154 155 if ($resp->code == 404) { 156 #fucked! 157 } else { 158 # just unreachable 159 warn "TODO: foo is unreachable\n"; 160 return 0; 161 } 172 printf("FID %d corruption on devid $devid: e=%d, a=%d\n", 173 $fid->id, 174 $fid->length, 175 $disk_size, 176 ); 162 177 } 163 178 } … … 182 197 183 198 # make devfid objects from the devids that this fid is on, 184 my @dfids = map { MogileFS::DevID->new($_, $fid) } @{ $fid->devids };199 my @dfids = map { MogileFS::DevID->new($_, $fid) } $fid->devids; 185 200 186 201 # keep track if we found the file (with the right size) at least somewhere.
