Changeset 134
- Timestamp:
- 05/23/07 01:24:54 (3 years ago)
- Location:
- trunk
- Files:
-
- 4 added
- 11 modified
-
Changes (modified) (1 diff)
-
MANIFEST (modified) (3 diffs)
-
lib/Brackup/Backup.pm (modified) (8 diffs)
-
lib/Brackup/CompositeChunk.pm (added)
-
lib/Brackup/InventoryDatabase.pm (modified) (1 diff)
-
lib/Brackup/Restore.pm (modified) (1 diff)
-
lib/Brackup/Root.pm (modified) (3 diffs)
-
lib/Brackup/StoredChunk.pm (modified) (5 diffs)
-
lib/Brackup/Target.pm (modified) (2 diffs)
-
lib/Brackup/Target/Amazon.pm (modified) (1 diff)
-
lib/Brackup/Target/Filesystem.pm (modified) (1 diff)
-
lib/Brackup/Test.pm (modified) (1 diff)
-
t/03-combine-little-files.t (added)
-
t/data/000-dup1.txt (added)
-
t/data/000-dup2.txt (added)
Legend:
- Unmodified
- Added
- Removed
-
trunk/Changes
r130 r134 1 - support for merging little files together into big chunks 2 on the backup target. aka "tail packing". requires no changes 3 to target drivers. this should speed backups, as less network 4 round-trips. will also be cheaper, once Amazon starts charging 5 per number of HTTP requests in June. 6 7 - improved docs 8 1 9 1.01 (may 21, 2007) 2 10 -
trunk/MANIFEST
r130 r134 13 13 lib/Brackup/BackupStats.pm 14 14 lib/Brackup/ChunkIterator.pm 15 lib/Brackup/CompositeChunk.pm 15 16 lib/Brackup/Config.pm 16 17 lib/Brackup/ConfigSection.pm … … 33 34 MANIFEST This list of files 34 35 MANIFEST.SKIP 36 META.yml Module meta-data (added by MakeMaker) 35 37 t/00-use.t 36 38 t/01-backup.t 37 39 t/02-gpg.t 40 t/03-combine-little-files.t 41 t/data/000-dup1.txt 42 t/data/000-dup2.txt 38 43 t/data/huge-file.txt 39 44 t/data/my-link.txt … … 44 49 t/data/test-file.txt 45 50 TODO 46 META.yml Module meta-data (added by MakeMaker) -
trunk/lib/Brackup/Backup.pm
r124 r134 4 4 use Carp qw(croak); 5 5 use Brackup::ChunkIterator; 6 use Brackup::CompositeChunk; 6 7 use Brackup::GPGProcManager; 7 8 use Brackup::GPGProcess; … … 125 126 }; 126 127 128 my $merge_under = $root->merge_files_under; 129 my $comp_chunk = undef; 130 127 131 # records are either Brackup::File (for symlinks, directories, etc), or 128 132 # PositionedChunks, in which case the file can be asked of the chunk … … 137 141 } 138 142 143 # have we already stored this chunk before? (iterative backup) 139 144 my $schunk; 140 145 if ($schunk = $target->stored_chunk_from_inventory($pchunk)) { … … 144 149 } 145 150 151 # weird case... have we stored this same pchunk digest in the 152 # current comp_chunk we're building? these aren't caught by 153 # the above inventory check, because chunks in a composite 154 # chunk aren't added to the inventory until after the composite 155 # chunk has fully grown (because it's not until it's fully grown 156 # that we know the handle for it, its digest) 157 if ($comp_chunk && ($schunk = $comp_chunk->stored_chunk_from_dup_internal_raw($pchunk))) { 158 $pchunk->forget_chunkref; 159 push @stored_chunks, $schunk; 160 next; 161 } 162 146 163 $show_status->() unless $file_has_shown_status++; 147 164 $self->debug(" * storing chunk: ", $pchunk->as_string, "\n"); 148 165 149 my $handle;150 166 unless ($self->{dryrun}) { 151 167 $schunk = Brackup::StoredChunk->new($pchunk); … … 154 170 if ($gpg_rcpt) { 155 171 $schunk->set_encrypted_chunkref($gpg_pm->enc_chunkref_of($pchunk)); 172 } 173 174 # see if we should pack it into a bigger blob 175 my $chunk_size = $schunk->backup_length; 176 if ($merge_under && $chunk_size < $merge_under) { 177 if ($comp_chunk && ! 
$comp_chunk->can_fit($chunk_size)) { 178 $self->debug("Finalizing composite chunk $comp_chunk..."); 179 $comp_chunk->finalize; 180 $comp_chunk = undef; 181 } 182 $comp_chunk ||= Brackup::CompositeChunk->new($root, $target); 183 $comp_chunk->append_little_chunk($schunk); 184 } else { 185 # store it regularly, as its own chunk on the target 186 $target->store_chunk($schunk) 187 or die "Chunk storage failed.\n"; 188 $target->add_to_inventory($pchunk => $schunk); 156 189 } 157 190 … … 163 196 #}; 164 197 165 $target->store_chunk($schunk) 166 or die "Chunk storage failed.\n"; 167 $target->add_to_inventory($pchunk => $schunk); 198 168 199 $n_kb_up += $pchunk->length / 1024; 169 200 push @stored_chunks, $schunk; … … 173 204 174 205 # DEBUG: verify it got written correctly 175 if ($ENV{BRACKUP_PARANOID} && $handle) {206 if ($ENV{BRACKUP_PARANOID}) { 176 207 die "FIX UP TO NEW API"; 177 208 #my $saved_ref = $target->load_chunk($handle); … … 187 218 } 188 219 $end_file->(); 220 $comp_chunk->finalize if $comp_chunk; 189 221 190 222 unless ($self->{dryrun}) { -
trunk/lib/Brackup/InventoryDatabase.pm
r128 r134 119 119 B<Keys> 120 120 121 The key is the digest of the "raw" (pre-compression/encryption) file/chunk, and the value is 122 the digest of the file/chunk post-compression/encryption, along with its length. 121 The key is the digest of the "raw" (pre-compression/encryption) 122 file/chunk (with GPG recipient, if using encryption), and the value is 123 the digest of the chunk stored on the target, which contains the raw 124 chunk. The chunk stored on the target may contain other chunks, may 125 be compressed, encrypted, etc. 126 127 <raw_digest> --> <stored_digest> <stored_length> 128 <raw_digest>;to=<gpg_rcpt> --> <stored_digest> <stored_length> 123 129 124 130 For example: 125 131 126 sha1:e23c4b5f685e046e7cc50e30e378ab11391e528e =>132 sha1:e23c4b5f685e046e7cc50e30e378ab11391e528e;to=6BAFF35F => 127 133 sha1:d7257184899c9e6c4e26506f1c46f8b6562d9ee7 71223 128 134 129 Means that the chunk with sha1 contents "e23c4...", after being 130 compressed/encrypted, is stored on the target with digest 131 "d72571848...", with length 71,223 bytes. 135 Means that the chunk with sha1 contents "e23c4...", intended to be 136 en/de-crypted for 6BAFF35F, can be got by asking the target for the 137 chunk with digest "d72571848...", with length 71,223 bytes. 138 139 When using the Brackup feature which combines small files into larger 140 blobs, the inventory database instead stores values like: 141 142 <raw_digest>[;to=<gpg_rcpt>] --> 143 <stored_digest> <stored_length> <from_offset>-<to_offset> 144 145 Which is the same thing, but after fetching the composite chunk using 146 the stored digest provided, only the range provided from C<from_offset> to 147 C<to_offset> should be used. 132 148 133 149 =head1 SEE ALSO -
trunk/lib/Brackup/Restore.pm
r124 r134 188 188 189 189 my $len_chunk = length $$dataref; 190 unless ($len_chunk == $enc_len) { 191 die "Backup chunk $dig isn't of expected length: got $len_chunk, expecting $enc_len\n"; 190 191 # using just a range of the file 192 # TODO: inefficient! we don't want to download the chunk from the 193 # target multiple times. better to cache it locally, or at least 194 # only fetch a region from the target (but that's still kinda inefficient 195 # and pushes complexity into the Target interface) 196 if ($enc_len =~ /^(\d+)-(\d+)$/) { 197 my ($from, $to) = ($1, $2); 198 # file range. gotta be at least as big as bigger number 199 unless ($len_chunk >= $to) { 200 die "Backup chunk $dig isn't at least as big as range: got $len_chunk, needing $to\n"; 201 } 202 my $region = substr($$dataref, $from, $to-$from); 203 $dataref = \$region; 204 } else { 205 # using the whole chunk, so make sure fetched size matches 206 # expected size 207 unless ($len_chunk == $enc_len) { 208 die "Backup chunk $dig isn't of expected length: got $len_chunk, expecting $enc_len\n"; 209 } 192 210 } 193 211 -
trunk/lib/Brackup/Root.pm
r129 r134 20 20 $self->{gpg_path} = $conf->value('gpg_path') || "/usr/bin/gpg"; 21 21 $self->{gpg_rcpt} = $conf->value('gpg_recipient'); 22 $self->{chunk_size} = $conf->byte_value('chunk_size') ,22 $self->{chunk_size} = $conf->byte_value('chunk_size'); 23 23 $self->{ignore} = []; 24 25 $self->{merge_files_under} = $conf->byte_value('merge_files_under'); 26 $self->{max_composite_size} = $conf->byte_value('max_composite_chunk_size') || 2**20; 27 28 die "'max_composite_chunk_size' must be greater than 'merge_files_under'\n" unless 29 $self->{max_composite_size} > $self->{merge_files_under}; 24 30 25 31 $self->{gpg_args} = []; # TODO: let user set this. for now, not possible … … 31 37 return $self; 32 38 } 39 40 sub merge_files_under { $_[0]{merge_files_under} } 41 sub max_composite_size { $_[0]{max_composite_size} } 33 42 34 43 sub gpg_path { … … 279 288 just make the *.brackup metafiles larger. 280 289 290 =item B<merge_files_under> 291 292 In units of bytes, kB, MB, etc. Files under this size are merged together into larger composite chunks on the target. By 293 default this feature is off (value 0), purely because it's new, but 1 294 kB is a recommended size, and will probably be the default in the 295 future. Set it to 0 to explicitly disable. 296 297 =item B<max_composite_chunk_size> 298 299 In units of bytes, kB, MB, etc. The maximum size of a composite 300 chunk, holding lots of little files. If this is too big, you'll waste 301 more space with future iterative backups updating files locked into 302 this chunk with unchanged chunks. 303 304 Recommended, and default value, is 1 MB. 305 281 306 =back -
trunk/lib/Brackup/StoredChunk.pm
r110 r134 11 11 # backdigest - memoized 12 12 # _chunkref - memoized 13 # compchunk - composite chunk, if we were added to a composite chunk 14 # compfrom - offset in composite chunk where we start 15 # compto - offset in composite chunk where we end 13 16 14 17 sub new { … … 19 22 } 20 23 24 sub pchunk { $_[0]{pchunk} } 25 21 26 # create the 'lite' or 'handle' version of a storedchunk. can't get to 22 27 # the chunkref from this, but callers won't need to. and we'll DIE if they 23 28 # try to access the chunkref. 24 sub new_from_inventory { 25 my ($class, $pchunk, $dig, $len) = @_; 26 return bless { 29 sub new_from_inventory_value { 30 my ($class, $pchunk, $invval) = @_; 31 32 my ($dig, $len, $range) = split /\s+/, $invval; 33 34 my $sc = bless { 27 35 pchunk => $pchunk, 28 36 backdigest => $dig, 29 37 backlength => $len, 30 38 }, $class; 39 40 # normal 41 return $sc unless $range; 42 43 # in case of little file in a composite chunk, 44 # we gotta be a range. 45 my ($from, $to) = $range =~ /^(\d+)-(\d+)$/ 46 or die "bogus range: $range"; 47 $sc->{compfrom} = $from; 48 $sc->{compto} = $to; 49 return $sc; 50 } 51 52 sub clone_but_for_pchunk { 53 my ($self, $pchunk) = @_; 54 my $copy = bless {}, ref $self; 55 foreach my $f (qw(backlength backdigest compchunk compfrom compto)) { 56 $copy->{$f} = $self->{$f}; 57 } 58 $copy->{pchunk} = $pchunk; 59 return $copy; 60 } 61 62 sub set_composite_chunk { 63 my ($self, $cchunk, $from, $to) = @_; 64 $self->{compchunk} = $cchunk; 65 66 # forget our backup length/digest. this handle information 67 # to the stored chunk should be asked of our composite 68 # chunk in the future, when it's done populating. 
69 $self->{backdigest} = undef; 70 $self->{backlength} = undef; 71 $self->forget_chunkref; 72 73 $self->{compfrom} = $from; 74 $self->{compto} = $to; 75 } 76 77 sub range_in_composite { 78 my $self = shift; 79 return undef unless $self->{compfrom} || $self->{compto}; 80 return "$self->{compfrom}-$self->{compto}"; 31 81 } 32 82 … … 78 128 sub _populate_lengthdigest { 79 129 my $self = shift; 130 if (my $cchunk = $self->{compchunk}) { 131 $self->{backlength} = $cchunk->backup_length; 132 $self->{backdigest} = $cchunk->digest; 133 return 1; 134 } 135 80 136 my $dataref = $self->chunkref; 81 137 $self->{backlength} = CORE::length($$dataref); … … 121 177 my $self = shift; 122 178 my @parts = ($self->{pchunk}->offset, 123 $self->{pchunk}->length, 124 $self->backup_length, 125 $self->backup_digest, 126 ); 179 $self->{pchunk}->length); 180 181 if (my $range = $self->range_in_composite) { 182 push @parts, ( 183 $range, 184 $self->backup_digest, 185 ); 186 } else { 187 push @parts, ( 188 $self->backup_length, 189 $self->backup_digest, 190 ); 191 } 127 192 128 193 # if the inventory database is lost, it should be possible to … … 142 207 } 143 208 209 # aka "instructions to attach to a pchunk, on how to recover the pchunk from a target" 210 sub inventory_value { 211 my $self = shift; 212 213 # when this chunk was stored as part of a composite chunk, the instructions 214 # are of form: 215 # sha1:deadbeef 0-50 216 # which means download "sha1:deadbeef", then the contents will be in from 217 # byte offset 0 to byte offset 50 (length of 50). 218 if (my $range = $self->range_in_composite) { 219 return join(" ", 220 $self->backup_digest, 221 $self->backup_length, 222 $range); 223 } 224 225 # else, the historical format: 226 # sha1:deadbeef <length> 227 return join(" ", $self->backup_digest, $self->backup_length); 228 } 229 144 230 1; -
trunk/lib/Brackup/Target.pm
r129 r134 46 46 my $key = $pchunk->inventory_key; 47 47 my $db = $self->inventory_db; 48 $db->set($key => join(" ", $schunk->backup_digest, $schunk->backup_length));48 $db->set($key => $schunk->inventory_value); 49 49 } 50 50 … … 55 55 my $key = $pchunk->inventory_key; 56 56 my $db = $self->inventory_db; 57 my $ diglen= $db->get($key)57 my $invval = $db->get($key) 58 58 or return undef; 59 my ($digest, $length) = split /\s+/, $diglen; 60 return Brackup::StoredChunk->new_from_inventory($pchunk, $digest, $length); 59 return Brackup::StoredChunk->new_from_inventory_value($pchunk, $invval); 61 60 } 62 61 -
trunk/lib/Brackup/Target/Amazon.pm
r129 r134 99 99 my $dig = $chunk->backup_digest; 100 100 my $blen = $chunk->backup_length; 101 my $len = $chunk->length;102 101 my $chunkref = $chunk->chunkref; 103 102 -
trunk/lib/Brackup/Target/Filesystem.pm
r129 r134 74 74 my $dig = $chunk->backup_digest; 75 75 my $blen = $chunk->backup_length; 76 my $len = $chunk->length;77 76 78 77 my $path = $self->chunkpath($dig); -
trunk/lib/Brackup/Test.pm
r126 r134 75 75 } 76 76 ok(-s $meta_filename, "backup file has size"); 77 return $meta_filename;77 return wantarray ? ($meta_filename, $backup) : $meta_filename; 78 78 } 79 79
