#!/usr/bin/perl # jbackup.pl # Journal Backup Utility # This tool downloads a copy of your journal (all entries and all comments) in a nice-to-the-server # fashion and lets you export them in an easy to access XML format or an easy to read HTML format. ### DATABASE DOCUMENTATION ######################################################################## # There are a bunch of keys in the database. They're (hopefully) named in an easy to follow and # understand manner, but I'm documenting them here for quick reference. # # event:lastsync # The most recent item returned by the syncitems mode. This is just passed back to the # server to instruct it when to pick up again and start sending us more data. # # event:ids # Comma separated list of all valid jitemids. This is maintained so we don't have to # iterate through every key in the database to find event jitemids. # # event:lastgrab # The real date of the most recently downloaded event. This is set when we actually # get an event from the getevents mode. This date will match up with one of the dates # returned by syncitems. # # event:realtime: Time the server got this post (YYYY-MM-DD HH:MM:SS format). # event:subject: Subject of the event, may not be present. # event:anum: Arbitrary number for this event. # event:event: Text of the event. # event:eventtime: Time the user specified (YYYY-MM-DD HH:MM:SS format). # event:security: Present if not public. Values are 'private', 'usemask'. # event:allowmask: Present for security == usemask. Allowmask == 1 means Friends Only. # event:poster: If present, may be any username. Else, it's the user's journal. # These all contain various bits of data about the event. # # event:proplist: # List of all properties that are defined for this event. Comma separated. # # event:prop:: # Stores the values of the properties. is taken from the proplist. # # usermap: # For , contains the username. # # usermap:userids # All the valid userids. Same logic as event:ids. # # comment:ids # Should be familiar. All the valid jtalkids. Comma separated. # # comment:lastid # The most recently downloaded jtalkid as retrieved by the comment_body mode. # # comment:state: # Formatted string: ::: # This contains state information about a comment. Most of this information is subject to # change, and hence it's separate. # # comment:subject: Subject of the comment. May not be present. # comment:body: Text of the comment. May not be present for deleted comments. # comment:date: Date of the comment. In W3C date format. # As with events. Contains various bits of information about the comments. ################################################################################################### ## the program ## use strict; use Getopt::Long; use GDBM_File; use Data::Dumper; use XMLRPC::Lite; use XML::Parser; use Digest::MD5 qw(md5_hex); use Term::ReadKey; # get options my %opts; exit 1 unless GetOptions("dump=s" => \$opts{dumptype}, "sync" => \$opts{sync}, "user=s" => \$opts{user}, "help" => \$opts{help}, "server=s" => \$opts{server}, "port=i" => \$opts{port}, "quiet" => \$opts{quiet}, "publiconly" => \$opts{public}, "journal=s" => \$opts{usejournal}, "clean" => \$opts{clean}, "file=s" => \$opts{file}, "password=s" => \$opts{password}, "md5pass=s" => \$opts{md5password}, "alter-security=s" => \$opts{alter_security}, "confirm-alter" => \$opts{confirm_alter}, "no-comments" => \$opts{no_comments},); # hit up .jbackup for other options if (-e "$ENV{HOME}/.jbackup") { # read in the options open FILE, "<$ENV{HOME}/.jbackup"; foreach () { $opts{$1} = $2 if /^(.+)=(.+)[\r\n]*$/; } close FILE; } # setup some nice, sane defaults $opts{server} ||= 'www.livejournal.com'; $opts{port} += 0; $opts{verbose} = $opts{quiet} ? 0 : 1; $opts{server} = "$opts{server}:$opts{port}" if $opts{port} && $opts{port} != 80; # set some constants that should never need to change. my $COMMENTS_FETCH_META = 10000; # up to 10000 comments, the maximum for comment_meta my $COMMENTS_FETCH_BODY = 1000; # up to 1000 comments, the maximum for comment_body # now figure out what we're doing if ($opts{help} || !($opts{sync} || $opts{dumptype} || $opts{alter_security})) { print <; chomp $user; $opts{user} = $user; die "Need a username" unless $opts{user}; } if (!$opts{password} && !$opts{md5password} && $opts{sync}) { print "Password: "; ReadMode('noecho'); my $pass = ReadLine(0); ReadMode('normal'); chomp $pass; $opts{password} = $pass; print "\n"; die "Need a password" unless $opts{password}; } $opts{linkuser} = $opts{usejournal} || $opts{user}; # setup some global variables my %bak; my $filename = "$ENV{HOME}/$opts{user}." . ($opts{usejournal} ? "$opts{usejournal}." : '') . "jbak"; # setup database my $tied = do_tie(); # do something do_alter_security($opts{alter_security}, $opts{confirm_alter}) if $opts{alter_security}; do_sync() if $opts{sync}; do_dump($opts{dumptype}) if $opts{dumptype}; # clean up before we exit do_untie(); #### helper functions below here ############################################ sub d { # just dump a message to stderr if we're in verbose mode return unless $opts{verbose}; print STDERR shift(@_) . "\n"; } sub do_sync { ### ENTRY DOWNLOADING ### # see if we have any sync data saved my %sync; my $lastsync = $bak{"event:lastsync"}; my $synccount = 0; # get sync data my @usejournal = $opts{usejournal} ? ('usejournal', $opts{usejournal}) : (); while (1) { # contact server for list of items d("do_sync: calling syncitems with lastsync = " . ($lastsync || 'none yet')); my $hash = call_xmlrpc('syncitems', { lastsync => $lastsync, @usejournal }); # push this info, set lastsync foreach my $item (@{$hash->{syncitems} || []}) { next unless $item->{item} =~ /L-(\d+)/; $synccount++; $sync{$1} = [ $item->{action}, $item->{'time'} ]; $lastsync = $item->{'time'} if $item->{'time'} gt $lastsync; $bak{"event:realtime:$1"} = $item->{'time'}; } $bak{'event:lastsync'} = $lastsync; do_flush(); # last if necessary d("do_sync: got $hash->{count} of $hash->{total} syncitems."); last if $hash->{count} == $hash->{total}; } print "$synccount total new and/or updated entries.\n"; $bak{'event:lastsync'} = $lastsync; # helper sub my $realtime = sub { my $id = shift; return $sync{$id}->[1] if @{$sync{$id} || []}; return $bak{"event:realtime:$id"}; }; # get list of ids so far my %eventids = ( map { $_, 1 } split(',', $bak{"event:ids"}) ); # setup our download hash my $lastgrab = $bak{"event:lastgrab"}; my %data; while (1) { # shortcut to maybe not have to hit getvents last if $lastgrab eq $lastsync; # get newest item we have cached my $count = 0; d("do_sync: calling getevents with lastgrab = " . ($lastgrab || 'none yet')); my $hash = call_xmlrpc('getevents', { selecttype => 'syncitems', lastsync => $lastgrab, ver => 1, lineendings => 'unix', @usejournal, }); # parse incoming data one event at a time foreach my $evt (@{$hash->{events} || []}) { # got an event $count++; $eventids{$evt->{itemid}} = 1; $evt->{realtime} = $realtime->($evt->{itemid}); $lastgrab = $evt->{realtime} if $evt->{realtime} gt $lastgrab; save_event($evt); } $bak{"event:lastgrab"} = $lastgrab; $bak{"event:ids"} = join ',', keys %eventids; do_flush(); # do we all be done here? d("do_sync: got $count items."); last unless $count && $lastgrab; } ### COMMENT DOWNLOADING ### # see if we shouldn't be doing this return if $opts{no_comments}; # first we hit up the server to get a session my $hash = call_xmlrpc('sessiongenerate', { expiration => 'short' }); my $ljsession = $hash->{ljsession}; # downloaded meta data information my %meta; my @userids; # setup our parsing function my $maxid = 0; my $server_max_id = 0; my $server_next_id = 1; my $lasttag = ''; my $meta_handler = sub { # this sub actually processes incoming meta information $lasttag = $_[1]; shift; shift; # remove the Expat object and tag name my %temp = ( @_ ); # take the rest into our humble hash if ($lasttag eq 'comment') { # get some data on a comment $meta{$temp{id}} = { id => $temp{id}, posterid => $temp{posterid}+0, state => $temp{state} || 'A', }; update_comment($meta{$temp{id}}); } elsif ($lasttag eq 'usermap') { # put this data in our usermap $bak{"usermap:$temp{id}"} = $temp{user}; push @userids, $temp{id}; } }; my $meta_closer = sub { # we hit a closing tag so we're not in a tag anymore $lasttag = ''; }; my $meta_content = sub { # if we're in a maxid tag, we want to save that value so we know how much further # we have to go in downloading meta info return unless ($lasttag eq 'maxid') || ($lasttag eq 'nextid'); $server_max_id = $_[1] + 0 if ($lasttag eq 'maxid'); $server_next_id = $_[1] + 0 if ($lasttag eq 'nextid'); }; # hit up the server for metadata while (defined $server_next_id && $server_next_id =~ /^\d+$/) { my $content = do_authed_fetch('comment_meta', $server_next_id, $COMMENTS_FETCH_META, $ljsession); die "Some sort of error fetching metadata from server" unless $content; $server_next_id = undef; # now we want to XML parse this my $parser = new XML::Parser(Handlers => { Start => $meta_handler, Char => $meta_content, End => $meta_closer }); $parser->parse($content); } $bak{"comment:ids"} = join ',', keys %meta; $bak{"usermap:userids"} = join ',', @userids; # setup our handlers for body XML info my $lastid = $bak{"comment:lastid"}+0; my $curid = 0; my @tags; my $body_handler = sub { # this sub actually processes incoming body information $lasttag = $_[1]; push @tags, $lasttag; shift; shift; # remove the Expat object and tag name my %temp = ( @_ ); # take the rest into our humble hash if ($lasttag eq 'comment') { # get some data on a comment $curid = $temp{id}; $meta{$curid}{parentid} = $temp{parentid}+0; $meta{$curid}{jitemid} = $temp{jitemid}+0; # line below commented out because we shouldn't be trying to be clever like this ;p # $lastid = $curid if $curid > $lastid; } }; my $body_closer = sub { # we hit a closing tag so we're not in a tag anymore my $tag = pop @tags; $lasttag = $tags[0]; }; my $body_content = sub { # this grabs data inside of comments: body, subject, date return unless $curid; return unless $lasttag =~ /(?:body|subject|date)/; $meta{$curid}{$lasttag} .= $_[1]; # have to .= it, because the parser will split on punctuation such as an apostrophe # that may or may not be in the data stream, and we won't know until we've already # gotten some data }; # at this point we have a fully regenerated metadata cache and we want to grab a block of comments while (1) { my $content = do_authed_fetch('comment_body', $lastid+1, $COMMENTS_FETCH_BODY, $ljsession); die "Some sort of error fetching body data from server" unless $content; # now we want to XML parse this my $parser = new XML::Parser(Handlers => { Start => $body_handler, Char => $body_content, End => $body_closer }); $parser->parse($content); # now at this point what we have to decide whether we should loop again for more metadata $lastid += $COMMENTS_FETCH_BODY; last unless $lastid < $server_max_id; } # at this point we should have a set of fully formed comments, so let's save everything my $count = 0; foreach my $id (keys %meta) { next unless $meta{$id}{jitemid}; # jitemid == 0 means we didn't get body info on this comment $count++; save_comment($meta{$id}); } print "$count new comments downloaded.\n"; # update our lastid. we want this to always point to the last comment we downloaded, because # comment ids will never go backwards, and we can always count on the next one being > lastid $bak{"comment:lastid"} = $lastid if $count; } # save an event that we get sub save_event { my $data = shift; my $id = $data->{itemid}; # convenience # DO NOT SET REALTIME HERE. It is set by syncitems. foreach (qw(subject anum event eventtime security allowmask poster)) { next unless $data->{$_}; my $tmp = pack('C*', unpack('C*', $data->{$_})); $bak{"event:$_:$id"} = $tmp; } my @props; while (my ($p, $v) = each %{$data->{props} || {}}) { $bak{"event:prop:$id:$p"} = $v; push @props, $p; } $bak{"event:proplist:$id"} = join ',', @props; # so we don't have to sort through the whole database } # load up an event given an id sub load_event { my $id = shift; my %hash = ( props => {} ); foreach (qw(subject anum event eventtime security allowmask poster realtime)) { $hash{$_} = $bak{"event:$_:$id"}; } my $proplist = $bak{"event:proplist:$id"}; my @props = split ',', $proplist; foreach (@props) { $hash{props}->{$_} = $bak{"event:prop:$id:$_"}; } $hash{itemid} = $id; return \%hash; } # updates a comment (state and posterid) sub update_comment { my $new = shift; my $old = load_comment($new->{id}); return unless $old && $old->{id}; $old->{$_} = $new->{$_} foreach qw(state posterid); save_comment($old); } # takes in a comment hashref and saves it to the database sub save_comment { my $data = shift; $bak{"comment:state:$data->{id}"} = "$data->{state}:$data->{posterid}:$data->{jitemid}:$data->{parentid}"; foreach (qw(subject body date)) { next unless $data->{$_}; # GDBM doesn't deal with UTF-8, it only wants a string of bytes, so let's do that # by clearing the UTF-8 flag on our input scalars. my $tmp = pack('C*', unpack('C*', $data->{$_})); $bak{"comment:$_:$data->{id}"} = $tmp; } } # load a comment up into a hash and return the hash sub load_comment { my $id = shift; my $state = $bak{"comment:state:$id"}; return {} unless $state; my @data = ($1, $2, $3, $4) if $state =~ /^(\w):(\d+):(\d+):(\d+)$/; my %hash = ( id => $id, subject => $bak{"comment:subject:$id"}, body => $bak{"comment:body:$id"}, date => $bak{"comment:date:$id"}, state => $data[0] || 'D', posterid => $data[1]+0, jitemid => $data[2]+0, parentid => $data[3]+0, ); return \%hash; } sub do_authed_fetch { my ($mode, $startid, $numitems, $sess) = @_; d("do_authed_fetch: mode = $mode, startid = $startid, numitems = $numitems, sess = $sess"); # hit up the server with the specified information and return the raw content my $ua = LWP::UserAgent->new; $ua->agent('JBackup/1.0'); my $authas = $opts{usejournal} ? "&authas=$opts{usejournal}" : ''; my $request = HTTP::Request->new(GET => "http://$opts{server}/export_comments.bml?get=$mode&startid=$startid&numitems=$numitems$authas"); $request->push_header(Cookie => "ljsession=$sess"); my $response = $ua->request($request); return if $response->is_error(); my $xml = $response->content(); return $xml if $xml; # blah d("do_authed_fetch: failure! retrying"); return do_authed_fetch($mode, $startid, $numitems, $sess); } sub do_dump { # raw handler preemption my $dt = shift; return raw_dump() if $dt eq 'raw'; # put our data into a format usable by the dumpers d("do_dump: loading comments"); my %data; my @ids = split ',', $bak{"comment:ids"}; foreach my $id (@ids) { $data{$id} = load_comment($id); } # get the usermap loaded d("do_dump: loading users"); my %usermap; my @userids = split ',', $bak{"usermap:userids"}; foreach my $id (@userids) { $usermap{$id} = $bak{"usermap:$id"}; } # now let's hit up the events d("do_dump: loading events"); my %events; @ids = split ',', $bak{"event:ids"}; foreach my $id (@ids) { $events{$id} = load_event($id); delete $events{$id} if $opts{publiconly} && $events{$id}->{security} && $events{$id}->{security} ne 'public'; } # and now, the wild and crazy 'dump this' handler ... in case you can't tell, it just # dispatches to the appropriate dumper, and if an invalid dump type is specified, it # tells the user they can't do that my $content = ({html => \&dump_html, xml => \&dump_xml}->{$dt} || \&dump_invalid)->(\%data, \%usermap, \%events); if ($opts{file}) { # open file and print open FILE, ">$opts{file}" or die "do_dump: unable to open file: $!\n"; print FILE $content; close FILE; } else { # just throw it out, oh well print $content; } } sub do_alter_security { # raw handler preemption my ($newsec, $confirmed) = @_; # verify new security my ($security, $allowmask); if ($newsec eq 'friends') { ($security, $allowmask) = ('usemask', 1); } elsif ($newsec eq 'private') { ($security, $allowmask) = ('private', 0); } else { # probably a group? load their groups my $groups = call_xmlrpc('getfriendgroups', { ver => 1 }); foreach my $group (@{$groups->{friendgroups} || []}) { if ($group->{name} eq $newsec) { # it's this group, set it up ($security, $allowmask) = ('usemask', 1 << $group->{id}); } } } die "New security must be one of: friends, private, or the name of a group you have.\n" unless defined $security && defined $allowmask; d("do_alter_security: new security = $security ($allowmask)"); # load up the user's events d("do_alter_security: loading events"); my %events; my @ids = split ',', $bak{"event:ids"}; foreach my $id (@ids) { $events{$id} = load_event($id); # delete events that are not public delete $events{$id} if $events{$id}->{security} && $events{$id}->{security} ne 'public'; } # now spit out to the user what we're going to change unless ($confirmed) { foreach my $evt (sort { $a->{eventtime} cmp $b->{eventtime} } values %events) { my ($subj, $time) = ($evt->{subject} || '(no subject)', $evt->{eventtime}); my $ditemid = $evt->{itemid} * 256 + $evt->{anum}; $subj = substr($subj, 0, 40); printf "\%-45s\%s\n", $subj, "http://$opts{server}/users/$opts{linkuser}/$ditemid.html"; } return; } # if we're confirmed we get here and we should handle uploading the changed entries foreach my $evt (sort { $a->{eventtime} cmp $b->{eventtime} } values %events) { # make SURE we have event text (otherwise we delete their entry) die "FATAL: no event text for event itemid $evt->{itemid}!\n" unless $evt->{event}; # break up the event time my ($year, $mon, $day, $hour, $min); if ($evt->{eventtime} =~ /^(\d\d\d\d)-(\d\d)-(\d\d) (\d\d):(\d\d):\d\d$/) { ($year, $mon, $day, $hour, $min) = ($1, $2, $3, $4, $5); } else { # if we have no time, this is also fatal die "FATAL: $evt->{eventtime} does not match expected eventtime format.\n"; } # now call for the update my $hash = call_xmlrpc('editevent', { ver => 1, itemid => $evt->{itemid}, event => $evt->{event}, subject => $evt->{subject}, security => $security, allowmask => $allowmask, props => $evt->{props}, # hashref usejournal => $evt->{linkuser}, year => $year, mon => $mon, day => $day, hour => $hour, min => $min, }); # see what we got back and make sure it's kosher die "FATAL: Server sent back ($hash->{itemid}, $hash->{anum}) but expected ($evt->{itemid}, $evt->{anum}).\n" if $hash->{itemid} != $evt->{itemid} || $hash->{anum} != $evt->{anum}; # print success my $ditemid = $hash->{itemid} * 256 + $hash->{anum}; printf "\%s\n%-35s\%s\n\n", ($evt->{subject} || "(no subject)"), "public -> $security ($allowmask)", "http://$opts{server}/users/$opts{linkuser}/$ditemid.html"; } # tell user to run --sync print "WARNING: you should now run jbackup.pl again with the --sync\n" . "option, AFTER making a backup copy of your current jbak GDBM\n" . "file. That way, if anything got messed up, you still have your journal.\n"; } sub dump_invalid { d("dump_invalid: invalid dump type"); return "Invalid dump type specified. Valid values are xml, html, and raw.\n"; } # makes an array of trees of comments so they can easily be parsed in dumpers sub make_tree { d("make_tree: calculating"); my $comments = shift; my %jitems; my %children; while (my ($id, $data) = each %$comments) { if ($data->{parentid}) { # not a top level comment push @{$children{$data->{parentid}}}, $id; } else { # top level comment, so add it to the list push @{$jitems{$data->{jitemid}}}, $id; } } # now we want to sort all the comments by date while (my ($id, $list) = each %children) { $children{$id} = [ sort { $comments->{$a}{date} cmp $comments->{$b}{date} } @$list ]; } while (my ($id, $list) = each %jitems) { $jitems{$id} = [ sort { $comments->{$a}{date} cmp $comments->{$b}{date} } @$list ]; } # now we have all the location information necessary to construct our array my $creator; $creator = sub { my ($jitemid, $jtalkid) = @_; # two modes: first creates hashref for an entry, second an arrayref of comments if ($jitemid) { my @temp; foreach my $id (@{$jitems{$jitemid}}) { # we get comment ids here push @temp, $creator->(0, $id); } return \@temp; } elsif ($jtalkid) { my $hash = $comments->{$jtalkid}; push @{$hash->{children}}, $creator->(0, $_) foreach @{$children{$jtalkid} || []}; return $hash; } }; # create the result array to send back my %res; $res{$_} = $creator->($_, 0) foreach keys %jitems; # all done return \%res; } sub prune_nonvisible { # prunes out nonvisible trunks of the passed comment tree. a nonvisible trunk is defined # as a part of the comment tree that has no visible children. this could mean they're all # deleted, or perhaps they're all screened and we're hiding private data. however, note # that we show normally hidden things if a visible comment is further down the trunk, but # we want to show as little as possible, so we prune out most things. my $stem = shift; my $anyvis = 0; # any visible? # hit up each child my @list; foreach my $data (@{$stem->{children} || []}) { $data = prune_nonvisible($data); if ($data && %$data) { $anyvis = 1; push @list, $data; } } $stem->{children} = \@list; # now hop back and undefine this stem if necessary. we undefine if we have no visible # children and we are also not visible. $stem = undef if !$anyvis && $stem->{state} ne 'A'; return $stem; } sub dump_html { my ($comments, $users, $events) = @_; d("dump_html: dumping."); # dumper my $ret = ""; my $cdumper; $cdumper = sub { my ($ary, $link, $anum, $level) = @_; foreach my $data (@{$ary || []}) { # prune out paths that we shouldn't see $data = prune_nonvisible($data); next unless $data; # we have something to dump, so let's get to it $ret .= "
\n"; my $col = ($level % 2) ? '#bbb' : '#ddd'; $ret .= "
\n"; if ($data->{state} eq 'D') { $ret .= "(deleted comment)"; } elsif ($data->{state} eq 'S' && $opts{publiconly}) { $ret .= "(screened comment)"; } else { my $ditemid = $data->{id} * 256 + $anum; my $commentlink = "$link?thread=$ditemid#t$ditemid"; $ret .= $data->{posterid} ? "Comment by $users->{$data->{posterid}} " : "Anonymous comment "; $ret .= "on $data->{date}
\n"; $data->{subject} = $opts{clean} ? clean_subject($data->{subject}) : ehtml($data->{subject}); $ret .= "Subject: $data->{subject}
\n" if $data->{subject}; $data->{body} = $opts{clean} ? clean_comment($data->{body}) : ehtml($data->{body}); $ret .= $data->{body} . "\n
"; my $replylink = "$link?replyto=$ditemid"; $ret .= "(reply)\n"; } $ret .= "
\n"; # now hit up their children $cdumper->($data->{children}, $link, $anum, $level+1); $ret .= "
\n"; } }; # iterate through all entries, sorted by date my $tree = make_tree($comments); my $maxcount = scalar keys %$events; my $count = 0; foreach my $evt (sort { $a->{eventtime} cmp $b->{eventtime} } values %{$events || {}}) { $ret .= "
\n"; my $itemid = $evt->{itemid} * 256 + $evt->{anum}; my $link = "http://$opts{server}/users/$opts{linkuser}/$itemid.html"; $evt->{subject} = $opts{clean} ? clean_subject($evt->{subject}) : ehtml($evt->{subject}); $ret .= "$evt->{subject}" if $evt->{subject}; my $altposter = $evt->{poster} ? " (posted by $evt->{poster})" : ""; $ret .= "$altposter
\n"; $ret .= "$evt->{eventtime}

\n"; $evt->{event} = $opts{clean} ? clean_event($evt->{event}) : ehtml($evt->{event}); $ret .= "$evt->{event}
"; $ret .= "(reply)
\n"; $cdumper->($tree->{$evt->{itemid}}, $link, $evt->{anum}); # dump comments $ret .= "
\n"; $count++; unless ($count % 100) { my $str = sprintf "%.2f%% ...", ($count / $maxcount * 100); d($str); } } $ret .= ""; d("100.00% ..."); # just to make it look polished d("dump_html: done."); return $ret; } sub dump_xml { my ($comments, $users, $events) = @_; d("dump_xml: dumping."); # comment dumper my $ret; my $cdumper; $cdumper = sub { my ($ary, $level) = @_; my $res; foreach my $data (@{$ary || []}) { # prune out paths that we shouldn't see $data = prune_nonvisible($data); next unless $data; # we have something to dump, so let's get to it $res .= "\t\t\t\t{posterid}; $res .= " parentid='$data->{parentid}'" if $data->{parentid}; $res .= " state='$data->{state}'" if $data->{state} ne 'A'; $res .= ">\n"; $res .= "\t\t\t\t\t$data->{date}\n"; unless ($data->{state} eq 'D' || $data->{state} eq 'S' && $opts{publiconly}) { # spit out subject/body info foreach (qw(subject body)) { $data->{$_} = exml($data->{$_}); $res .= "\t\t\t\t\t<$_>$data->{$_}\n" if $data->{$_}; } } # now hit up their children my $sc = $cdumper->($data->{children}, $level+1); $res .= "\t\t\t\t\t\n$sc\t\t\t\t\t\n" if $sc; $res .= "\t\t\t\t\n"; } return $res; }; # dump xml formatted comments $ret .= "\n"; $ret .= "\n\t\n"; # now start iterating my $tree = make_tree($comments); my $maxcount = scalar keys %$events; my $count = 0; foreach my $evt (sort { $a->{eventtime} cmp $b->{eventtime} } values %{$events || {}}) { my $ditemid = $evt->{itemid} * 256 + $evt->{anum}; $ret .= "\t\t{security} && $evt->{security} ne 'public'; $ret .= " allowmask='$evt->{allowmask}'" if $evt->{allowmask}; $ret .= " poster='$evt->{poster}'" if $evt->{poster}; $ret .= ">\n"; foreach (qw(subject event)) { $evt->{$_} = exml($evt->{$_}); $ret .= "\t\t\t<$_>$evt->{$_}\n" if $evt->{$_}; } $ret .= "\t\t\t$evt->{eventtime}\n"; $ret .= "\t\t\t$evt->{realtime}\n"; my $p; while (my ($k, $v) = each %{$evt->{props} || {}}) { $k = exml($k); $v = exml($v); $p .= "\t\t\t\t\n"; } $ret .= "\t\t\t\n$p\t\t\t\n" if $p; my $c = $cdumper->($tree->{$evt->{itemid}}); # dump comments $ret .= "\t\t\t\n$c\t\t\t\n" if $c; $ret .= "\t\t\n"; $count++; unless ($count % 100) { my $str = sprintf "%.2f%% ...", ($count / $maxcount * 100); d($str); } } d("100.00% ..."); # spit and polish # close out, we're done $ret .= "\t\n\n"; d("dump_xml: done."); return $ret; } sub xmlrpc_call_helper { # helper function that makes life easier on folks that call xmlrpc stuff. this handles # running the actual request and checking for errors, as well as handling the cases where # we hit a problem and need to do something about it. (abort or retry.) my ($xmlrpc, $method, $req, $mode, $hash) = @_; d("\t\txmlrpc_call_helper: $method"); my $res; eval { $res = $xmlrpc->call($method, $req); }; if ($res && $res->fault) { # fatal error, so don't use d() as we want to print even in case of non-verbosity print STDERR "xmlrpc_call_helper error:\n\tString: " . $res->faultstring . "\n\tCode: " . $res->faultcode . "\n"; do_abort(); exit 1; } unless ($res) { # when server times out d("\t\txmlrpc_call_helper: timeout... retrying."); return call_xmlrpc($mode, $hash); } return $res->result; } sub call_xmlrpc { # also a way to help people do xmlrpc stuff easily. this method actually does the # challenge response stuff so we never send the user's password or md5 digest over # the intarweb. of course, we say nothing about the user's password security anyway... my ($mode, $hash) = @_; $hash ||= {}; my $xmlrpc = new XMLRPC::Lite; $xmlrpc->proxy("http://$opts{server}/interface/xmlrpc"); my $chal; while (!$chal) { my $get_chal = xmlrpc_call_helper($xmlrpc, 'LJ.XMLRPC.getchallenge'); $chal = $get_chal->{'challenge'}; } #d("\tcall_xmlrpc: challenge obtained: $chal"); my $response = md5_hex($chal . ($opts{md5password} ? $opts{md5password} : md5_hex($opts{password}))); #d("\tcall_xmlrpc: calling LJ.XMLRPC.$mode"); my $res = xmlrpc_call_helper($xmlrpc, "LJ.XMLRPC.$mode", { 'username' => $opts{user}, 'auth_method' => 'challenge', 'auth_challenge' => $chal, 'auth_response' => $response, %$hash, # interpolate $hash into our hash here...isn't Perl great? }, $mode, $hash); return $res; } sub do_flush { # simply flush ourselves d('do_flush: flushing database'); $tied->sync(); } sub do_tie { # try to open the database for access d("do_tie: tying database"); my $x = tie %bak, 'GDBM_File', $filename, &GDBM_WRCREAT, 0600 or die "Could not open/tie $filename: $!\n"; return $x; }; sub do_untie { # close our database. d("do_untie: untying database"); return untie %bak; }; sub do_abort { # hard abort. save our database and just exit right back to the OS. print STDERR "Aborted.\n"; do_untie(); exit 1; }; sub raw_dump { # dump out the raw GDBM data while (my ($k, $v) = each %bak) { print "$k = $v\n"; } } sub exml { # stolen from ljlib.pl, LJ::exml # fast path for the commmon case: return $_[0] unless $_[0] =~ /[&\"\'<>\x00-\x08\x0B\x0C\x0E-\x1F]/; # what are those character ranges? XML 1.0 allows: # #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] my $a = shift; $a =~ s/\&/&/g; $a =~ s/\"/"/g; $a =~ s/\'/'/g; $a =~ s//>/g; $a =~ s/[\x00-\x08\x0B\x0C\x0E-\x1F]//g; return $a; } sub ehtml { # also stolen from ljlib.pl, LJ::ehtml # fast path for the commmon case: return $_[0] unless $_[0] =~ /[&\"\'<>]/; # this is faster than doing one substitution with a map: my $a = $_[0]; $a =~ s/\&/&/g; $a =~ s/\"/"/g; $a =~ s/\'/&\#39;/g; $a =~ s//>/g; return $a; } # yeah, the cleaners are pretty sad right now. the idea is that perhaps the LJ HTML cleaner can # be invoked if the user typed the --clean option, it just hasn't been coded in yet. for now, if # they specify --clean, we will just replace poll tags with links to the poll, and not do much else. sub clean_event { my $input = shift; $input =~ s!!View poll.!g; return $input; } sub clean_comment { my $input = shift; return $input; } sub clean_subject { my $input = shift; return $input; }