| 1 | #!/usr/bin/perl -w |
|---|
| 2 | |
|---|
| 3 | use strict; |
|---|
| 4 | use MT; |
|---|
| 5 | use MT::Author; |
|---|
| 6 | use MT::Entry; |
|---|
| 7 | use MT::Comment; |
|---|
| 8 | use MT::Trackback; |
|---|
| 9 | use MT::TBPing; |
|---|
| 10 | |
|---|
| 11 | package MT; |
|---|
| 12 | |
|---|
# Backing store for param(). It is a single file-scoped hash, so every caller
# (whatever invocant is used) reads and writes the same set of values.
my %param;

# param($name)          - returns the value stored under $name (undef if unset).
# param($name, $value)  - stores $value under $name and returns it.
#
# Defined in package MT so the script can call $app->param(...) on the MT
# instance it creates; the invocant itself is never consulted.
sub param {
    my ($app, $key, @new_value) = @_;

    # Any argument after the name -- even undef -- turns this into a setter.
    if (@new_value) {
        return $param{$key} = $new_value[0];
    }
    return $param{$key};
}
|---|
| 19 | |
|---|
| 20 | package main; |
|---|
| 21 | |
|---|
| 22 | use Getopt::Long; |
|---|
| 23 | use Pod::Usage; |
|---|
# Command-line handling. Both record types are scanned unless switched off
# with --nocomments / --notrackbacks.
my $comments   = 1;
my $trackbacks = 1;
GetOptions(
    "comments!"   => \$comments,
    "trackbacks!" => \$trackbacks,
    "help|?"      => \my($help),
    "blog_id=i"   => \my($blog_id),
    "offset=i"    => \my($offset),
    "limit=i"     => \my($limit),
    "config=s"    => \my($cfg),
) or pod2usage(2);
pod2usage(1) if $help;

# Disabling both scans leaves nothing to do, so treat it as a usage error.
($comments || $trackbacks) or pod2usage(2);

# Use direct method-call syntax and stop with MT's own error message if the
# application object cannot be created, rather than failing later with
# "Can't call method param on an undefined value".
my $app = MT->new($cfg ? ('Config' => $cfg) : ())
    or die "Unable to initialise MT: " . (MT->errstr || 'unknown error') . "\n";

# Scan settings travel to find_junk() through the param() accessor.
$app->param('blog_id', $blog_id) if $blog_id;
$app->param('limit',   $limit)   if $limit;
$app->param('offset',  $offset)  if $offset;

if ($comments) {
    $app->param('_type', 'comment');
    find_junk($app);
}
if ($trackbacks) {
    $app->param('_type', 'tb');
    find_junk($app);
}
|---|
| 48 | |
|---|
# Handler registered by find_junk() for the 'NotJunkTest' callback.
#
# Runs MT::JunkFilter->filter on the record, then returns 0 when the record's
# is_junk method reports 1 and returns 1 otherwise.
sub _cb_notjunktest_filter {
    my $obj = $_[1];    # $_[0] is the callback handle; it is not needed here

    require MT::JunkFilter;
    MT::JunkFilter->filter($obj);

    return 0 if $obj->is_junk == 1;
    return 1;
}
|---|
| 55 | |
|---|
# find_junk()
#
# Scans records of the kind named by the '_type' parameter ('comment' or
# 'tb') whose junk_status is 0, newest first, runs each one through the
# 'NotJunkTest' callback and prints a report for every record the callback
# rejects, followed by a summary of how many were scanned and flagged.
#
# All settings are read from the MT instance's param() values:
#   blog_id - restrict the scan to one blog (optional)
#   offset  - number of records to skip (defaults to 0)
#   limit   - maximum number of records to examine (optional)
#   _type   - 'comment' or 'tb'; any other value makes this a no-op
#
# Arguments passed by the caller are ignored; the application object is
# always obtained via MT->instance.
sub find_junk {
    my $app     = MT->instance;
    my $blog_id = $app->param('blog_id');
    my $offset  = $app->param('offset') || 0;
    my $limit   = $app->param('limit');
    my $type    = $app->param('_type');

    # The two scans differ only in the class that supplies the iterator.
    my %class_for = (
        comment => 'MT::Comment',
        tb      => 'MT::TBPing',
    );
    my $class = defined $type ? $class_for{$type} : undef;
    return unless $class;

    my $iter = $class->load_iter(
        {
            ($blog_id ? (blog_id => $blog_id) : ()),
            junk_status => 0,
        },
        {
            'sort'      => 'created_on',
            'direction' => 'descend',
            offset      => $offset,
        }
    );

    # NOTE(review): this runs on every call, so a run that scans both
    # comments and trackbacks registers the handler twice -- confirm whether
    # MT de-duplicates registrations.
    MT->_register_core_callbacks({NotJunkTest =>
                                  \&_cb_notjunktest_filter});

    print "Scanning " . ($type eq 'comment' ? "Comments" : "TrackBacks") . "...\n";
    my $count = 0;
    my $junk  = 0;
    while (my $obj = $iter->()) {
        last if $limit && $count == $limit;
        $count++;

        # Filter a copy so the record fetched from the iterator is not the
        # object the callback modifies.
        my $subject = $obj->clone;
        next if MT->run_callbacks('NotJunkTest', $subject);

        # A false result can be accompanied by an error message; report it
        # and clear it so it is not attributed to the next record. The
        # defined check avoids an "uninitialized value" warning under -w.
        my $err = MT->errstr;
        if (defined $err && $err =~ m/\w/) {
            print STDERR "** error from callback: " . $err . "\n";
            MT->error(undef);
        }
        display_junk($subject);
        $junk++;
    }

    print "\nScanned: $count records\n";
    print " Found: $junk junk items\n\n";
}
|---|
| 104 | |
|---|
# _indent_lines($text, $prefix)
#
# Returns $text with trailing newlines removed and $prefix placed in front of
# every line. An undefined $text is treated as the empty string.
sub _indent_lines {
    my ($text, $prefix) = @_;
    $text = '' unless defined $text;

    # Strip trailing newlines BEFORE indenting; once the prefix has been
    # inserted after each newline the text no longer ends in "\n", and a
    # dangling prefix-only line would be left behind.
    $text =~ s/\n+\z//;
    $text =~ s/\n/\n$prefix/g;
    return $prefix . $text;
}

# display_junk($obj)
#
# Prints a human-readable report for one flagged record to the currently
# selected output handle: its ID, the entry or category it targets, the junk
# log and score, and its content fields.
#
#   $obj - the flagged record. Objects that are MT::Comment instances are
#          reported as comments; anything else is treated as a TrackBack
#          ping and resolved through MT::Trackback.
sub display_junk {
    my ($obj) = @_;

    # Undefined fields print as empty strings instead of raising
    # "uninitialized value" warnings under -w.
    my $show = sub { defined $_[0] ? $_[0] : '' };

    my ($entry, $cat, $type);
    if ($obj->isa('MT::Comment')) {
        $type  = 'comment';
        $entry = MT::Entry->load($obj->entry_id);
    }
    else {
        $type = 'trackback';
        my $tb = MT::Trackback->load($obj->tb_id);
        if (!$tb) {
            # The TrackBack record could not be loaded; the target is
            # reported as unknown below.
        }
        elsif ($tb->entry_id) {
            $entry = MT::Entry->load($tb->entry_id);
        }
        elsif ($tb->category_id) {
            # MT::Category is not loaded at the top of the file, so load it
            # here before using it.
            require MT::Category;
            $cat = MT::Category->load($tb->category_id);
        }
    }

    print "Junk $type found -- ID " . $obj->id . "\n";
    print "\tTarget: ";
    if ($entry) {
        print "Entry \"" . $show->($entry->title) . "\" (" . $entry->id . ")\n";
    }
    elsif ($cat) {
        print "Category \"" . $show->($cat->label) . "\" (" . $cat->id . ")\n";
    }
    else {
        # Always terminate the line so the log header starts on its own row.
        print "(unknown)\n";
    }

    print "\t Log:\n";
    print _indent_lines($obj->junk_log, "\t\t") . "\n";
    print "\t Score: " . $show->($obj->junk_score) . "\n";
    print "\tContent:\n";
    if ($type eq 'comment') {
        my $authn = 'No';
        if ($obj->commenter_id) {
            # The commenter's author record may no longer exist.
            my $author = MT::Author->load($obj->commenter_id);
            $authn = 'Yes: '
                . ($author ? $show->($author->name) : '(commenter record not found)');
        }
        print "\t\t Name: " . $show->($obj->author) . "\n";
        print "\t\t URL: " . $show->($obj->url) . "\n";
        print "\t\t E-mail: " . $show->($obj->email) . "\n";
        print "\t\t IP: " . $show->($obj->ip) . "\n";
        print "\t\t Authn: " . $authn . "\n";
        print "\t\t Text:\n" . _indent_lines($obj->text, "\t\t\t") . "\n";
    }
    elsif ($type eq 'trackback') {
        print "\t\t Blog: " . $show->($obj->blog_name) . "\n";
        print "\t\t URL: " . $show->($obj->source_url) . "\n";
        print "\t\t E-mail: " . $show->($obj->email) . "\n";
        print "\t\t IP: " . $show->($obj->ip) . "\n";
        print "\t\t Title: " . $show->($obj->title) . "\n";
        print "\t\tExcerpt:\n" . _indent_lines($obj->excerpt, "\t\t\t") . "\n";
    }
    print "\n";
}
|---|
| 164 | |
|---|
| 165 | __END__ |
|---|
| 166 | |
|---|
| 167 | =head1 NAME |
|---|
| 168 | |
|---|
| 169 | find-junk - report comments and TrackBack pings that the junk filter flags |
|---|
| 170 | |
|---|
| 171 | =head1 SYNOPSIS |
|---|
| 172 | |
|---|
| 173 | find-junk |
|---|
| 174 | --nocomments Prevents filtering comments |
|---|
| 175 | --notrackbacks Prevents filtering trackback pings |
|---|
| 176 | --blog_id <id> Limit scan to a particular blog |
|---|
| 177 | --offset <n> Specify number of records to skip |
|---|
| 178 | --limit <n> Specify number of records to process |
|---|
| 179 | --config <cfg> Specify MT configuration path and file |
|---|
| 180 | --help | -? Usage information |
|---|
| 181 | |
|---|
| 182 | By default, all comments and trackbacks are scanned (unless the comment |
|---|
| 183 | or trackback record has been intentionally identified by the user as |
|---|
| 184 | "not junk" or "is junk"). |
|---|