| 1 | # Movable Type (r) Open Source (C) 2006-2008 Six Apart, Ltd. |
|---|
| 2 | # This program is distributed under the terms of the |
|---|
| 3 | # GNU General Public License, version 2. |
|---|
| 4 | # |
|---|
| 5 | # $Id$ |
|---|
| 6 | |
|---|
| 7 | # Original copyright (c) 2004-2006, Brad Choate and Tobias Hoellrich |
|---|
| 8 | |
|---|
| 9 | package spamlookup; |
|---|
| 10 | |
|---|
| 11 | use strict; |
|---|
| 12 | use MT::JunkFilter qw(ABSTAIN); |
|---|
| 13 | |
|---|
| 14 | sub tborigin { |
|---|
| 15 | my $plugin = shift; |
|---|
| 16 | my ($obj) = @_; |
|---|
| 17 | |
|---|
| 18 | # only filter TrackBack pings... |
|---|
| 19 | return (ABSTAIN) unless UNIVERSAL::isa($obj, 'MT::TBPing'); |
|---|
| 20 | |
|---|
| 21 | my $domain = extract_domains($obj->source_url, 1); |
|---|
| 22 | |
|---|
| 23 | my $config = $plugin->get_config_hash('blog:' . $obj->blog_id); # config($plugin); |
|---|
| 24 | my $pingip = $obj->ip; |
|---|
| 25 | |
|---|
| 26 | if (domain_or_ip_in_whitelist($domain, $pingip, $config->{whitelist})) { |
|---|
| 27 | return (ABSTAIN); |
|---|
| 28 | } |
|---|
| 29 | |
|---|
| 30 | my $score = int($config->{tborigin_weight}) || 1; |
|---|
| 31 | my $domainip = checkdns($domain); |
|---|
| 32 | if (!$domainip) { |
|---|
| 33 | return (-1 * $score, MT->translate("Failed to resolve IP address for source URL [_1]", $obj->source_url)); |
|---|
| 34 | } |
|---|
| 35 | |
|---|
| 36 | my @domainip = split /\./, $domainip; |
|---|
| 37 | my @pingip = split /\./, $pingip; |
|---|
| 38 | |
|---|
| 39 | my $distance = 4; |
|---|
| 40 | foreach (0..3) { |
|---|
| 41 | if ($domainip[$_] == $pingip[$_]) { |
|---|
| 42 | $distance--; |
|---|
| 43 | } else { |
|---|
| 44 | last; |
|---|
| 45 | } |
|---|
| 46 | } |
|---|
| 47 | |
|---|
| 48 | return (ABSTAIN) if $distance < 3; |
|---|
| 49 | |
|---|
| 50 | # reverse lookup ip address if we can. if it matches to the |
|---|
| 51 | # domain of the source url, then ABSTAIN. |
|---|
| 52 | |
|---|
| 53 | my $hostname = reversedns($pingip); |
|---|
| 54 | if ($hostname) { |
|---|
| 55 | if (domain_or_ip_in_whitelist($hostname, undef, $config->{whitelist})) { |
|---|
| 56 | return (ABSTAIN); |
|---|
| 57 | } |
|---|
| 58 | $domain = lc $domain; |
|---|
| 59 | $hostname = lc $hostname; |
|---|
| 60 | if ($domain =~ m/\Q$hostname\E$/) { |
|---|
| 61 | return (ABSTAIN); |
|---|
| 62 | } |
|---|
| 63 | } |
|---|
| 64 | |
|---|
| 65 | # check distance of sender's IP. if it is too far from the |
|---|
| 66 | # source url domain, moderate/junk it. |
|---|
| 67 | if ($config->{tborigin_mode} == 2) { |
|---|
| 68 | $obj->moderate; |
|---|
| 69 | return (0, MT->translate("Moderating: Domain IP does not match ping IP for source URL [_1]; domain IP: [_2]; ping IP: [_3]", $obj->source_url, $domainip, $pingip)); |
|---|
| 70 | } |
|---|
| 71 | |
|---|
| 72 | if ($config->{tborigin_mode} == 1) { |
|---|
| 73 | return (-1 * $score, |
|---|
| 74 | MT->translate("Domain IP does not match ping IP for source URL [_1]; domain IP: [_2]; ping IP: [_3]", $obj->source_url, $domainip, $pingip)); |
|---|
| 75 | } |
|---|
| 76 | |
|---|
| 77 | return (ABSTAIN); |
|---|
| 78 | } |
|---|
| 79 | |
|---|
| 80 | sub urls { |
|---|
| 81 | my $plugin = shift; |
|---|
| 82 | my ($obj) = @_; |
|---|
| 83 | |
|---|
| 84 | my $config = $plugin->get_config_hash('blog:' . $obj->blog_id); # config($plugin); |
|---|
| 85 | |
|---|
| 86 | # URL tests... |
|---|
| 87 | |
|---|
| 88 | # count URLs... |
|---|
| 89 | my $nurls = 0; |
|---|
| 90 | my $text = $obj->all_text; |
|---|
| 91 | my @domains = extract_domains($text, 0, \$nurls); |
|---|
| 92 | |
|---|
| 93 | if ($config->{urlcount_none_mode}) { |
|---|
| 94 | return (int($config->{urlcount_none_weight}) || 1, |
|---|
| 95 | MT->translate("No links are present in feedback")) unless $nurls; |
|---|
| 96 | } |
|---|
| 97 | |
|---|
| 98 | my $domain; |
|---|
| 99 | if (UNIVERSAL::isa($obj, 'MT::Comment')) { |
|---|
| 100 | $domain = extract_domains($obj->url, 1); |
|---|
| 101 | } elsif (UNIVERSAL::isa($obj, 'MT::TBPing')) { |
|---|
| 102 | $domain = extract_domains($obj->source_url, 1); |
|---|
| 103 | } |
|---|
| 104 | |
|---|
| 105 | my $pingip = $obj->ip; |
|---|
| 106 | |
|---|
| 107 | if (domain_or_ip_in_whitelist($nurls == 1 ? $domain : undef, $pingip, $config->{whitelist})) { |
|---|
| 108 | return (ABSTAIN); |
|---|
| 109 | } |
|---|
| 110 | |
|---|
| 111 | if ($config->{urlcount_junk_mode}) { |
|---|
| 112 | if ($nurls >= $config->{urlcount_junk_limit}) { |
|---|
| 113 | return (-1 * (int($config->{urlcount_junk_weight}) || 1), |
|---|
| 114 | MT->translate("Number of links exceed junk limit ([_1])", $config->{urlcount_junk_limit})); |
|---|
| 115 | } |
|---|
| 116 | } |
|---|
| 117 | |
|---|
| 118 | if ($config->{urlcount_moderate_mode}) { |
|---|
| 119 | if ($nurls >= $config->{urlcount_moderate_limit}) { |
|---|
| 120 | $obj->moderate; |
|---|
| 121 | return (0, |
|---|
| 122 | MT->translate("Number of links exceed moderation limit ([_1])", $config->{urlcount_moderate_limit})); |
|---|
| 123 | } |
|---|
| 124 | } |
|---|
| 125 | return (ABSTAIN); |
|---|
| 126 | } |
|---|
| 127 | |
|---|
| 128 | sub link_memory { |
|---|
| 129 | my $plugin = shift; |
|---|
| 130 | my ($obj) = @_; |
|---|
| 131 | |
|---|
| 132 | my $config = $plugin->get_config_hash('blog:'.$obj->blog_id); # config($plugin); |
|---|
| 133 | |
|---|
| 134 | if ($config->{priorurl_mode}) { |
|---|
| 135 | # this lookup is only effective on SQL databases since the |
|---|
| 136 | # comment_url column is unindexed. |
|---|
| 137 | if (!UNIVERSAL::isa(MT::Object->driver, 'MT::ObjectDriver::DBM')) { |
|---|
| 138 | if (UNIVERSAL::isa($obj, 'MT::Comment')) { |
|---|
| 139 | my @textdomains = extract_domains($obj->text); |
|---|
| 140 | if (!@textdomains) { |
|---|
| 141 | my $url = $obj->url; |
|---|
| 142 | $url =~ s/^\s+|\s+$//gs; |
|---|
| 143 | if ($url =~ m!https?://\w+!) { # valid url requirement... |
|---|
| 144 | require MT::Comment; |
|---|
| 145 | my $terms = { url => $url, |
|---|
| 146 | blog_id => $obj->blog_id, |
|---|
| 147 | visible => 1 }; |
|---|
| 148 | my $args; |
|---|
| 149 | if (my $grey = $config->{priorurl_greyperiod}) { |
|---|
| 150 | my $ts = time; |
|---|
| 151 | $ts -= $grey * (24 * 60 * 60); |
|---|
| 152 | require MT::Util; |
|---|
| 153 | $ts = MT::Util::epoch2ts($obj->blog_id, $ts); |
|---|
| 154 | $terms->{created_on} = [undef, $ts]; |
|---|
| 155 | $args->{range_incl}{created_on} = 1; |
|---|
| 156 | } |
|---|
| 157 | my $c = MT::Comment->load($terms, $args); |
|---|
| 158 | if ($c) { |
|---|
| 159 | return ((int($config->{priorurl_weight}) || 1), |
|---|
| 160 | MT->translate("Link was previously published (comment id [_1]).", $c->id)); |
|---|
| 161 | } |
|---|
| 162 | } |
|---|
| 163 | } |
|---|
| 164 | } elsif (UNIVERSAL::isa($obj, 'MT::TBPing')) { |
|---|
| 165 | my $url = $obj->source_url; |
|---|
| 166 | $url =~ s/^\s+|\s+$//gs; |
|---|
| 167 | my $terms = { source_url => $url, |
|---|
| 168 | blog_id => $obj->blog_id, |
|---|
| 169 | visible => 1 }; |
|---|
| 170 | my $args; |
|---|
| 171 | if ($config->{priorurl_greyperiod_mode}) { |
|---|
| 172 | my $grey = $config->{priorurl_greyperiod}; |
|---|
| 173 | my $ts = time; |
|---|
| 174 | $ts -= $grey * (24 * 60 * 60); |
|---|
| 175 | require MT::Util; |
|---|
| 176 | $ts = MT::Util::epoch2ts($obj->blog_id, $ts); |
|---|
| 177 | $terms->{created_on} = [undef, $ts]; |
|---|
| 178 | $args->{range_incl}{created_on} = 1; |
|---|
| 179 | } |
|---|
| 180 | my $t = MT::TBPing->load($terms, $args); |
|---|
| 181 | if ($t) { |
|---|
| 182 | return ((int($config->{priorurl_weight}) || 1), |
|---|
| 183 | MT->translate("Link was previously published (TrackBack id [_1]).", $t->id)); |
|---|
| 184 | } |
|---|
| 185 | } |
|---|
| 186 | } |
|---|
| 187 | } |
|---|
| 188 | return (ABSTAIN); |
|---|
| 189 | } |
|---|
| 190 | |
|---|
| 191 | sub email_memory { |
|---|
| 192 | my $plugin = shift; |
|---|
| 193 | my ($obj) = @_; |
|---|
| 194 | |
|---|
| 195 | my $config = $plugin->get_config_hash('blog:'. $obj->blog_id); |
|---|
| 196 | |
|---|
| 197 | if ($config->{prioremail_mode}) { |
|---|
| 198 | # this lookup is only effective on SQL databases since the |
|---|
| 199 | # comment_url collumn is unindexed. |
|---|
| 200 | if (UNIVERSAL::isa($obj, 'MT::Comment')) { |
|---|
| 201 | my $email = $obj->email; |
|---|
| 202 | $email =~ s/^\s+|\s+$//gs; |
|---|
| 203 | if ($email =~ m/\w+@\w+/) { |
|---|
| 204 | require MT::Comment; |
|---|
| 205 | my $terms = { email => $email, |
|---|
| 206 | blog_id => $obj->blog_id, |
|---|
| 207 | visible => 1 }; |
|---|
| 208 | my $args; |
|---|
| 209 | if ($config->{prioremail_greyperiod}) { |
|---|
| 210 | my $grey = $config->{prioremail_greyperiod}; |
|---|
| 211 | my $ts = time; |
|---|
| 212 | $ts -= $grey * (24 * 60 * 60); |
|---|
| 213 | require MT::Util; |
|---|
| 214 | $ts = MT::Util::epoch2ts($obj->blog_id, $ts); |
|---|
| 215 | $terms->{created_on} = [undef, $ts]; |
|---|
| 216 | $args->{range_incl}{created_on} = 1; |
|---|
| 217 | } |
|---|
| 218 | my $c = MT::Comment->load($terms, $args); |
|---|
| 219 | if ($c) { |
|---|
| 220 | return ((int($config->{prioremail_weight}) || 1), |
|---|
| 221 | MT->translate("E-mail was previously published (comment id [_1]).", $c->id)); |
|---|
| 222 | } |
|---|
| 223 | } |
|---|
| 224 | } |
|---|
| 225 | } |
|---|
| 226 | |
|---|
| 227 | return (ABSTAIN); |
|---|
| 228 | } |
|---|
| 229 | |
|---|
| 230 | sub wordfilter { |
|---|
| 231 | my $plugin = shift; |
|---|
| 232 | my ($obj) = @_; |
|---|
| 233 | |
|---|
| 234 | my $config = $plugin->get_config_hash('blog:'. $obj->blog_id); |
|---|
| 235 | |
|---|
| 236 | my $text = ''; |
|---|
| 237 | if (UNIVERSAL::isa($obj, 'MT::Comment')) { |
|---|
| 238 | # Comment |
|---|
| 239 | $text = join "\n", |
|---|
| 240 | "name:". ($obj->author || ''), |
|---|
| 241 | "email:" . ($obj->email || ''), |
|---|
| 242 | "url:" . ($obj->url || ''), |
|---|
| 243 | "text:" . ($obj->text || ''); |
|---|
| 244 | } else { |
|---|
| 245 | # TrackBack ping |
|---|
| 246 | $text = join "\n", |
|---|
| 247 | "blog:". ($obj->blog_name || ''), |
|---|
| 248 | "title:" . ($obj->title || ''), |
|---|
| 249 | "url:" . ($obj->source_url || ''), |
|---|
| 250 | "text:" . ($obj->excerpt || ''); |
|---|
| 251 | } |
|---|
| 252 | |
|---|
| 253 | my $decodedtext = decode_entities($text); |
|---|
| 254 | $decodedtext = '' if $text eq $decodedtext; |
|---|
| 255 | |
|---|
| 256 | if ($config->{wordlist_junk}) { |
|---|
| 257 | my @matches = wordlist_match($text, $config->{wordlist_junk}); |
|---|
| 258 | if (@matches && $decodedtext) { |
|---|
| 259 | @matches = wordlist_match($decodedtext, $config->{wordlist_junk}); |
|---|
| 260 | } |
|---|
| 261 | if (@matches) { |
|---|
| 262 | my $total_score = 0; |
|---|
| 263 | my @log; |
|---|
| 264 | foreach (@matches) { |
|---|
| 265 | my ($patt, $match, $score) = @$_; |
|---|
| 266 | $total_score += $score; |
|---|
| 267 | push @log, MT->translate("Word Filter match on '[_1]': '[_2]'.", $patt, $match); |
|---|
| 268 | } |
|---|
| 269 | return (-1 * ($total_score || 1), \@log); |
|---|
| 270 | } |
|---|
| 271 | } |
|---|
| 272 | |
|---|
| 273 | if ($config->{wordlist_moderate}) { |
|---|
| 274 | my @matches = wordlist_match($text, $config->{wordlist_moderate}); |
|---|
| 275 | if (!@matches && $decodedtext) { |
|---|
| 276 | @matches = wordlist_match($decodedtext, $config->{wordlist_moderate}); |
|---|
| 277 | } |
|---|
| 278 | if (@matches) { |
|---|
| 279 | my @log; |
|---|
| 280 | foreach (@matches) { |
|---|
| 281 | my ($patt, $match, $score) = @$_; |
|---|
| 282 | push @log, MT->translate("Moderating for Word Filter match on '[_1]': '[_2]'.", $patt, $match); |
|---|
| 283 | } |
|---|
| 284 | $obj->moderate; |
|---|
| 285 | return (0, \@log); |
|---|
| 286 | } |
|---|
| 287 | } |
|---|
| 288 | |
|---|
| 289 | return (ABSTAIN); |
|---|
| 290 | } |
|---|
| 291 | |
|---|
| 292 | sub domainbl { |
|---|
| 293 | my $plugin = shift; |
|---|
| 294 | my ($obj) = @_; |
|---|
| 295 | |
|---|
| 296 | my $config = $plugin->get_config_hash('blog:' . $obj->blog_id); # config($plugin); |
|---|
| 297 | return (ABSTAIN) unless $config->{domainbl_mode}; |
|---|
| 298 | |
|---|
| 299 | my @domainbl_service = split /\s*,?\s+/, $config->{domainbl_service}; |
|---|
| 300 | return (ABSTAIN) unless @domainbl_service; |
|---|
| 301 | |
|---|
| 302 | my $text = $obj->all_text; |
|---|
| 303 | my @domains = extract_domains($text); |
|---|
| 304 | my $remote_ip = $obj->ip; |
|---|
| 305 | |
|---|
| 306 | if (domain_or_ip_in_whitelist(\@domains, $remote_ip, $config->{whitelist})) { |
|---|
| 307 | return (ABSTAIN); |
|---|
| 308 | } |
|---|
| 309 | |
|---|
| 310 | foreach my $domain (@domains) { |
|---|
| 311 | next if $domain !~ m/\./; # ignore domain if it is just a single word |
|---|
| 312 | if ($domain =~ m/^(\d+)\.(\d+)\.(\d+)\.(\d+)$/) { |
|---|
| 313 | $domain = "$4.$3.$2.$1"; |
|---|
| 314 | } |
|---|
| 315 | foreach my $service (@domainbl_service) { |
|---|
| 316 | $service =~ s/^\.//; |
|---|
| 317 | $service =~ s/^\s+|\s+$//gs; |
|---|
| 318 | if (checkdns("$domain.$service.")) { |
|---|
| 319 | my $log = MT->translate("domain '[_1]' found on service [_2]", $domain, $service); |
|---|
| 320 | if ($config->{domainbl_mode} == 2) { |
|---|
| 321 | $obj->moderate; |
|---|
| 322 | return (0, $log); |
|---|
| 323 | } else { |
|---|
| 324 | return (-1 * (int($config->{domainbl_weight}) || 1), $log); |
|---|
| 325 | } |
|---|
| 326 | } |
|---|
| 327 | } |
|---|
| 328 | } |
|---|
| 329 | return (ABSTAIN); |
|---|
| 330 | } |
|---|
| 331 | |
|---|
| 332 | sub ipbl { |
|---|
| 333 | my $plugin = shift; |
|---|
| 334 | my ($obj) = @_; |
|---|
| 335 | |
|---|
| 336 | return (ABSTAIN) unless $obj->ip; |
|---|
| 337 | |
|---|
| 338 | my $config = $plugin->get_config_hash('blog:'. $obj->blog_id); # config($plugin); |
|---|
| 339 | return (ABSTAIN) unless $config->{ipbl_mode}; |
|---|
| 340 | |
|---|
| 341 | my $remote_ip = $obj->ip; |
|---|
| 342 | |
|---|
| 343 | if (domain_or_ip_in_whitelist(undef, $remote_ip, $config->{whitelist})) { |
|---|
| 344 | return (ABSTAIN); |
|---|
| 345 | } |
|---|
| 346 | |
|---|
| 347 | my ($a, $b, $c, $d) = split /\./, $remote_ip; |
|---|
| 348 | return (ABSTAIN) unless $a && $b && $c &&$d; |
|---|
| 349 | |
|---|
| 350 | my @ipbl_service = split /\s*,?\s+/, $config->{ipbl_service}; |
|---|
| 351 | return (ABSTAIN) unless @ipbl_service; |
|---|
| 352 | |
|---|
| 353 | foreach my $service (@ipbl_service) { |
|---|
| 354 | $service =~ s/^\.//; |
|---|
| 355 | $service =~ s/^\s+|\s+$//gs; |
|---|
| 356 | if (checkdns("$d.$c.$b.$a.$service.")) { |
|---|
| 357 | my $log = MT->translate("[_1] found on service [_2]", $remote_ip, $service); |
|---|
| 358 | if ($config->{ipbl_mode} == 2) { |
|---|
| 359 | $obj->moderate; |
|---|
| 360 | return (0, $log); |
|---|
| 361 | } else { |
|---|
| 362 | return (-1 * (int($config->{ipbl_weight}) || 1), $log); |
|---|
| 363 | } |
|---|
| 364 | } |
|---|
| 365 | } |
|---|
| 366 | return (ABSTAIN); |
|---|
| 367 | } |
|---|
| 368 | |
|---|
| 369 | ## Utility functions... not methods |
|---|
| 370 | |
|---|
| 371 | sub checkdns { |
|---|
| 372 | my ($name) = @_; |
|---|
| 373 | if ($name =~ m/^\d+\.\d+\.\d+\.\d+$/) { |
|---|
| 374 | return $name; |
|---|
| 375 | } |
|---|
| 376 | require MT::Request; |
|---|
| 377 | my $cache = MT::Request->instance->cache('checkdns_cache') || {}; |
|---|
| 378 | return $cache->{$name} if exists $cache->{$name}; |
|---|
| 379 | my $iaddr = gethostbyname($name); |
|---|
| 380 | return 0 unless $iaddr; |
|---|
| 381 | require Socket; |
|---|
| 382 | my $ip = Socket::inet_ntoa($iaddr); |
|---|
| 383 | $cache->{$name} = $ip; |
|---|
| 384 | MT::Request->instance->cache('checkdns_cache', $cache); |
|---|
| 385 | return $ip ? $ip : undef; |
|---|
| 386 | } |
|---|
| 387 | |
|---|
| 388 | sub reversedns { |
|---|
| 389 | my ($ip) = @_; |
|---|
| 390 | require MT::Request; |
|---|
| 391 | my $cache = MT::Request->instance->cache('reversedns_cache') || {}; |
|---|
| 392 | return $cache->{$ip} if exists $cache->{$ip}; |
|---|
| 393 | require Socket; |
|---|
| 394 | my $iaddr = Socket::inet_aton($ip); |
|---|
| 395 | my $name = gethostbyaddr($iaddr, Socket::AF_INET()); |
|---|
| 396 | return undef unless $name; |
|---|
| 397 | $cache->{$ip} = $name; |
|---|
| 398 | MT::Request->instance->cache('reversedns_cache', $cache); |
|---|
| 399 | return $name; |
|---|
| 400 | } |
|---|
| 401 | |
|---|
| 402 | sub extract_domains { |
|---|
| 403 | my ($str, $mode, $total) = @_; |
|---|
| 404 | |
|---|
| 405 | $mode ||= 0; |
|---|
| 406 | # unmunge so we can see encoded urls as well |
|---|
| 407 | $str = lc decode_entities($str); |
|---|
| 408 | my @urls; |
|---|
| 409 | my %seen; |
|---|
| 410 | while ($str =~ m!(?:ht(?:tp)?s?:)?//(?:[a-z0-9\-\.\+:]+@)?([a-z0-9\.\-]+)!gi) { |
|---|
| 411 | my $domain = $1; |
|---|
| 412 | $domain =~ s/^\s+//s; |
|---|
| 413 | $domain =~ s/\s+$//s; |
|---|
| 414 | $domain =~ s/^www\.//s; |
|---|
| 415 | next unless $domain; |
|---|
| 416 | next unless $domain =~ m/\./; |
|---|
| 417 | my @parts = split /\./, $domain; |
|---|
| 418 | next unless @parts; |
|---|
| 419 | if (($domain =~ m/^\d+\.\d+\.\d+\.\d+$/) || ($domain =~ m/^\d+$/)) { |
|---|
| 420 | $$total++ if(defined($total)); |
|---|
| 421 | next if $seen{$domain}; |
|---|
| 422 | $seen{$domain} = 1; |
|---|
| 423 | push @urls, $domain; |
|---|
| 424 | next; |
|---|
| 425 | } |
|---|
| 426 | return $domain if $mode == 1; |
|---|
| 427 | $$total++ if(defined($total)); |
|---|
| 428 | next if $seen{$domain}; |
|---|
| 429 | if ($mode == 0) { # default mode, replicate for all subdomains |
|---|
| 430 | my $last = $#parts; |
|---|
| 431 | my $start = length($parts[$last]) < 3 ? 2 : 1; |
|---|
| 432 | if ($start > $last) { |
|---|
| 433 | $seen{$domain} = 1; |
|---|
| 434 | push @urls, $domain; |
|---|
| 435 | } |
|---|
| 436 | foreach (my $i = $start; $i <= $last; $i++) { |
|---|
| 437 | my $partial = join '.', @parts[$last - $i .. $last]; |
|---|
| 438 | next if $seen{$partial}; |
|---|
| 439 | $seen{$partial} = 1; |
|---|
| 440 | push @urls, $partial; |
|---|
| 441 | } |
|---|
| 442 | } else { |
|---|
| 443 | $seen{$domain} = 1; |
|---|
| 444 | push @urls, $domain; |
|---|
| 445 | } |
|---|
| 446 | } |
|---|
| 447 | |
|---|
| 448 | @urls; |
|---|
| 449 | } |
|---|
| 450 | |
|---|
| 451 | sub decode_entities { |
|---|
| 452 | my ($str) = @_; |
|---|
| 453 | if (eval { require HTML::Entities; 1 }) { |
|---|
| 454 | return HTML::Entities::decode($str); |
|---|
| 455 | } else { |
|---|
| 456 | # yanked from HTML::Entities, since some users don't have the module |
|---|
| 457 | my $c; |
|---|
| 458 | for ($str) { |
|---|
| 459 | s/(&\#(\d+);?)/$2 < 256 ? chr($2) : $1/eg; |
|---|
| 460 | s/(&\#[xX]([0-9a-fA-F]+);?)/$c = hex($2); $c < 256 ? chr($c) : $1/eg; |
|---|
| 461 | } |
|---|
| 462 | $str; |
|---|
| 463 | } |
|---|
| 464 | } |
|---|
| 465 | { |
|---|
| 466 | my $has_encode = eval { require Encode; 1; }; |
|---|
| 467 | |
|---|
| 468 | sub wordlist_match { |
|---|
| 469 | my ($text, $patterns) = @_; |
|---|
| 470 | |
|---|
| 471 | my $enc = MT::ConfigMgr->instance->PublishCharset; |
|---|
| 472 | if ($has_encode) { |
|---|
| 473 | $text = Encode::decode($enc, $text); |
|---|
| 474 | $patterns = Encode::decode($enc, $patterns); |
|---|
| 475 | } |
|---|
| 476 | |
|---|
| 477 | $text ||= ''; |
|---|
| 478 | my @patt = split /[\r\n]+/, $patterns; |
|---|
| 479 | my @matches; |
|---|
| 480 | foreach my $patt (@patt) { |
|---|
| 481 | next if $patt =~ m/^#/; |
|---|
| 482 | my $score = 1; |
|---|
| 483 | if ($patt =~ m/^(.*?) (\d+(?:\.\d+)?) *$/) { |
|---|
| 484 | $patt = $1; |
|---|
| 485 | $score = $2; |
|---|
| 486 | } |
|---|
| 487 | $patt =~ s/(^ +| +$)//g; |
|---|
| 488 | next if $patt eq ''; |
|---|
| 489 | |
|---|
| 490 | my $re_opt = MT::ConfigMgr->instance->DefaultLanguage eq 'ja' ? '' : '\b'; |
|---|
| 491 | if ($patt =~ m!^/!) { |
|---|
| 492 | my $re = $patt; |
|---|
| 493 | my ($opt) = $re =~ m!/([^/]*)$!; |
|---|
| 494 | $re =~ s!^/!!; |
|---|
| 495 | $re =~ s!/[^/]*$!!; |
|---|
| 496 | if ($opt) { |
|---|
| 497 | # increment any internal backreferences (\1), |
|---|
| 498 | # since we're wrapping the whole expression in |
|---|
| 499 | # a capturing group |
|---|
| 500 | $re =~ s/ \\(\d+) / '\\' . ($1 + 1) /gex; |
|---|
| 501 | |
|---|
| 502 | $re = '(?' . $opt . ':' . $re . ')'; |
|---|
| 503 | } |
|---|
| 504 | $re = eval { qr/($re)/ }; |
|---|
| 505 | $re = $re_opt . quotemeta($patt) . $re_opt if $@; |
|---|
| 506 | if ($has_encode) { |
|---|
| 507 | push @matches, [ Encode::encode($enc, $patt), |
|---|
| 508 | Encode::encode($enc, $1), int($score) ] if $text =~ m/($re)/; |
|---|
| 509 | } else { |
|---|
| 510 | push @matches, [ $patt, $1, int($score) ] if $text =~ m/($re)/; |
|---|
| 511 | } |
|---|
| 512 | } else { |
|---|
| 513 | my $re = $re_opt . quotemeta($patt) . $re_opt; |
|---|
| 514 | if ($has_encode) { |
|---|
| 515 | push @matches, [ Encode::encode($enc, $patt), |
|---|
| 516 | Encode::encode($enc, $1), int($score) ] if $text =~ m/($re)/i; |
|---|
| 517 | } else { |
|---|
| 518 | push @matches, [ $patt, $1, int($score) ] if $text =~ m/($re)/i; |
|---|
| 519 | } |
|---|
| 520 | } |
|---|
| 521 | } |
|---|
| 522 | @matches; |
|---|
| 523 | } |
|---|
| 524 | } |
|---|
| 525 | |
|---|
| 526 | sub domain_or_ip_in_whitelist { |
|---|
| 527 | my ($domain, $ip, $whitelist) = @_; |
|---|
| 528 | |
|---|
| 529 | if (ref $domain eq 'ARRAY') { |
|---|
| 530 | my %domains; |
|---|
| 531 | foreach my $domain (@$domain) { |
|---|
| 532 | my @whitelist = split /\r?\n/, $whitelist; |
|---|
| 533 | foreach my $whiteitem (@whitelist) { |
|---|
| 534 | next if $whiteitem =~ m/^#/; |
|---|
| 535 | if ($whiteitem =~ m/^\d{1,3}\.(?:\d{1,3}\.(?:\d{1,3}\.(?:\d{1,3})?)?)?$/) { |
|---|
| 536 | return 1 if defined $ip && ($ip =~ m/^\Q$whiteitem\E/); |
|---|
| 537 | } elsif ($whiteitem =~ m/\w/) { |
|---|
| 538 | next if defined $domain && ($domain =~ m/\Q$whiteitem\E$/i); |
|---|
| 539 | $domains{$domain} = 1; |
|---|
| 540 | } |
|---|
| 541 | } |
|---|
| 542 | } |
|---|
| 543 | @$domain = keys %domains; |
|---|
| 544 | return 0; |
|---|
| 545 | } |
|---|
| 546 | |
|---|
| 547 | $whitelist ||= ''; |
|---|
| 548 | my @whitelist = split /\r?\n/, $whitelist; |
|---|
| 549 | foreach my $whiteitem (@whitelist) { |
|---|
| 550 | next if $whiteitem =~ m/^#/; |
|---|
| 551 | if ($whiteitem =~ m/^\d{1,3}\.(?:\d{1,3}\.(?:\d{1,3}\.(?:\d{1,3})?)?)?$/) { |
|---|
| 552 | return 1 if defined $ip && ($ip =~ m/^\Q$whiteitem\E/); |
|---|
| 553 | } elsif ($whiteitem =~ m/\w/) { |
|---|
| 554 | return 1 if defined $domain && ($domain =~ m/\Q$whiteitem\E$/i); |
|---|
| 555 | } |
|---|
| 556 | } |
|---|
| 557 | |
|---|
| 558 | return 0; |
|---|
| 559 | } |
|---|
| 560 | |
|---|
| 561 | 1; |
|---|