root/branches/boomer/t/44-spider.t @ 1098

Revision 1098, 4.2 kB (checked in by hachi, 2 years ago)

Branching for boomer from release-19, rev 62318

  • Property svn:mime-type set to text/plain
  • Property svn:keywords set to Author Date Id Revision
Line 
1#!/usr/bin/perl -w
2
3use strict;
4
5use lib 't/lib', 'extlib', 'lib', '../lib', '../extlib';
6use Test::More tests => 4;
7use LWP::UserAgent::Local;
8use URI;
9use MT::Test qw(:db :data);
10
11my $username = "Chuck D"; # Melody
12my $password = "bass"; # Nelson
13
14unlink "t/cookie.jar";
15use HTTP::Cookies;
16my $cgipath = MT->config->CGIPath;
17$cgipath =~ s/\/*$//;
18my $cookie_jar = HTTP::Cookies->new(file => "t/cookie.jar");
19my $ua = new LWP::UserAgent::Local({ ScriptAlias => "$cgipath/",
20                                     AddHandler => 'cgi-script .cgi',
21                                     cookie_jar => $cookie_jar,
22                                 });
23my $start_link = "http://localhost" . $cgipath . "/mt.cgi?username=$username&password=$password";
24my $start_url = new URI($start_link);
25
26my %link_queue;
27my %links_checked;
28my $count = 0;
29my $link_count = 0;
30
31my @failures;
32my @notgoods;
33my @notgood_pages;
34my @fetched;
35my @warnings;
36my %modes_seen;
37
38my $skip_pattern = qr{logout|export|magic_token};
39my $must_match = qr{(/cgi-bin/|^\?).*mt\.cgi};
40my $warning_pattern = qr{Uninitialized};
41my $good_pattern = qr{Copyright .* 2001-\d+ Six Apart\. All Rights Reserved\.};
42my $bad_pattern = qr{<input\s+type="submit"\s+value="Log In" />|time\s+to\s+upgrade!}i;
43
44my $verbose = 0;
45my $debug = 1;
46my $test_mode = 1;
47
48$link_queue{$start_link} = $start_link;
49while (keys %link_queue && $count < 500) {
50    my ($curr_link, $its_parent) = %link_queue;
51    $link_count++;
52    delete $link_queue{$curr_link};
53
54    next unless $curr_link =~ m/$must_match/;
55    next if $skip_pattern && $curr_link =~ m/$skip_pattern/;
56
57    $curr_link = URI->new_abs($curr_link, $its_parent);
58    next if $curr_link->scheme ne 'http' && $curr_link->scheme ne 'https';
59    next if $curr_link->host ne $start_url->host();
60
61    unless ($links_checked{$curr_link}) {
62        print "REQUESTING $curr_link\n" if $verbose;
63        my $req = new HTTP::Request(GET => $curr_link)
64                          or die "a thousand deaths";
65        my $resp = $ua->request($req) or next;
66
67        #print STDERR "Response: [" . $resp->content() . "]\n" if $verbose;
68        use Data::Dumper;
69        print STDERR $resp->content() unless $resp->content() =~ m/$good_pattern/;
70
71        push @failures, $curr_link unless $resp->is_success;
72        my ($mode) = ($curr_link =~ m/__mode=([^&]*)/);
73        if ($mode) {
74            if (exists $modes_seen{$mode}) {
75                $modes_seen{$mode}++;
76            } else {
77                $modes_seen{$mode} = 1;
78            }
79        }
80        $count++;
81        my $content = $resp->content();
82        push @notgoods, $curr_link unless $content =~ m/$good_pattern/;
83        push @notgoods, $curr_link if $content =~ m/$bad_pattern/;
84        push @warnings, $curr_link if $content =~ m/$warning_pattern/;
85        push @fetched, $curr_link;
86        my @form_actions = $content =~ m|<form[^>]* action="([^"]*)">|gi; #"
87        my @links = $content =~ m|<[^>]*href="([^"]*)">|gi;   #"
88        @links = grep {$_ =~ /\S/} @links;
89        @links = map { s/\&amp\;/&/g; $_ } @links;
90        @links = map { s/\&offset=\d+//; $_ } @links;
91        @links = map { URI->new_abs($_, $curr_link) } @links;
92        $link_queue{$_} = $curr_link foreach (@links);
93        $links_checked{$curr_link}++;
94#         print join "\n", (keys %link_queue);
95#         print "\n";
96    }
97}
98
99# There should be at least a handful of pages!
100ok($count > 50);
101
102print "\nCrawled $count pages (saw $link_count links). ",
103    "\n", scalar @failures, " pages failed to load.\n";
104print "$_\n" foreach @failures;
105
106if ($verbose) {
107#     print "modes: ", join ", ", (keys %modes_seen), "\n";
108#     print "Fetched:\n";
109#     print "$_\n" foreach @fetched;
110    if ($debug) {
111        print "Faulty pages:";
112        print "$_\n" foreach @notgood_pages;
113    }
114    print "\n", scalar @notgoods, " pages appeared faulty:\n";
115    print "$_\n" foreach @notgoods;
116    print "\n", scalar @warnings, " pages produced warnings:\n";
117    print "$_\n" foreach @warnings;
118}
119
120if ($test_mode) {
121    print "# Checking that there were no failures.\n";
122    ok(!@failures);
123    print "# Checking that there were no warnings.\n";
124    ok(!@warnings);
125    print "# Checking that all pages were good.\n";
126    ok(!@notgoods);
127}
Note: See TracBrowser for help on using the browser.