X-Git-Url: https://git.camperquake.de/gitweb.cgi?p=quotesite.git;a=blobdiff_plain;f=quotesite%2FGermanBashGrabber.pm;h=361c43d15ef1227b7d276aef06e36a997ff73ee4;hp=3fb4e632f9bf88ca0eba392926cf62e70de5251c;hb=439c6b89ee6108272934b7266998d0bc4caebc68;hpb=7a2a97ead43f07fbeb25bca43f40c7f994cc77ab diff --git a/quotesite/GermanBashGrabber.pm b/quotesite/GermanBashGrabber.pm index 3fb4e63..361c43d 100644 --- a/quotesite/GermanBashGrabber.pm +++ b/quotesite/GermanBashGrabber.pm @@ -3,12 +3,12 @@ # # Grabber for german-bash.org -package GermanBashGrabber; +package quotesite::GermanBashGrabber; -use GrabberBase; -@ISA = qw(GrabberBase); +use quotesite::GrabberBase; +@ISA = qw(quotesite::GrabberBase); -use LWP::Simple qw(!get); +use LWP::UserAgent; use HTML::TokeParser; use Data::Dumper; @@ -19,7 +19,8 @@ sub new { my $self = $class->SUPER::new(); $self->{'NAME'} = 'germanbash'; - $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.org/(\d+))']; + $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.(?:org|de)/(\d+))', + '(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.(?:org|de)/action/show/id/(\d+))']; bless($self, $class); $self->_prepare_parameters(); @@ -35,6 +36,7 @@ sub _parse { my $metadata = {}; my $p; my $t; + my $ua = LWP::UserAgent->new('agent' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'); $url =~ m|$pattern|; $url = $1; @@ -46,10 +48,12 @@ sub _parse { $metadata->{'CONTENT'} = undef; # Get the HTML file containing the quote - unless(defined($content = LWP::Simple::get(sprintf('http://german-bash.org/%s', $2)))) { + $content = $ua->get(sprintf('http://german-bash.org/%s', $2)); + unless($content->is_success) { $self->error('Could not download quote'); return undef; } + $content = $content->decoded_content(); $p = HTML::TokeParser->new(\$content); @@ -57,6 +61,7 @@ sub _parse { if (exists($t->[1]->{'class'}) && ($t->[1]->{'class'} eq 'zitat')) { $metadata->{'CONTENT'} = $p->get_text('/div'); $metadata->{'CONTENT'} =~ s/^\s*//mg; + last; } }