X-Git-Url: https://git.camperquake.de/gitweb.cgi?p=quotesite.git;a=blobdiff_plain;f=quotesite%2FGermanBashGrabber.pm;h=361c43d15ef1227b7d276aef06e36a997ff73ee4;hp=919f015b7f145fceba906cee016c3f1de1d9962e;hb=439c6b89ee6108272934b7266998d0bc4caebc68;hpb=134915427cbf80a82868b173766a9c0da84b0fba diff --git a/quotesite/GermanBashGrabber.pm b/quotesite/GermanBashGrabber.pm index 919f015..361c43d 100644 --- a/quotesite/GermanBashGrabber.pm +++ b/quotesite/GermanBashGrabber.pm @@ -3,12 +3,12 @@ # # Grabber for german-bash.org -package GermanBashGrabber; +package quotesite::GermanBashGrabber; -use GrabberBase; -@ISA = qw(GrabberBase); +use quotesite::GrabberBase; +@ISA = qw(quotesite::GrabberBase); -use LWP::Simple qw(!get); +use LWP::UserAgent; use HTML::TokeParser; use Data::Dumper; @@ -19,8 +19,8 @@ sub new { my $self = $class->SUPER::new(); $self->{'NAME'} = 'germanbash'; - $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.org/(\d+))', - '(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.org/action/show/id/(\d+))']; + $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.(?:org|de)/(\d+))', + '(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.(?:org|de)/action/show/id/(\d+))']; bless($self, $class); $self->_prepare_parameters(); @@ -36,6 +36,7 @@ sub _parse { my $metadata = {}; my $p; my $t; + my $ua = LWP::UserAgent->new('agent' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'); $url =~ m|$pattern|; $url = $1; @@ -47,10 +48,12 @@ sub _parse { $metadata->{'CONTENT'} = undef; # Get the HTML file containing the quote - unless(defined($content = LWP::Simple::get(sprintf('http://german-bash.org/%s', $2)))) { + $content = $ua->get(sprintf('http://german-bash.org/%s', $2)); + unless($content->is_success) { $self->error('Could not download quote'); return undef; } + $content = $content->decoded_content(); $p = HTML::TokeParser->new(\$content);