X-Git-Url: https://git.camperquake.de/gitweb.cgi?p=quotesite.git;a=blobdiff_plain;f=quotesite%2FGermanBashGrabber.pm;h=e65b66015b689afb25318c21ce566ce7530366e7;hp=8b52db905574ae769151e7afd85bc6a5e25cbb52;hb=3f99f34c478f21862b13f5799a629a6e96c1d69f;hpb=9fe1f066ac13ad6131acbcbcc252c036e3fa07f3 diff --git a/quotesite/GermanBashGrabber.pm b/quotesite/GermanBashGrabber.pm index 8b52db9..e65b660 100644 --- a/quotesite/GermanBashGrabber.pm +++ b/quotesite/GermanBashGrabber.pm @@ -8,7 +8,7 @@ package GermanBashGrabber; use GrabberBase; @ISA = qw(GrabberBase); -use LWP::Simple qw(!get); +use LWP::UserAgent; use HTML::TokeParser; use Data::Dumper; @@ -36,6 +36,7 @@ sub _parse { my $metadata = {}; my $p; my $t; + my $ua = LWP::UserAgent->new('agent' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)'); $url =~ m|$pattern|; $url = $1; @@ -47,10 +48,12 @@ sub _parse { $metadata->{'CONTENT'} = undef; # Get the HTML file containing the quote - unless(defined($content = LWP::Simple::get(sprintf('http://german-bash.org/%s', $2)))) { + $content = $ua->get(sprintf('http://german-bash.org/%s', $2)); + unless($content->is_success) { $self->error('Could not download quote'); return undef; } + $content = $content->decoded_content(); $p = HTML::TokeParser->new(\$content); @@ -58,6 +61,7 @@ sub _parse { if (exists($t->[1]->{'class'}) && ($t->[1]->{'class'} eq 'zitat')) { $metadata->{'CONTENT'} = $p->get_text('/div'); $metadata->{'CONTENT'} =~ s/^\s*//mg; + last; } }