#
# Grabber for german-bash.org
-package GermanBashGrabber;
+package quotesite::GermanBashGrabber;
-use GrabberBase;
-@ISA = qw(GrabberBase);
+use quotesite::GrabberBase;
+@ISA = qw(quotesite::GrabberBase);
-use LWP::Simple qw(!get);
+use LWP::UserAgent;
use HTML::TokeParser;
use Data::Dumper;
my $self = $class->SUPER::new();
$self->{'NAME'} = 'germanbash';
- $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.org/(\d+))'];
+ $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.org/(\d+))',
+ '(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.org/action/show/id/(\d+))'];
bless($self, $class);
$self->_prepare_parameters();
my $metadata = {};
my $p;
my $t;
+ my $ua = LWP::UserAgent->new('agent' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)');
$url =~ m|$pattern|;
$url = $1;
$metadata->{'CONTENT'} = undef;
# Get the HTML file containing the quote
- unless(defined($content = LWP::Simple::get(sprintf('http://german-bash.org/%s', $2)))) {
+ $content = $ua->get(sprintf('http://german-bash.org/%s', $2));
+ unless($content->is_success) {
$self->error('Could not download quote');
return undef;
}
+ $content = $content->decoded_content();
$p = HTML::TokeParser->new(\$content);
if (exists($t->[1]->{'class'}) && ($t->[1]->{'class'} eq 'zitat')) {
$metadata->{'CONTENT'} = $p->get_text('/div');
$metadata->{'CONTENT'} =~ s/^\s*//mg;
+ last;
}
}