GermanBash: add german-bash.de as domain
[quotesite.git] / quotesite / GermanBashGrabber.pm
index 3fb4e63..361c43d 100644 (file)
@@ -3,12 +3,12 @@
 #
 # Grabber for german-bash.org
 
-package GermanBashGrabber;
+package quotesite::GermanBashGrabber;
 
-use GrabberBase;
-@ISA = qw(GrabberBase);
+use quotesite::GrabberBase;
+@ISA = qw(quotesite::GrabberBase);
 
-use LWP::Simple qw(!get);
+use LWP::UserAgent;
 use HTML::TokeParser;
 use Data::Dumper;
 
@@ -19,7 +19,8 @@ sub new {
     my $self = $class->SUPER::new();
 
     $self->{'NAME'} = 'germanbash';
-    $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.org/(\d+))'];
+    $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.(?:org|de)/(\d+))',
+                           '(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.(?:org|de)/action/show/id/(\d+))'];
 
     bless($self, $class);
     $self->_prepare_parameters();
@@ -35,6 +36,7 @@ sub _parse {
     my $metadata = {};
     my $p;
     my $t;
+    my $ua = LWP::UserAgent->new('agent' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)');
 
     $url =~ m|$pattern|;
     $url = $1;
@@ -46,10 +48,12 @@ sub _parse {
     $metadata->{'CONTENT'} = undef;
 
     # Get the HTML file containing the quote
-    unless(defined($content = LWP::Simple::get(sprintf('http://german-bash.org/%s', $2)))) {
+    $content = $ua->get(sprintf('http://german-bash.org/%s', $2));
+    unless($content->is_success) {
         $self->error('Could not download quote');
         return undef;
     }
+    $content = $content->decoded_content();
 
     $p = HTML::TokeParser->new(\$content);
 
@@ -57,6 +61,7 @@ sub _parse {
         if (exists($t->[1]->{'class'}) && ($t->[1]->{'class'} eq 'zitat')) {
             $metadata->{'CONTENT'} = $p->get_text('/div');
             $metadata->{'CONTENT'} =~ s/^\s*//mg;
+            last;
         }
     }