From: Ralf Ertzinger <ralf@skytale.net>
Date: Mon, 19 Aug 2013 21:41:03 +0000 (+0200)
Subject: Merge branch 'master' of ssh://git.camperquake.de:22003/quotesite
X-Git-Url: https://git.camperquake.de/gitweb.cgi?p=quotesite.git;a=commitdiff_plain;h=87482e72603201424440b68d760b59e573b8eafd;hp=18eb17221c111a8442538c54d2527e8c06b25803

Merge branch 'master' of ssh://git.camperquake.de:22003/quotesite
---

diff --git a/quotesite.pl b/quotesite.pl
index 6e4728b..47fb872 100644
--- a/quotesite.pl
+++ b/quotesite.pl
@@ -74,7 +74,7 @@ my $quotesite_commands = {
 
     'help' => sub {
         cmd_help(@_);
-    }.
+    },
 
     'enable' => sub {
         cmd_enable(@_);
@@ -362,7 +362,7 @@ sub init_quotesite {
 
     _load_modules($plugindir);
 
-    unless (defined(@grabbers)) {
+    unless (@grabbers) {
         write_irssi('No grabbers found, can not proceed.');
         return;
     }
diff --git a/quotesite/AmazonGrabber.pm b/quotesite/AmazonGrabber.pm
new file mode 100644
index 0000000..bc91cb5
--- /dev/null
+++ b/quotesite/AmazonGrabber.pm
@@ -0,0 +1,77 @@
+# (c) 2007 by Ralf Ertzinger <ralf@camperquake.de>
+# licensed under GNU GPL v2
+#
+# Grabber for Amazon
+
+package quotesite::AmazonGrabber;
+
+use quotesite::GrabberBase;
+@ISA = qw(quotesite::GrabberBase);
+
+use LWP::Simple qw(!get);
+use HTML::TokeParser;
+use Data::Dumper;
+use Encode;
+
+use strict;
+
+# Build the object via the parent constructor, then set name and patterns.
+sub new {
+    my ($class) = @_;
+    my $grabber = $class->SUPER::new();
+
+    $grabber->{'NAME'} = 'amazon';
+    $grabber->{'PATTERNS'} = ['(https?://(?:[-a-zA-Z0-9_.]+\.)*amazon\.(?:com|de|co\.uk|fr)/.*[dg]p(?:/product)?/([[:alnum:]]{10}))'];
+
+    bless($grabber, $class);
+    $grabber->_prepare_parameters();
+    return $grabber;
+}
+
+# Download an Amazon product page and build the metadata hash for it from
+# the URL and its pattern (captures: URL, ID); returns undef on any error.
+sub _parse {
+    my ($self, $url, $pattern) = @_;
+    my ($content, $p, $t);
+
+    # List context match: a failed match must not reuse stale $1/$2.
+    my ($matched, $id) = ($url =~ m|$pattern|);
+    unless (defined($matched)) {
+        $self->error('URL does not match pattern');
+        return undef;
+    }
+
+    my $metadata = {
+        'URL' => $matched,
+        'ID' => $id,
+        'TYPE' => 'quote',
+        'SOURCE' => $self->{'NAME'},
+        'CONTENT' => undef,
+    };
+
+    # Get the HTML file containing the quote
+    unless(defined($content = LWP::Simple::get($matched))) {
+        $self->error('Could not download quote');
+        return undef;
+    }
+    $self->debug($content);
+
+    # The title is the text of the first <h1 class="parseasinTitle">.
+    $p = HTML::TokeParser->new(\$content);
+    OUTER: while ($t = $p->get_tag('h1')) {
+        if (exists($t->[1]->{'class'}) && ($t->[1]->{'class'} eq 'parseasinTitle')) {
+            $metadata->{'CONTENT'} = encode('utf8', decode('iso8859-1', $p->get_text('/h1')));
+            $metadata->{'CONTENT'} =~ s/^\s*//;
+            $metadata->{'CONTENT'} =~ s/\s*$//;
+            last OUTER;
+        }
+    }
+
+    unless(defined($metadata->{'CONTENT'})) {
+        $self->error('Could not extract quote content');
+        return undef;
+    }
+    return $metadata;
+}
+
+1;
diff --git a/quotesite/AppNetGrabber.pm b/quotesite/AppNetGrabber.pm
new file mode 100644
index 0000000..d92edec
--- /dev/null
+++ b/quotesite/AppNetGrabber.pm
@@ -0,0 +1,72 @@
+# (c) 2010 by Ralf Ertzinger <ralf@camperquake.de>
+# licensed under GNU GPL v2
+#
+# Grabber for app.net
+
+package quotesite::AppNetGrabber;
+
+use quotesite::GrabberBase;
+@ISA = qw(quotesite::GrabberBase);
+
+use Data::Dumper;
+use JSON;
+use Encode;
+
+use strict;
+
+# Build the object via the parent constructor, then set name and patterns.
+sub new {
+    my ($class) = @_;
+    my $grabber = $class->SUPER::new();
+
+    $grabber->{'NAME'} = 'app.net';
+    $grabber->{'PATTERNS'} = ['(https?://alpha\.app\.net/[^/]+/post/(\d+))'];
+
+    bless($grabber, $class);
+    $grabber->_prepare_parameters();
+    return $grabber;
+}
+
+# Fetch an app.net post through the JSON API and return its metadata hashref,
+# or undef on error (including a URL that does not match the pattern).
+sub _parse {
+    my ($self, $url, $pattern) = @_;
+    my ($content, $t);
+    my ($matched, $id) = ($url =~ m|$pattern|);
+    unless (defined($matched)) {
+        $self->error('URL does not match pattern');
+        return undef;
+    }
+
+    my $metadata = {
+        'URL' => $matched,
+        'ID' => $id,
+        'TYPE' => 'quote',
+        'SOURCE' => $self->{'NAME'},
+        'CONTENT' => undef,
+    };
+
+    # Get the JSON file containing the quote
+    unless(defined($content = $self->simple_get(sprintf('https://alpha-api.app.net/stream/0/posts/%s', $id)))) {
+        $self->error('Could not download quote');
+        return undef;
+    }
+
+    # decode() croaks on malformed JSON, so trap that to reach the error path.
+    unless(defined($t = eval { JSON->new->utf8->decode($content) })) {
+        $self->error('Could not parse JSON metadata');
+        return undef;
+    }
+    $self->debug("JSON content: %s", Dumper($t));
+
+    # The ID reported for a post is the author's username, not the post id.
+    $metadata->{'CONTENT'} = $t->{'data'}->{'text'};
+    $metadata->{'ID'} = $t->{'data'}->{'user'}->{'username'};
+    unless(defined($metadata->{'CONTENT'})) {
+        $self->error('Could not extract quote content');
+        return undef;
+    }
+    return $metadata;
+}
+
+1;
diff --git a/quotesite/Base.pm b/quotesite/Base.pm
index ccbb657..ebd65c3 100644
--- a/quotesite/Base.pm
+++ b/quotesite/Base.pm
@@ -4,6 +4,8 @@
 package quotesite::Base;
 
 use strict;
+use LWP::UserAgent;
+use HTTP::Cookies;
 use Data::Dumper;
 
 sub new {
@@ -170,4 +172,30 @@ sub setdebug {
     $self->{'_DEBUG'} = shift;
 }
 
+# Return the LWP::UserAgent of this object, creating it on first use.
+# The agent is kept in _CACHED_UA so later calls share one cookie jar.
+sub ua {
+    my $self = shift;
+
+    unless (defined($self->{_CACHED_UA})) {
+        $self->{_CACHED_UA} = LWP::UserAgent->new(
+                'agent' => 'Mozilla/5.0',
+                'cookie_jar' => HTTP::Cookies->new,
+                'timeout' => 15,
+                );
+    }
+    return $self->{_CACHED_UA};
+}
+
+# Fetch $url with the given user agent (default: the one from ua()).
+# Returns the decoded response body, or undef if the request failed.
+sub simple_get {
+    my ($self, $url, $agent) = @_;
+
+    $agent ||= $self->ua();
+    my $response = $agent->get($url);
+    return undef unless $response->is_success();
+    return $response->decoded_content();
+}
+
 1;
diff --git a/quotesite/GermanBashGrabber.pm b/quotesite/GermanBashGrabber.pm
index 747968c..361c43d 100644
--- a/quotesite/GermanBashGrabber.pm
+++ b/quotesite/GermanBashGrabber.pm
@@ -19,8 +19,8 @@ sub new {
     my $self = $class->SUPER::new();
 
     $self->{'NAME'} = 'germanbash';
-    $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.org/(\d+))',
-                           '(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.org/action/show/id/(\d+))'];
+    $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.(?:org|de)/(\d+))',
+                           '(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.(?:org|de)/action/show/id/(\d+))'];
 
     bless($self, $class);
     $self->_prepare_parameters();
diff --git a/quotesite/QdbGrabber.pm b/quotesite/QdbGrabber.pm
index f9c7387..411be01 100644
--- a/quotesite/QdbGrabber.pm
+++ b/quotesite/QdbGrabber.pm
@@ -11,6 +11,7 @@ use quotesite::GrabberBase;
 use LWP::Simple qw(!get);
 use HTML::TokeParser;
 use Data::Dumper;
+use Encode;
 
 use strict;
 
@@ -56,7 +57,7 @@ sub _parse {
 
     OUTER: while ($t = $p->get_tag('span')) {
         if (exists($t->[1]->{'class'}) && ($t->[1]->{'class'} eq 'qt')) {
-            $metadata->{'CONTENT'} = $p->get_text('/span');
+            $metadata->{'CONTENT'} = encode('utf8', decode('iso8859-1', $p->get_text('/span')));
             last OUTER;
         }
     }