Merge branch 'master' of ssh://git.camperquake.de:22003/quotesite

author Ralf Ertzinger <ralf@skytale.net>

Mon, 19 Aug 2013 21:41:03 +0000 (23:41 +0200)

committer Ralf Ertzinger <ralf@skytale.net>

Mon, 19 Aug 2013 21:41:03 +0000 (23:41 +0200)
author Ralf Ertzinger <ralf@skytale.net>
Mon, 19 Aug 2013 21:41:03 +0000 (23:41 +0200)
committer Ralf Ertzinger <ralf@skytale.net>
Mon, 19 Aug 2013 21:41:03 +0000 (23:41 +0200)
diff --git a/quotesite.pl b/quotesite.pl

index 6e4728b..47fb872 100644 (file)
--- a/quotesite.pl
+++ b/quotesite.pl
@@ -74,7 +74,7 @@ my $quotesite_commands = {
  
      'help' => sub {
          cmd_help(@_);
-    }.
+    },
  
      'enable' => sub {
          cmd_enable(@_);
@@ -362,7 +362,7 @@ sub init_quotesite {
  
      _load_modules($plugindir);
  
-    unless (defined(@grabbers)) {
+    unless (@grabbers) {
          write_irssi('No grabbers found, can not proceed.');
          return;
      }
diff --git a/quotesite/AmazonGrabber.pm b/quotesite/AmazonGrabber.pm

new file mode 100644 (file)

index 0000000..bc91cb5
--- /dev/null
+++ b/quotesite/AmazonGrabber.pm
@@ -0,0 +1,77 @@
+# (c) 2007 by Ralf Ertzinger <ralf@camperquake.de>
+# licensed under GNU GPL v2
+#
+# Grabber for Amazon
+
+package quotesite::AmazonGrabber;
+
+use quotesite::GrabberBase;
+@ISA = qw(quotesite::GrabberBase);
+
+use LWP::Simple qw(!get);
+use HTML::TokeParser;
+use Data::Dumper;
+use Encode;
+
+use strict;
+
+# Constructor: start from the base-class object and configure it for Amazon.
+sub new {
+    my ($class) = @_;
+    my $self = $class->SUPER::new();
+
+    # Capture 1 is the product URL, capture 2 the 10-character alphanumeric ID.
+    $self->{'PATTERNS'} = ['(https?://(?:[-a-zA-Z0-9_.]+\.)*amazon\.(?:com|de|co\.uk|fr)/.*[dg]p(?:/product)?/([[:alnum:]]{10}))'];
+    $self->{'NAME'} = 'amazon';
+
+    bless($self, $class)->_prepare_parameters();
+    return $self;
+}
+
+# Download an Amazon product page and extract its title as the quote text.
+# $url is the string to match; $pattern captures the product URL in
+# group 1 and the product ID in group 2.  Returns a metadata hashref, or
+# undef after reporting the problem through $self->error().
+sub _parse {
+    my ($self, $url, $pattern) = @_;
+
+    # Without this check a failed match would leave stale values in $1/$2.
+    unless ($url =~ m|$pattern|) {
+        $self->error('URL does not match grabber pattern');
+        return undef;
+    }
+
+    my $metadata = {
+        'URL' => $1,
+        'ID' => $2,
+        'TYPE' => 'quote',
+        'SOURCE' => $self->{'NAME'},
+        'CONTENT' => undef,
+    };
+
+    # Get the HTML file containing the quote
+    my $content = LWP::Simple::get($metadata->{'URL'});
+    unless (defined($content)) {
+        $self->error('Could not download quote');
+        return undef;
+    }
+
+    $self->debug($content);
+
+    # The title is the text of the <h1 class="parseasinTitle"> element.
+    my $p = HTML::TokeParser->new(\$content);
+    while (my $t = $p->get_tag('h1')) {
+        next unless (exists($t->[1]->{'class'}) && ($t->[1]->{'class'} eq 'parseasinTitle'));
+        $metadata->{'CONTENT'} = encode('utf8', decode('iso8859-1', $p->get_text('/h1')));
+        $metadata->{'CONTENT'} =~ s/^\s*//;
+        $metadata->{'CONTENT'} =~ s/\s*$//;
+        last;
+    }
+
+    return $metadata if defined($metadata->{'CONTENT'});
+
+    $self->error('Could not extract quote content');
+    return undef;
+}
+
+1;
diff --git a/quotesite/AppNetGrabber.pm b/quotesite/AppNetGrabber.pm

new file mode 100644 (file)

index 0000000..d92edec
--- /dev/null
+++ b/quotesite/AppNetGrabber.pm
@@ -0,0 +1,72 @@
+# (c) 2010 by Ralf Ertzinger <ralf@camperquake.de>
+# licensed under GNU GPL v2
+#
+# Grabber for app.net
+
+package quotesite::AppNetGrabber;
+
+use quotesite::GrabberBase;
+@ISA = qw(quotesite::GrabberBase);
+
+use Data::Dumper;
+use JSON;
+use Encode;
+
+use strict;
+
+# Constructor: start from the base-class object and configure it for app.net.
+sub new {
+    my ($class) = @_;
+    my $self = $class->SUPER::new();
+
+    # Capture 1 is the post URL, capture 2 the numeric post ID.
+    $self->{'PATTERNS'} = ['(https?://alpha\.app\.net/[^/]+/post/(\d+))'];
+    $self->{'NAME'} = 'app.net';
+
+    bless($self, $class)->_prepare_parameters();
+    return $self;
+}
+
+# Fetch an app.net post from the alpha-api.app.net JSON endpoint as a quote.
+# $url is the string to match; $pattern captures the post URL in group 1
+# and the numeric post ID in group 2.  Returns a metadata hashref (ID is the
+# username found in the API response), or undef after calling $self->error().
+sub _parse {
+    my ($self, $url, $pattern) = @_;
+
+    # Without this check a failed match would leave stale values in $1/$2.
+    unless ($url =~ m|$pattern|) {
+        $self->error('URL does not match grabber pattern');
+        return undef;
+    }
+    my ($post_url, $post_id) = ($1, $2);
+
+    # Get the JSON file containing the quote
+    my $content = $self->simple_get(sprintf('https://alpha-api.app.net/stream/0/posts/%s', $post_id));
+    unless (defined($content)) {
+        $self->error('Could not download quote');
+        return undef;
+    }
+
+    # decode() croaks on malformed input, so trap that to reach the error path.
+    my $t = eval { JSON->new->utf8->decode($content) };
+    unless (defined($t)) {
+        $self->error('Could not parse JSON metadata');
+        return undef;
+    }
+    $self->debug("JSON content: %s", Dumper($t));
+
+    my $metadata = {
+        'URL' => $post_url,
+        'ID' => $t->{'data'}->{'user'}->{'username'},
+        'TYPE' => 'quote',
+        'SOURCE' => $self->{'NAME'},
+        'CONTENT' => $t->{'data'}->{'text'},
+    };
+    return $metadata if defined($metadata->{'CONTENT'});
+
+    $self->error('Could not extract quote content');
+    return undef;
+}
+
+1;
diff --git a/quotesite/Base.pm b/quotesite/Base.pm

index ccbb657..ebd65c3 100644 (file)
--- a/quotesite/Base.pm
+++ b/quotesite/Base.pm
@@ -4,6 +4,8 @@
  package quotesite::Base;
  
  use strict;
+use LWP::UserAgent;
+use HTTP::Cookies;
  use Data::Dumper;
  
  sub new {
@@ -170,4 +172,30 @@ sub setdebug {
      $self->{'_DEBUG'} = shift;
  }
  
+# Return the LWP::UserAgent for this object, creating it on first use.
+# The agent is kept in $self->{_CACHED_UA} so later calls share one instance
+# (and therefore one cookie jar) instead of building a new one every time.
+sub ua {
+    my $self = shift;
+
+    $self->{_CACHED_UA} ||= LWP::UserAgent->new(
+            'agent' => 'Mozilla/5.0',
+            'cookie_jar' => HTTP::Cookies->new,
+            'timeout' => 15,
+            );
+
+    return $self->{_CACHED_UA};
+}
+
+# Fetch $url with the given user agent (default: $self->ua()) and return the
+# decoded response body, or undef when the request was not successful.
+sub simple_get {
+    my ($self, $url, $ua) = @_;
+    $ua ||= $self->ua();
+
+    my $response = $ua->get($url);
+    return undef unless $response->is_success();
+    return $response->decoded_content();
+}
+
  1;
diff --git a/quotesite/GermanBashGrabber.pm b/quotesite/GermanBashGrabber.pm

index 747968c..361c43d 100644 (file)
--- a/quotesite/GermanBashGrabber.pm
+++ b/quotesite/GermanBashGrabber.pm
@@ -19,8 +19,8 @@ sub new {
      my $self = $class->SUPER::new();
  
      $self->{'NAME'} = 'germanbash';
-    $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.org/(\d+))',
-                           '(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.org/action/show/id/(\d+))'];
+    $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.(?:org|de)/(\d+))',
+                           '(http://(?:[-a-zA-Z0-9_.]+\.)*german-bash\.(?:org|de)/action/show/id/(\d+))'];
  
      bless($self, $class);
      $self->_prepare_parameters();
diff --git a/quotesite/QdbGrabber.pm b/quotesite/QdbGrabber.pm

index f9c7387..411be01 100644 (file)
--- a/quotesite/QdbGrabber.pm
+++ b/quotesite/QdbGrabber.pm
@@ -11,6 +11,7 @@ use quotesite::GrabberBase;
  use LWP::Simple qw(!get);
  use HTML::TokeParser;
  use Data::Dumper;
+use Encode;
  
  use strict;
  
@@ -56,7 +57,7 @@ sub _parse {
  
      OUTER: while ($t = $p->get_tag('span')) {
          if (exists($t->[1]->{'class'}) && ($t->[1]->{'class'} eq 'qt')) {
-            $metadata->{'CONTENT'} = $p->get_text('/span');
+            $metadata->{'CONTENT'} = encode('utf8', decode('iso8859-1', $p->get_text('/span')));
              last OUTER;
          }
      }
author	Ralf Ertzinger <ralf@skytale.net>
	Mon, 19 Aug 2013 21:41:03 +0000 (23:41 +0200)
committer	Ralf Ertzinger <ralf@skytale.net>
	Mon, 19 Aug 2013 21:41:03 +0000 (23:41 +0200)
quotesite.pl		patch \| blob \| history
quotesite/AmazonGrabber.pm	[new file with mode: 0644]	patch \| blob
quotesite/AppNetGrabber.pm	[new file with mode: 0644]	patch \| blob
quotesite/Base.pm		patch \| blob \| history
quotesite/GermanBashGrabber.pm		patch \| blob \| history
quotesite/QdbGrabber.pm		patch \| blob \| history