# From: Ralf Ertzinger
# Date: Mon, 21 Jun 2010 20:30:05 +0000 (+0200)
# Subject: Add a grabber for Twitter status messages
# X-Git-Url: https://git.camperquake.de/gitweb.cgi?p=quotesite.git;a=commitdiff_plain;h=89a192885c8132e2fe1076faa442e46627e31ba7
#
# Add a grabber for Twitter status messages
#
# File: quotesite/TwitterGrabber.pm (new file, mode 100644, index 0000000..57e0ec4)
#
# (c) 2010 by Ralf Ertzinger
# licensed under GNU GPL v2
#
# Grabber for twitter

package TwitterGrabber;

use strict;
use warnings;

use GrabberBase;
our @ISA = qw(GrabberBase);

use LWP::Simple qw(!get);
use Data::Dumper;
use XML::Simple;
use Encode;

# Construct a new grabber object.
#
# Takes the object returned by the parent constructor, sets its NAME and
# PATTERNS entries, and calls _prepare_parameters() on it. That method is not
# defined in this file; presumably it is inherited from GrabberBase.
#
# Returns the blessed object.
sub new {
    my $class = shift;
    my $self = $class->SUPER::new();

    $self->{'NAME'} = 'twitter.com';

    # The pattern has two capture groups: the whole status URL and, nested
    # inside it, the numeric status id. _parse() relies on exactly this layout.
    $self->{'PATTERNS'} = ['(http://twitter.com/[^/]+/status/(\d+))'];

    bless($self, $class);
    $self->_prepare_parameters();

    return $self;
}

# Fetch a single status message and describe it as a metadata hash.
#
# Parameters:
#   $url     - text containing a status URL
#   $pattern - regular expression with two capture groups (URL, status id),
#              as listed in PATTERNS
#
# Returns a hash reference with the keys URL, ID ("screen_name/status_id"),
# TYPE ('quote'), SOURCE (this grabber's NAME) and CONTENT (the status text).
# On any failure it reports a message through $self->error() and returns
# undef.
sub _parse {
    my $self = shift;
    my $url = shift;
    my $pattern = shift;

    # Take the captures in list context and copy them to lexicals right away.
    # A failed match yields an empty list, so stale $1/$2 values left over
    # from an earlier, unrelated match can never leak into the result.
    my ($status_url, $status_id);
    if (defined($url) && defined($pattern)) {
        ($status_url, $status_id) = $url =~ m|$pattern|;
    }

    unless (defined($status_url) && defined($status_id)) {
        $self->error('Could not parse URL');
        return undef;
    }

    my $metadata = {
        'URL'     => $status_url,
        'ID'      => $status_id,
        'TYPE'    => 'quote',
        'SOURCE'  => $self->{'NAME'},
        'CONTENT' => undef,
    };

    # Get the XML file containing the quote
    my $content = LWP::Simple::get(sprintf('http://api.twitter.com/1/statuses/show/%s.xml', $status_id));
    unless (defined($content)) {
        $self->error('Could not download quote');
        return undef;
    }

    # XMLin() dies on malformed input instead of returning undef, so it has
    # to run inside eval for the error branch below to be reachable. $@ is
    # localized so the caller's value is not clobbered.
    my $t = do {
        local $@;
        eval { XML::Simple->new()->XMLin($content) };
    };

    # Anything other than a hash reference cannot be indexed by element name.
    unless (defined($t) && ref($t) eq 'HASH') {
        $self->error('Could not parse XML metadata');
        return undef;
    }

    # XML::Simple represents an empty element as an empty hash reference, so
    # a reference here means there is no usable text.
    my $text = $t->{'text'};
    unless (defined($text) && !ref($text)) {
        $self->error('Could not extract quote content');
        return undef;
    }
    $metadata->{'CONTENT'} = $text;

    # Only dereference 'user' when it really is a hash. A missing screen name
    # still produces an ID of the form "/<status id>", as before.
    my $user = $t->{'user'};
    my $screen_name = (ref($user) eq 'HASH') ? $user->{'screen_name'} : undef;
    $screen_name = '' unless defined($screen_name) && !ref($screen_name);
    $metadata->{'ID'} = $screen_name . '/' . $status_id;

    return $metadata;
}

1;