From: Ralf Ertzinger
Date: Tue, 23 Nov 2010 14:29:56 +0000 (+0100)
Subject: Twitter: Decode HTML entities in tweet
X-Git-Url: https://git.camperquake.de/gitweb.cgi?p=quotesite.git;a=commitdiff_plain;h=54cdc91228ff95d3b0407baa00adc215272c24a0

Twitter: Decode HTML entities in tweet
---

diff --git a/quotesite/TwitterGrabber.pm b/quotesite/TwitterGrabber.pm
index 48b3c11..286b962 100644
--- a/quotesite/TwitterGrabber.pm
+++ b/quotesite/TwitterGrabber.pm
@@ -11,6 +11,7 @@ use GrabberBase;
 use LWP::Simple qw(!get);
 use Data::Dumper;
 use XML::Simple;
+use HTML::Entities qw(decode_entities);
 use Encode;
 
 use strict;
@@ -57,7 +58,7 @@ sub _parse {
         return undef;
     }
 
-    $metadata->{'CONTENT'} = $t->{'text'};
+    $metadata->{'CONTENT'} = encode("utf8", decode_entities(decode("utf8", $t->{'text'})));
     $metadata->{'ID'} = $t->{'user'}->{'screen_name'} . '/' . $metadata->{'ID'};
 
     unless(defined($metadata->{'CONTENT'})) {