From: Ralf Ertzinger Date: Mon, 21 Dec 2009 19:42:38 +0000 (+0100) Subject: - Decode HTML entities in YouTube titles X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=commitdiff_plain;h=ee65b016c8edb3a4ef52d4170504b27fcdbf27f0;hp=269604a96f3edab3ed911d7a6931eef58c834560;p=videosite.git - Decode HTML entities in YouTube titles --- diff --git a/videosite/YouTubeGrabber.pm b/videosite/YouTubeGrabber.pm index 83cee59..edb5c99 100644 --- a/videosite/YouTubeGrabber.pm +++ b/videosite/YouTubeGrabber.pm @@ -14,6 +14,8 @@ use videosite::GrabberBase; use LWP::UserAgent; use HTTP::Cookies; use HTML::TokeParser; +use HTML::Entities qw(decode_entities); +use Encode; use Data::Dumper; use videosite::JSArrayParser; @@ -100,6 +102,11 @@ sub _parse { if ('meta' eq $tag->[0]) { if ('title' eq $tag->[1]->{'name'}) { $metadata->{'TITLE'} = $tag->[1]->{'content'}; + # Convert HTML entities in the title. This is a bit convoluted. + $metadata->{'TITLE'} = encode("utf8", + decode_entities( + decode("utf8", $metadata->{'TITLE'}))); + $self->debug('Title found: %s', $metadata->{'TITLE'}); } } elsif ('script' eq $tag->[0]) {