From ee65b016c8edb3a4ef52d4170504b27fcdbf27f0 Mon Sep 17 00:00:00 2001 From: Ralf Ertzinger Date: Mon, 21 Dec 2009 20:42:38 +0100 Subject: [PATCH] - Decode HTML entities in YouTube titles --- videosite/YouTubeGrabber.pm | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/videosite/YouTubeGrabber.pm b/videosite/YouTubeGrabber.pm index 83cee59..edb5c99 100644 --- a/videosite/YouTubeGrabber.pm +++ b/videosite/YouTubeGrabber.pm @@ -14,6 +14,8 @@ use videosite::GrabberBase; use LWP::UserAgent; use HTTP::Cookies; use HTML::TokeParser; +use HTML::Entities qw(decode_entities); +use Encode; use Data::Dumper; use videosite::JSArrayParser; @@ -100,6 +102,11 @@ sub _parse { if ('meta' eq $tag->[0]) { if ('title' eq $tag->[1]->{'name'}) { $metadata->{'TITLE'} = $tag->[1]->{'content'}; + # Convert HTML entities in the title. This is a bit convoluted. + $metadata->{'TITLE'} = encode("utf8", + decode_entities( + decode("utf8", $metadata->{'TITLE'}))); + $self->debug('Title found: %s', $metadata->{'TITLE'}); } } elsif ('script' eq $tag->[0]) { -- 1.8.3.1