X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=videosite%2FYouTubeGrabber.pm;h=4449d1fa8122ee4ca362f517b9752e5e14217705;hb=e3ddf9a3b8b6daf7ab0b5de3d5b26a1bd79eda15;hp=2ed52a757fcd0d5a1c0ba143e15c86683ff2a2b9;hpb=7b6cd96a0ad9a3bed4f771df3cc3d541470427f8;p=videosite.git diff --git a/videosite/YouTubeGrabber.pm b/videosite/YouTubeGrabber.pm index 2ed52a7..4449d1f 100644 --- a/videosite/YouTubeGrabber.pm +++ b/videosite/YouTubeGrabber.pm @@ -14,6 +14,8 @@ use videosite::GrabberBase; use LWP::UserAgent; use HTTP::Cookies; use HTML::TokeParser; +use HTML::Entities qw(decode_entities); +use Encode; use Data::Dumper; use videosite::JSArrayParser; @@ -24,8 +26,9 @@ sub new { my $self = $class->SUPER::new(); $self->{'NAME'} = 'youtube'; - $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*youtube.(?:com|de|co.uk)/watch\?(?:.+=.+&)*v=([-a-zA-Z0-9_]+))', - '(http://(?:[-a-zA-Z0-9_.]+\.)*youtube.(?:com|de|co.uk)/v/([-a-zA-Z0-9_]+))']; + $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*youtube\.(?:com|de|co.uk)/watch\?(?:.+=.+&)*v=([-a-zA-Z0-9_]+))', + '(http://(?:[-a-zA-Z0-9_.]+\.)*youtube\.(?:com|de|co.uk)/v/([-a-zA-Z0-9_]+))', + '(http://(?:[-a-zA-Z0-9_.]+\.)*youtu\.be/([-a-zA-Z0-9_]+))']; $self->{'_PARAMS'} = { 'QUALITY' => ['normal', 'Quality of the video to download.', { 'normal' => 'standard resolution flash video', @@ -56,10 +59,10 @@ sub _parse { my $videourl; my $quality = $self->_getval('QUALITY'); my %preflist = ( - 'hd' => [22, 35, 18, 34, 6, 5], - 'h264' => [18, 34, 22, 35, 6, 5], - 'high' => [34, 35, 18, 22, 6, 5], - 'normal' => [6, 5, 34, 35, 18, 22]); + 'hd' => [37, 22, 35, 18, 34, 6, 5], + 'h264' => [18, 34, 37, 22, 35, 6, 5], + 'high' => [34, 35, 18, 37, 22, 6, 5], + 'normal' => [6, 5, 34, 35, 18, 22, 37]); my $preflist; my $jsp; @@ -73,6 +76,8 @@ sub _parse { $metadata->{'TITLE'} = undef; $metadata->{'DLURL'} = undef; + $self->debug("Matched id %s from pattern %s", $2, $pattern); + $preflist = $preflist{$quality}; $self->debug("Quality: %s, preflist: [%s]", $quality, join(", ", @{$preflist})); @@ -100,6 +105,11 @@ sub _parse { if ('meta' eq $tag->[0]) { if ('title' eq $tag->[1]->{'name'}) { $metadata->{'TITLE'} = $tag->[1]->{'content'}; + # Convert HTML entities in the title. This is a bit convoluted. + $metadata->{'TITLE'} = encode("utf8", + decode_entities( + decode("utf8", $metadata->{'TITLE'}))); + $self->debug('Title found: %s', $metadata->{'TITLE'}); } } elsif ('script' eq $tag->[0]) { @@ -111,6 +121,7 @@ sub _parse { $self->debug("Found SWF_ARGS: %s", $args); $jsp = videosite::JSArrayParser->new(); + $self->debug("Using %s to parse", ref($jsp)); $r = $jsp->parse($args); unless(defined($r)) { @@ -159,7 +170,9 @@ sub _parse { } elsif ($_ == 34) { $self->debug('Found flv,h264: %s', $urls{$_}); } elsif ($_ == 22) { - $self->debug('Found mp4,h264,large: %s', $urls{$_}); + $self->debug('Found mp4,h264,720p: %s', $urls{$_}); + } elsif ($_ == 37) { + $self->debug('Found mp4,h264,1080p: %s', $urls{$_}); } elsif ($_ == 18) { $self->debug('Found mp4,h264: %s', $urls{$_}); } elsif ($_ == 5) { @@ -181,7 +194,7 @@ sub _parse { last SWF_ARGS; } } elsif ('div' eq $tag->[0]) { - if (exists($tag->[1]->{'class'}) and ('errorBox' eq $tag->[1]->{'class'})) { + if (exists($tag->[1]->{'class'}) and ('yt-alert-content' eq $tag->[1]->{'class'})) { $self->error("Could not get video data for youtube %s: %s", $metadata->{'ID'}, $p->get_trimmed_text()); return undef;