X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=videosite%2FYouTubeGrabber.pm;h=1a331d1809ec4b1abe2479c43ddfd9d7f79f16c6;hb=7c348bce55d63b4a3b9798dd29603e0a84996e6f;hp=be1add14b767f473712f45248f9ee38214892d3a;hpb=240248a0d5da5315a8a26135ef4a9998205238e8;p=videosite.git diff --git a/videosite/YouTubeGrabber.pm b/videosite/YouTubeGrabber.pm index be1add1..1a331d1 100644 --- a/videosite/YouTubeGrabber.pm +++ b/videosite/YouTubeGrabber.pm @@ -26,16 +26,17 @@ my %preflist = ( 'high' => [34, 35, 18, 37, 22, 6, 5, 38, 43], 'normal' => [6, 5, 34, 35, 18, 22, 37, 38, 43]); my %videoformats = ( - 43 => 'webm,360p', - 44 => 'webm,480p', - 45 => 'webm,720p', - 38 => 'mp4,h264,4k', - 37 => 'mp4,h264,1080p', - 35 => 'flv,h264,large', - 34 => 'flv,h264', - 22 => 'mp4,h264,720p', - 18 => 'mp4,h264', - 5 => 'flv,flv', + # Container/Video codec/Audio codec/Resolution + 5 => 'FLV/Sorenson/MP3/240p', + 18 => 'MP4/H264/AAC/360p', + 22 => 'MP4/H264/AAC/720p', + 34 => 'FLV/H264/AAC/360p', + 35 => 'FLV/H264/AAC/480p', + 37 => 'MP4/H264/AAC/1080p', + 38 => 'MP4/H264/AAC/3072p', + 43 => 'WebM/VP8/Vorbis/360p', + 44 => 'WebM/VP8/Vorbis/480p', + 45 => 'WebM/VP8/Vorbis/720p', ); sub new { @@ -58,7 +59,8 @@ sub new { 'h264' => 'high resolution MPEG4 video', 'hd' => 'HD720 resolution'}], 'USERNAME' => ['', 'Username to use for YouTube login'], - 'PASSWORD' => ['', 'Password to use for YouTube login']}; + 'PASSWORD' => ['', 'Password to use for YouTube login'], + 'HTTPS' => [1, 'Whether to use HTTPS (if available) to connect to YouTube']}; bless($self, $class); $self->_prepare_parameters(); @@ -113,8 +115,8 @@ sub _parse_by_video_info { $preflist = $preflist{$quality}; $self->debug("Quality: %s, preflist: [%s]", $quality, join(", ", @{$preflist})); - $videourl = sprintf('https://www.youtube.com/get_video_info?video_id=%s&eurl=%s', - $id, 'http%3A%2F%2Fwww%2Eyoutube%2Ecom%2F'); + $videourl = sprintf('%s://www.youtube.com/get_video_info?video_id=%s&eurl=%s', + $self->_getval('HTTPS')?'https':'http', $id, 'http%3A%2F%2Fwww%2Eyoutube%2Ecom%2F'); $self->debug("Video info URL: %s", $videourl); $r = $ua->get($videourl); @@ -127,28 +129,34 @@ sub _parse_by_video_info { $self->debug('Content from get_video_info: %s', $content); # Decode content - $content = { split /[&=]/, $content }; + $content = $self->decode_querystring($content); if ($content->{'status'} ne 'ok') { $self->debug("Non OK status code found: %s", $content->{'status'}); return undef; } - unless(exists($content->{'fmt_url_map'}) and exists($content->{'title'})) { - $self->debug("No fmt_url_map or no title found"); + if (exists($content->{'fmt_url_map'})) { + # Decode fmt_url_map + $urls = $self->decode_hexurl($content->{'fmt_url_map'}); + $urls = { split /[\|,]/, $urls }; + } elsif (exists($content->{'url_encoded_fmt_stream_map'})) { + $urls = $self->_decode_url_encoded_fmt_stream_map($content->{'url_encoded_fmt_stream_map'}, 1); + } else { + $self->debug("No URL data found"); return undef; } - # Decode fmt_url_map - $urls = $content->{'fmt_url_map'}; - $urls =~ s/%(..)/chr(hex($1))/ge; - $urls = { split /[\|,]/, $urls }; + unless(exists($content->{'title'})) { + $self->debug("No title found"); + return undef; + } $self->__pick_url($urls, $preflist, $metadata); $metadata->{'TITLE'} = $content->{'title'}; $metadata->{'TITLE'} =~ s/\+/ /g; - $metadata->{'TITLE'} =~ s/%(..)/chr(hex($1))/ge; + $metadata->{'TITLE'} = $self->decode_hexurl($metadata->{'TITLE'}); $metadata->{'TITLE'} = decode("utf8", $metadata->{'TITLE'}); $self->debug('Title found: %s', $metadata->{'TITLE'}); @@ -189,7 +197,7 @@ sub _parse_by_scrape { $preflist = $preflist{$quality}; $self->debug("Quality: %s, preflist: [%s]", $quality, join(", ", @{$preflist})); - $videourl = sprintf('https://www.youtube.com/watch?v=%s', $id); + $videourl = sprintf('%s://www.youtube.com/watch?v=%s', $self->_getval('HTTPS')?'https':'http', $id); unless(defined($r = $ua->get($videourl))) { $self->error('Could not download %s', $url); @@ -244,7 +252,7 @@ sub _parse_by_scrape { $self->debug("Video has fmt_url_map: %s", $urls); - $urls =~ s/%([[:xdigit:]]{2})/chr(hex($1))/ge; + $urls = $self->decode_hexurl($urls); %urls = split(/[\|,]/, $urls); $self->debug("Pagetype: old (SWF_ARGS), fmt_url_map"); @@ -256,7 +264,7 @@ sub _parse_by_scrape { my @fmt; $self->debug('Video has fmt_map'); - $fmt =~ s/%([[:xdigit:]]{2})/chr(hex($1))/ge; + $fmt = $self->decode_hexurl($fmt); @fmt = split(/,/, $fmt); foreach (@fmt) { @_=split(/\//); @@ -281,10 +289,10 @@ sub _parse_by_scrape { my $urls = $1; $self->debug("Video has fmt_url_map: %s", $urls); - $urls =~ s/%([[:xdigit:]]{2})/chr(hex($1))/ge; + $urls = $self->decode_hexurl($urls); %urls = split(/[\|,]/, $urls); $self->debug("Pagetype: 2010 (swfHTML), fmt_url_map"); - } elsif ($e =~ m|\x27PLAYER_CONFIG\x27:\s+(.+)\}\);|) { + } elsif ($e =~ m|\x27PLAYER_CONFIG\x27:\s+(.+)(?:\}\);)?|) { my $args = $1; $self->debug("Found PLAYER_CONFIG: %s", $args); @@ -309,11 +317,13 @@ sub _parse_by_scrape { %urls = split(/[\|,]/, $urls); foreach (keys(%urls)) { - my $u = $urls{$_}; - $u =~ s/%([[:xdigit:]]{2})/chr(hex($1))/ge; - $urls{$_} = $u; + $urls{$_} = $self->decode_hexurl($urls{$_}); } $self->debug("Pagetype: 2011 (PLAYER_CONFIG), fmt_url_map"); + } elsif (exists($r->{'args'}) and exists($r->{'args'}->{'url_encoded_fmt_stream_map'}) and ($r->{'args'}->{'url_encoded_fmt_stream_map'} ne '')) { + %urls = %{$self->_decode_url_encoded_fmt_stream_map($r->{'args'}->{'url_encoded_fmt_stream_map'}, 0)}; + + $self->debug("Pagetype: 2011 (PLAYER_CONFIG), url_encoded_fmt_stream_map"); } else { $self->error('fmt_url_map not found in PLAYER_CONFIG'); return undef; @@ -451,6 +461,33 @@ sub __login { return ($ua->get($videourl), $cookie); } +# Take an encoded url_encoded_fmt_stream_map and return a hash +# matching video IDs to download URLs +sub _decode_url_encoded_fmt_stream_map { + my $self = shift; + my $data = shift; + my $dataencoded = shift; + my @data; + + $data = $self->decode_hexurl($data) if $dataencoded; + # This will + # - Split the decoded string into segments (along ,) + # - Interpret each segment as a concatenated key-value list (key and value separated by =, pairs separated by & + # - URL-decode each key and value _again_ + # + # @data will be an array of hash references + + @data = map { { map { $self->decode_hexurl($_) } split /[&=]/ } } split /,/, $data; + $self->debug("_decode_url_encoded_fmt_stream_map() decoded %s", Dumper(\@data)); + + # From each array entry, pick the itag and the url values and return that + # as a hash reference + + return { map { $_->{'itag'}, $_->{'url'} } @data }; +} + + + sub __pick_url { my $self = shift; my $urls = shift; @@ -477,3 +514,4 @@ sub __pick_url { } 1; +