Merge branch 'master' of http://10.200.0.3/GIT/videosite
[videosite.git] / videosite / YouTubeGrabber.pm
index 8079f9f..a7ed395 100644 (file)
@@ -35,6 +35,7 @@ my %videoformats = (
     22 => 'MP4/H264/AAC/720p',      # isommp42, High
     34 => 'FLV/H264/AAC/360p',      # Main
     35 => 'FLV/H264/AAC/480p',      # Main
+    36 => '3GP/MPEG4-Visual/240p',
     37 => 'MP4/H264/AAC/1080p',     # High
     38 => 'MP4/H264/AAC/3072p',     # High
     43 => 'WebM/VP8/Vorbis/360p',   
@@ -55,7 +56,9 @@ sub new {
     my $self = $class->SUPER::new();
 
     $self->{'NAME'} = 'youtube';
-    $self->{'PATTERNS'} = ['(https?://(?:[-a-zA-Z0-9_.]+\.)*youtube\.(?:com|de|co.uk)/watch(?:_popup)?\?(?:.+=.+&)*v=([-a-zA-Z0-9_]+))',
+    $self->{_SELFTESTURL} = 'http://www.youtube.com/watch?v=dMH0bHeiRNg';
+    $self->{_SELFTESTTITLE} = 'Evolution of Dance - By Judson Laipply';
+    $self->{'PATTERNS'} = ['(https?://(?:[-a-zA-Z0-9_.]+\.)*youtube\.(?:com|de|co.uk)/watch(?:_popup)?\?.*?v=([-a-zA-Z0-9_]+))',
                            '(https?://(?:[-a-zA-Z0-9_.]+\.)*youtube\.(?:com|de|co.uk)/watch\#\!v=([-a-zA-Z0-9_]+))',
                            '(https?://(?:[-a-zA-Z0-9_.]+\.)*youtube\.(?:com|de|co.uk)/v/([-a-zA-Z0-9_]+))',
                            '(https?://(?:[-a-zA-Z0-9_.]+\.)*youtube\.(?:com|de|co.uk)/user/[[:alnum:]]+\?v=([-a-zA-Z0-9_]+))',
@@ -121,7 +124,6 @@ sub _parse_by_video_info {
     $metadata->{'SOURCE'} = $self->{'NAME'};
     $metadata->{'TITLE'} = undef;
     $metadata->{'DLURL'} = undef;
-    $metadata->{'COOKIE'} = undef;
 
     $preflist = $preflist{$quality};
     $self->debug("Quality: %s, preflist: [%s]", $quality, join(", ", @{$preflist}));
@@ -202,7 +204,6 @@ sub _parse_by_scrape {
     $metadata->{'SOURCE'} = $self->{'NAME'};
     $metadata->{'TITLE'} = undef;
     $metadata->{'DLURL'} = undef;
-    $metadata->{'COOKIE'} = undef;
 
 
     $preflist = $preflist{$quality};
@@ -219,7 +220,6 @@ sub _parse_by_scrape {
         $self->debug('Video requires age verification');
         my @logindata = $self->__login($videourl, $ua);
         $r = $logindata[0];
-        $metadata->{'COOKIE'} = $logindata[1];
         unless(defined($r)) {
             $self->error('Could not log into YouTube');
             return undef;
@@ -245,10 +245,69 @@ sub _parse_by_scrape {
             $e = $p->get_text();
             $self->debug("Found script: %s", $e);
 
-            if ($e =~ m|\x27SWF_ARGS\x27:\s+(.+),|) {
+#            if ($e =~ m|\x27SWF_ARGS\x27:\s+(.+),|) {
+#                my $args = $1;
+#
+#                $self->debug("Found SWF_ARGS: %s", $args);
+#                $jsp = videosite::JSArrayParser->new();
+#                $self->debug("Using %s to parse", ref($jsp));
+#                $r = $jsp->parse($args);
+#
+#                unless(defined($r)) {
+#                    $self->error("Found information hash, but could not parse");
+#                    return undef;
+#                }
+#
+#                if (exists($r->{'fmt_url_map'}) and ($r->{'fmt_url_map'} ne '')) {
+#                    my $urls =  $r->{'fmt_url_map'};
+#
+#                    $self->debug("Video has fmt_url_map: %s", $urls);
+#
+#                    $urls = $self->decode_hexurl($urls);
+#                    %urls = split(/[\|,]/, $urls);
+#                    $self->debug("Pagetype: old (SWF_ARGS), fmt_url_map");
+#
+#                } elsif (exists($r->{'t'}) and ($r->{'t'} ne '')) {
+#                    my $thash = $r->{'t'};
+#
+#                    if (exists($r->{'fmt_map'}) && ($r->{'fmt_map'} ne '')) {
+#                        my $fmt = $r->{'fmt_map'};
+#                        my @fmt;
+#
+#                        $self->debug('Video has fmt_map');
+#                        $fmt = $self->decode_hexurl($fmt);
+#                        @fmt = split(/,/, $fmt);
+#                        foreach (@fmt) {
+#                            @_=split(/\//);
+#                            $urls{$_[0]} =  sprintf('http://www.youtube.com/get_video?video_id=%s&fmt=%d&t=%s', 
+#                                $metadata->{'ID'},
+#                                $_[0],
+#                                $thash);
+#                        }
+#                        $self->debug("Pagetype: 2009 (SWF_ARGS), t with fmt_map");
+#
+#                    } else {
+#                        $urls{5} = sprintf('http://www.youtube.com/get_video?video_id=%s&t=%s',
+#                            $metadata->{'ID'},
+#                            $thash);
+#                        $self->debug("Pagetype: 2009 (SWF_ARGS), t without fmt_map");
+#                    }
+#                } else {
+#                    $self->error('Neither fmt_url_map nor t found in video information hash');
+#                    return undef;
+#                }
+#            } elsif ($e =~ m|var swfHTML = .*fmt_url_map=([^\&]+)\&|) {
+#                my $urls = $1;
+#                $self->debug("Video has fmt_url_map: %s", $urls);
+#
+#                $urls = $self->decode_hexurl($urls);
+#                %urls = split(/[\|,]/, $urls);
+#                $self->debug("Pagetype: 2010 (swfHTML), fmt_url_map");
+#            } elsif ($e =~ m|\x27PLAYER_CONFIG\x27:\s+(.+)(?:\}\);)?|) {
+             if ($e =~ m|\x27PLAYER_CONFIG\x27:\s+(.+)(?:\}\);)?|) {
                 my $args = $1;
+                $self->debug("Found PLAYER_CONFIG: %s", $args);
 
-                $self->debug("Found SWF_ARGS: %s", $args);
                 $jsp = videosite::JSArrayParser->new();
                 $self->debug("Using %s to parse", ref($jsp));
                 $r = $jsp->parse($args);
@@ -258,54 +317,32 @@ sub _parse_by_scrape {
                     return undef;
                 }
 
-                if (exists($r->{'fmt_url_map'}) and ($r->{'fmt_url_map'} ne '')) {
-                    my $urls =  $r->{'fmt_url_map'};
+                if (exists($r->{'args'}) and exists($r->{'args'}->{'ps'}) and ($r->{'args'}->{'ps'} eq 'live')) {
+                    $self->error("Video URL seems to point to a live stream, cannot save this");
+                    return undef;
+                }
+
+                if (exists($r->{'args'}) and exists($r->{'args'}->{'fmt_url_map'}) and ($r->{'args'}->{'fmt_url_map'} ne '')) {
+                    my $urls = $r->{'args'}->{'fmt_url_map'};
 
                     $self->debug("Video has fmt_url_map: %s", $urls);
 
-                    $urls = $self->decode_hexurl($urls);
                     %urls = split(/[\|,]/, $urls);
-                    $self->debug("Pagetype: old (SWF_ARGS), fmt_url_map");
-
-                } elsif (exists($r->{'t'}) and ($r->{'t'} ne '')) {
-                    my $thash = $r->{'t'};
-
-                    if (exists($r->{'fmt_map'}) && ($r->{'fmt_map'} ne '')) {
-                        my $fmt = $r->{'fmt_map'};
-                        my @fmt;
-
-                        $self->debug('Video has fmt_map');
-                        $fmt = $self->decode_hexurl($fmt);
-                        @fmt = split(/,/, $fmt);
-                        foreach (@fmt) {
-                            @_=split(/\//);
-                            $urls{$_[0]} =  sprintf('http://www.youtube.com/get_video?video_id=%s&fmt=%d&t=%s', 
-                                $metadata->{'ID'},
-                                $_[0],
-                                $thash);
-                        }
-                        $self->debug("Pagetype: 2009 (SWF_ARGS), t with fmt_map");
-
-                    } else {
-                        $urls{5} = sprintf('http://www.youtube.com/get_video?video_id=%s&t=%s',
-                            $metadata->{'ID'},
-                            $thash);
-                        $self->debug("Pagetype: 2009 (SWF_ARGS), t without fmt_map");
+                    foreach (keys(%urls)) {
+                        $urls{$_} = $self->decode_hexurl($urls{$_});
                     }
+                    $self->debug("Pagetype: 2011 (PLAYER_CONFIG), fmt_url_map");
+                } elsif (exists($r->{'args'}) and exists($r->{'args'}->{'url_encoded_fmt_stream_map'}) and ($r->{'args'}->{'url_encoded_fmt_stream_map'} ne '')) {
+                    %urls = %{$self->_decode_url_encoded_fmt_stream_map($r->{'args'}->{'url_encoded_fmt_stream_map'}, 0)};
+
+                    $self->debug("Pagetype: 2011 (PLAYER_CONFIG), url_encoded_fmt_stream_map");
                 } else {
-                    $self->error('Neither fmt_url_map nor t found in video information hash');
+                    $self->error('fmt_url_map not found in PLAYER_CONFIG');
                     return undef;
                 }
-            } elsif ($e =~ m|var swfHTML = .*fmt_url_map=([^\&]+)\&|) {
-                my $urls = $1;
-                $self->debug("Video has fmt_url_map: %s", $urls);
-
-                $urls = $self->decode_hexurl($urls);
-                %urls = split(/[\|,]/, $urls);
-                $self->debug("Pagetype: 2010 (swfHTML), fmt_url_map");
-            } elsif ($e =~ m|\x27PLAYER_CONFIG\x27:\s+(.+)(?:\}\);)?|) {
+            } elsif ($e =~ m|yt\.playerConfig\s*=\s*(.+);\n|) {
                 my $args = $1;
-                $self->debug("Found PLAYER_CONFIG: %s", $args);
+                $self->debug("Found yt.playerConfig: %s", $args);
 
                 $jsp = videosite::JSArrayParser->new();
                 $self->debug("Using %s to parse", ref($jsp));
@@ -321,26 +358,17 @@ sub _parse_by_scrape {
                     return undef;
                 }
 
-                if (exists($r->{'args'}) and exists($r->{'args'}->{'fmt_url_map'}) and ($r->{'args'}->{'fmt_url_map'} ne '')) {
-                    my $urls = $r->{'args'}->{'fmt_url_map'};
-
-                    $self->debug("Video has fmt_url_map: %s", $urls);
-
-                    %urls = split(/[\|,]/, $urls);
-                    foreach (keys(%urls)) {
-                        $urls{$_} = $self->decode_hexurl($urls{$_});
-                    }
-                    $self->debug("Pagetype: 2011 (PLAYER_CONFIG), fmt_url_map");
-                } elsif (exists($r->{'args'}) and exists($r->{'args'}->{'url_encoded_fmt_stream_map'}) and ($r->{'args'}->{'url_encoded_fmt_stream_map'} ne '')) {
+                if (exists($r->{'args'}) and exists($r->{'args'}->{'url_encoded_fmt_stream_map'}) and ($r->{'args'}->{'url_encoded_fmt_stream_map'} ne '')) {
                     %urls = %{$self->_decode_url_encoded_fmt_stream_map($r->{'args'}->{'url_encoded_fmt_stream_map'}, 0)};
 
-                    $self->debug("Pagetype: 2011 (PLAYER_CONFIG), url_encoded_fmt_stream_map");
+                    $self->debug("Pagetype: 2012 (yt.playerConfig), url_encoded_fmt_stream_map");
                 } else {
-                    $self->error('fmt_url_map not found in PLAYER_CONFIG');
+                    $self->error('url_map not found in yt.playerConfig');
                     return undef;
                 }
             }
 
+
             if (%urls) {
                 $self->__pick_url(\%urls, $preflist, $metadata);
                 last SWF_ARGS;
@@ -517,6 +545,11 @@ sub __pick_url {
         if (exists($urls->{$_})) {
             $self->debug("Selected URL with quality level %s", $_);
             $metadata->{'DLURL'} = $urls->{$_};
+            if (exists($videoformats{$_})) {
+                $metadata->{'FORMAT'} = $videoformats{$_};
+            } else {
+                $metadata->{'FORMAT'} = 'unknown';
+            }
             last;
         }
     }