Manual merge for DailyMotionGrabber.pm
[videosite.git] / videosite / DailyMotionGrabber.pm
index 981fef7..74e5030 100644 (file)
@@ -8,8 +8,7 @@ package videosite::DailyMotionGrabber;
 use videosite::GrabberBase;
 @ISA = qw(videosite::GrabberBase);
 
-use LWP::Simple qw(!get);
-use HTML::Parser;
+use HTML::TokeParser;
 use videosite::JSArrayParser;
 use Data::Dumper;
 
@@ -17,16 +16,15 @@ use strict;
 
 sub new {
     my $class = shift;
-    my $self = $class->SUPER::new();
-
-    $self->{'NAME'} = 'dailymotion';
-    $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*dailymotion.com/(?:[^/]+/)*video/([-a-zA-Z0-9_]+))'];
-
-    bless($self, $class);
-
-    $self->_prepare_parameters();
-
-    return $self;
+    my $self = $class->SUPER::new(
+        NAME => 'dailymotion',
+        _SELFTESTURL => 'http://www.dailymotion.com/video/xylv6u_moon-duo-sleepwalker_music',
+        _SELFTESTTITLE => 'Moon Duo - Sleepwalker',
+        PATTERNS => ['(http://(?:[-a-zA-Z0-9_.]+\.)*dailymotion.com/(?:[^/]+/)*video/([-a-zA-Z0-9_]+))'],
+        @_,
+    );
+
+    return bless($self, $class);
 }
 
 sub _parse {
@@ -35,7 +33,7 @@ sub _parse {
     my $pattern = shift;
     my $content;
     my $metadata = {};
-    my $p = HTML::Parser->new(api_version => 3);
+    my $p;
     my @accum;
     my @text;
     my $e;
@@ -50,73 +48,69 @@ sub _parse {
     $metadata->{'TITLE'} = undef;
     $metadata->{'DLURL'} = undef;
 
-    unless(defined($content = LWP::Simple::get(sprintf('http://www.dailymotion.com/video/%s', $2)))) {
+    unless(defined($content = $self->simple_get(sprintf('http://www.dailymotion.com/video/%s', $2)))) {
         $self->error('Could not download %s', $url);
         return undef;
     }
 
-    $p->handler(start => \@accum, "tagname, attr");
-    $p->handler(text => \@text, "text");
-    $p->report_tags(qw(meta script));
-    $p->utf8_mode(1);
-    $p->parse($content);
+    $p = HTML::TokeParser->new(\$content);
 
     # Look for the title in the meta tags
-    foreach $e (@accum) {
+    while ($e = $p->get_tag('meta', 'script')) {
         if ('meta' eq $e->[0]) {
-            if ('title' eq $e->[1]->{'name'}) {
+            if (exists($e->[1]->{'property'}) && ('og:title' eq $e->[1]->{'property'})) {
                 $metadata->{'TITLE'} = $e->[1]->{'content'};
-                $metadata->{'TITLE'} =~ s/^Dailymotion\s+-\s+//;
-                $metadata->{'TITLE'} =~ s/(?:\s+-\s+.*)?$//;
             }
-        }
-    }
+        } elsif ('script' eq $e->[0]) {
+            my $c = $p->get_text();
 
-    # Look for the download URL
-    foreach $e (@text) {
-        if ($e->[0] =~ m|\.addVariable\("sequence",\s*"([^\"]+)"|) {
-            my $sequence = $1;
-            my $jsp = videosite::JSArrayParser->new();
-            my $main;
-            my $s;
+            $self->debug("Found script: %s", $c);
 
-            $sequence =~ s/%(..)/chr(hex($1))/ge;
-            $self->debug("Found sequence: %s", $sequence);
+            if ($c =~ m|flashvars = (.+);$|m) {
+                my $flashvars = $1;
+                my $jsp = videosite::JSArrayParser->new();
+                my $l;
+                my $s;
+                my $sequence;
 
-            $self->debug("Using %s to parse", ref($jsp));
-            $sequence = $jsp->parse($sequence);
-            $self->debug(Dumper($sequence));
+                $self->debug("Found flashvars: %s", $flashvars);
 
-            unless(defined($sequence)) {
-                $self->error("Found sequence, but could not parse");
-                return undef;
-            } else {
-                $self->debug("Parsed sequence: %s", Dumper($sequence));
+                $self->debug("Using %s to parse", ref($jsp));
+                $flashvars = $jsp->parse($flashvars);
+                $self->debug("Parsed flashvars: %s", Dumper($flashvars));
 
-                foreach (@{$sequence}) {
-                    if (exists($_->{'name'}) && ($_->{'name'} eq 'main')) {
-                        # Found main section
-                        $main = $_->{'layerList'};
-                    }
-                }
-                unless(defined($main)) {
-                    $self->error("Could not find layerList[main]");
+                $sequence = $flashvars->{'sequence'};
+                $sequence =~ s/%(..)/chr(hex($1))/ge;
+
+                $self->debug("Decoded sequence: %s", $sequence);
+                $sequence =  $jsp->parse($sequence);
+
+                unless(defined($sequence)) {
+                    $self->error("Found sequence, but could not parse");
                     return undef;
-                }
+                } else {
+                    $self->debug("Parsed sequence: %s", Dumper($sequence));
+
+                    $l = $self->_fetch_layer($sequence->{'sequence'}, "root/layerList", "background/sequenceList", "main/layerList", "video/param");
+                    unless(defined($l)) {
+                        $self->error("Could not find video layer");
+                        return undef;
+                    }
 
-                foreach (@{$main}) {
-                    if (exists($_->{'name'}) && ($_->{'name'} eq 'video')) {
-                        # Found video section
-                        if (exists($_->{'param'}->{'hdURL'})) {
-                            $metadata->{'DLURL'} = $_->{'param'}->{'hdURL'};
-                        } elsif (exists($_->{'param'}->{'hqURL'})) {
-                            $metadata->{'DLURL'} = $_->{'param'}->{'hqURL'};
-                        } elsif (exists($_->{'param'}->{'hqURL'})) {
-                            $metadata->{'DLURL'} = $_->{'param'}->{'sdURL'};
-                        } else {
-                            $self->error("Video section found, but no URLs");
-                            return undef;
-                        }
+                    # Found video section
+                    if (exists($l->{'hd1080URL'})) {
+                        $metadata->{'DLURL'} = $l->{'hd1080URL'};
+                    } elsif (exists($l->{'hd720URL'})) {
+                        $metadata->{'DLURL'} = $l->{'hd720URL'};
+                    } elsif (exists($l->{'hqURL'})) {
+                        $metadata->{'DLURL'} = $l->{'hqURL'};
+                    } elsif (exists($l->{'sdURL'})) {
+                        $metadata->{'DLURL'} = $l->{'sdURL'};
+                    } elsif (exists($l->{'ldURL'})) {
+                        $metadata->{'DLURL'} = $l->{'ldURL'};
+                    } else {
+                        $self->error("Video section found, but no URLs");
+                        return undef;
                     }
                 }
             }
@@ -131,4 +125,63 @@ sub _parse {
     return $metadata;
 }
 
+# Walk the nested structure parsed from the page's "sequence" data and
+# return the element addressed by a list of path steps.
+#
+# Parameters:
+#   $sequence - reference to an array of hash references for the current
+#               level; once every step is consumed it is returned as-is
+#   @_        - remaining path steps, each written as "name/key": pick the
+#               first entry whose 'name' equals name, then continue with
+#               the value stored under key in that entry
+#
+# Returns the value reached after the last step, or undef if a step cannot
+# be resolved (no such entry, missing key, or unexpected data shape).
+sub _fetch_layer {
+    my $self = shift;
+    my $sequence = shift;
+    my $point = shift;
+    my @points = @_;
+    my $next;
+
+    # No steps left: whatever we were handed is the result. This is tested
+    # before the first debug() call so $point is always defined when logged.
+    unless(defined($point)) {
+        $self->debug("Reached last point");
+        return $sequence;
+    }
+
+    $self->debug("Looking for %s in %s", $point, Dumper($sequence));
+
+    ($point, $next) = split(/\//, $point, 2);
+    unless(defined($point) and defined($next)) {
+        $self->debug("Malformed path step, expected name/key");
+        return undef;
+    }
+
+    # The structure is parsed from a downloaded page, so do not dereference
+    # anything that is not the expected array of entries.
+    unless(ref($sequence) eq 'ARRAY') {
+        $self->debug("Cannot look for %s, not a list of entries", $point);
+        return undef;
+    }
+
+    foreach my $entry (@{$sequence}) {
+        next unless(ref($entry) eq 'HASH');
+        next unless(defined($entry->{'name'}) and ($entry->{'name'} eq $point));
+
+        # Only the first entry with a matching name is considered.
+        if (exists($entry->{$next})) {
+            $self->debug("Using %s in %s", $next, $point);
+            return $self->_fetch_layer($entry->{$next}, @points);
+        }
+
+        $self->debug("%s found, but no %s", $point, $next);
+        return undef;
+    }
+
+    $self->debug("Could not find entry named %s", $point);
+    return undef;
+}
+
 1;