X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=videosite%2FDailyMotionGrabber.pm;h=74e5030f8ef2e76272e0eae5408fd7fe7b1594f5;hb=afe9b4975a8a82cec484ac15c73e7fd0c3c1dbbf;hp=981fef7b7c00cfb1a2841428872282b24dd4e8d8;hpb=d8933ed04195911738700ee2046d709da4d16925;p=videosite.git diff --git a/videosite/DailyMotionGrabber.pm b/videosite/DailyMotionGrabber.pm index 981fef7..74e5030 100644 --- a/videosite/DailyMotionGrabber.pm +++ b/videosite/DailyMotionGrabber.pm @@ -8,8 +8,7 @@ package videosite::DailyMotionGrabber; use videosite::GrabberBase; @ISA = qw(videosite::GrabberBase); -use LWP::Simple qw(!get); -use HTML::Parser; +use HTML::TokeParser; use videosite::JSArrayParser; use Data::Dumper; @@ -17,16 +16,15 @@ use strict; sub new { my $class = shift; - my $self = $class->SUPER::new(); - - $self->{'NAME'} = 'dailymotion'; - $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*dailymotion.com/(?:[^/]+/)*video/([-a-zA-Z0-9_]+))']; - - bless($self, $class); - - $self->_prepare_parameters(); - - return $self; + my $self = $class->SUPER::new( + NAME => 'dailymotion', + _SELFTESTURL => 'http://www.dailymotion.com/video/xylv6u_moon-duo-sleepwalker_music', + _SELFTESTTITLE => 'Moon Duo - Sleepwalker', + PATTERNS => ['(http://(?:[-a-zA-Z0-9_.]+\.)*dailymotion.com/(?:[^/]+/)*video/([-a-zA-Z0-9_]+))'], + @_, + ); + + return bless($self, $class); } sub _parse { @@ -35,7 +33,7 @@ sub _parse { my $pattern = shift; my $content; my $metadata = {}; - my $p = HTML::Parser->new(api_version => 3); + my $p; my @accum; my @text; my $e; @@ -50,73 +48,69 @@ sub _parse { $metadata->{'TITLE'} = undef; $metadata->{'DLURL'} = undef; - unless(defined($content = LWP::Simple::get(sprintf('http://www.dailymotion.com/video/%s', $2)))) { + unless(defined($content = $self->simple_get(sprintf('http://www.dailymotion.com/video/%s', $2)))) { $self->error('Could not download %s', $url); return undef; } - $p->handler(start => \@accum, "tagname, attr"); - $p->handler(text => \@text, "text"); - $p->report_tags(qw(meta script)); - $p->utf8_mode(1); - $p->parse($content); + $p = HTML::TokeParser->new(\$content); # Look for the title in the meta tags - foreach $e (@accum) { + while ($e = $p->get_tag('meta', 'script')) { if ('meta' eq $e->[0]) { - if ('title' eq $e->[1]->{'name'}) { + if (exists($e->[1]->{'property'}) && ('og:title' eq $e->[1]->{'property'})) { $metadata->{'TITLE'} = $e->[1]->{'content'}; - $metadata->{'TITLE'} =~ s/^Dailymotion\s+-\s+//; - $metadata->{'TITLE'} =~ s/(?:\s+-\s+.*)?$//; } - } - } + } elsif ('script' eq $e->[0]) { + my $c = $p->get_text(); - # Look for the download URL - foreach $e (@text) { - if ($e->[0] =~ m|\.addVariable\("sequence",\s*"([^\"]+)"|) { - my $sequence = $1; - my $jsp = videosite::JSArrayParser->new(); - my $main; - my $s; + $self->debug("Found script: %s", $c); - $sequence =~ s/%(..)/chr(hex($1))/ge; - $self->debug("Found sequence: %s", $sequence); + if ($c =~ m|flashvars = (.+);$|m) { + my $flashvars = $1; + my $jsp = videosite::JSArrayParser->new(); + my $l; + my $s; + my $sequence; - $self->debug("Using %s to parse", ref($jsp)); - $sequence = $jsp->parse($sequence); - $self->debug(Dumper($sequence)); + $self->debug("Found flashvars: %s", $flashvars); - unless(defined($sequence)) { - $self->error("Found sequence, but could not parse"); - return undef; - } else { - $self->debug("Parsed sequence: %s", Dumper($sequence)); + $self->debug("Using %s to parse", ref($jsp)); + $flashvars = $jsp->parse($flashvars); + $self->debug("Parsed flashvars: %s", Dumper($flashvars)); - foreach (@{$sequence}) { - if (exists($_->{'name'}) && ($_->{'name'} eq 'main')) { - # Found main section - $main = $_->{'layerList'}; - } - } - unless(defined($main)) { - $self->error("Could not find layerList[main]"); + $sequence = $flashvars->{'sequence'}; + $sequence =~ s/%(..)/chr(hex($1))/ge; + + $self->debug("Decoded sequence: %s", $sequence); + $sequence = $jsp->parse($sequence); + + unless(defined($sequence)) { + $self->error("Found sequence, but could not parse"); return undef; - } + } else { + $self->debug("Parsed sequence: %s", Dumper($sequence)); + + $l = $self->_fetch_layer($sequence->{'sequence'}, "root/layerList", "background/sequenceList", "main/layerList", "video/param"); + unless(defined($l)) { + $self->error("Could not find video layer"); + return undef; + } - foreach (@{$main}) { - if (exists($_->{'name'}) && ($_->{'name'} eq 'video')) { - # Found video section - if (exists($_->{'param'}->{'hdURL'})) { - $metadata->{'DLURL'} = $_->{'param'}->{'hdURL'}; - } elsif (exists($_->{'param'}->{'hqURL'})) { - $metadata->{'DLURL'} = $_->{'param'}->{'hqURL'}; - } elsif (exists($_->{'param'}->{'hqURL'})) { - $metadata->{'DLURL'} = $_->{'param'}->{'sdURL'}; - } else { - $self->error("Video section found, but no URLs"); - return undef; - } + # Found video section + if (exists($l->{'hd1080URL'})) { + $metadata->{'DLURL'} = $l->{'hd1080URL'}; + } elsif (exists($l->{'hd720URL'})) { + $metadata->{'DLURL'} = $l->{'hd720URL'}; + } elsif (exists($l->{'hqURL'})) { + $metadata->{'DLURL'} = $l->{'hqURL'}; + } elsif (exists($l->{'sdURL'})) { + $metadata->{'DLURL'} = $l->{'sdURL'}; + } elsif (exists($l->{'ldURL'})) { + $metadata->{'DLURL'} = $l->{'ldURL'}; + } else { + $self->error("Video section found, but no URLs"); + return undef; } } } @@ -131,4 +125,37 @@ sub _parse { return $metadata; } +sub _fetch_layer { + my $self = shift; + my $sequence = shift; + my $point = shift; + my $next; + my @points = @_; + my $l; + + $self->debug("Looking for %s in %s", $point, Dumper($sequence)); + + unless(defined($point)) { + $self->debug("Reached last point"); + return $sequence; + } + ($point, $next) = split(/\//, $point, 2); + + foreach (@{$sequence}) { + if (exists($_->{'name'}) and ($_->{'name'} eq $point)) { + if (exists($_->{$next})) { + $self->debug("Using %s in %s", $next, $point); + return $self->_fetch_layer($_->{$next}, @points); + } else { + $self->debug("%s found, but no %s", $point, $next); + return undef; + } + + } + } + + $self->debug("Could not find entry named %s", $point); + return undef; +} + 1;