X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=videosite%2FDailyMotionGrabber.pm;h=74e5030f8ef2e76272e0eae5408fd7fe7b1594f5;hb=afe9b4975a8a82cec484ac15c73e7fd0c3c1dbbf;hp=081b4b4034e748371f91917ffde49687400d0a2a;hpb=7d7ff7b6405f55adf3e083808da57b6bc6a31312;p=videosite.git diff --git a/videosite/DailyMotionGrabber.pm b/videosite/DailyMotionGrabber.pm index 081b4b4..74e5030 100644 --- a/videosite/DailyMotionGrabber.pm +++ b/videosite/DailyMotionGrabber.pm @@ -3,29 +3,28 @@ # (c) 2007 by Ralf Ertzinger # licensed under GNU GPL v2 -package DailyMotionGrabber; +package videosite::DailyMotionGrabber; -use GrabberBase; -@ISA = qw(GrabberBase); +use videosite::GrabberBase; +@ISA = qw(videosite::GrabberBase); -use LWP::Simple qw(!get); -use HTML::Parser; +use HTML::TokeParser; +use videosite::JSArrayParser; use Data::Dumper; use strict; sub new { my $class = shift; - my $self = $class->SUPER::new(); - - $self->{'NAME'} = 'dailymotion'; - $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*dailymotion.com/(?:[^/]+/)*video/([-a-zA-Z0-9_]+))']; - - bless($self, $class); - - $self->_prepare_parameters(); - - return $self; + my $self = $class->SUPER::new( + NAME => 'dailymotion', + _SELFTESTURL => 'http://www.dailymotion.com/video/xylv6u_moon-duo-sleepwalker_music', + _SELFTESTTITLE => 'Moon Duo - Sleepwalker', + PATTERNS => ['(http://(?:[-a-zA-Z0-9_.]+\.)*dailymotion.com/(?:[^/]+/)*video/([-a-zA-Z0-9_]+))'], + @_, + ); + + return bless($self, $class); } sub _parse { @@ -34,7 +33,7 @@ sub _parse { my $pattern = shift; my $content; my $metadata = {}; - my $p = HTML::Parser->new(api_version => 3); + my $p; my @accum; my @text; my $e; @@ -45,37 +44,76 @@ sub _parse { $metadata->{'URL'} = $url; $metadata->{'ID'} = $2; $metadata->{'TYPE'} = 'video'; - $metadata->{'SOURCE'} = 'dailymotion'; + $metadata->{'SOURCE'} = $self->{'NAME'}; $metadata->{'TITLE'} = undef; $metadata->{'DLURL'} = undef; - unless(defined($content = LWP::Simple::get(sprintf('http://www.dailymotion.com/video/%s', $2)))) { + unless(defined($content = $self->simple_get(sprintf('http://www.dailymotion.com/video/%s', $2)))) { $self->error('Could not download %s', $url); return undef; } - $p->handler(start => \@accum, "tagname, attr"); - $p->handler(text => \@text, "text"); - $p->report_tags(qw(meta script)); - $p->utf8_mode(1); - $p->parse($content); + $p = HTML::TokeParser->new(\$content); # Look for the title in the meta tags - foreach $e (@accum) { + while ($e = $p->get_tag('meta', 'script')) { if ('meta' eq $e->[0]) { - if ('title' eq $e->[1]->{'name'}) { + if (exists($e->[1]->{'property'}) && ('og:title' eq $e->[1]->{'property'})) { $metadata->{'TITLE'} = $e->[1]->{'content'}; - $metadata->{'TITLE'} =~ s/^Dailymotion\s*:\s*//; } - } - } - - # Look for the download URL - foreach $e (@text) { - if ($e->[0] =~ m|\.addVariable\("url", "([^\"]+)"|) { - $metadata->{'DLURL'} = $1; - $metadata->{'DLURL'} =~ s/%(..)/chr(hex($1))/ge; - + } elsif ('script' eq $e->[0]) { + my $c = $p->get_text(); + + $self->debug("Found script: %s", $c); + + if ($c =~ m|flashvars = (.+);$|m) { + my $flashvars = $1; + my $jsp = videosite::JSArrayParser->new(); + my $l; + my $s; + my $sequence; + + $self->debug("Found flashvars: %s", $flashvars); + + $self->debug("Using %s to parse", ref($jsp)); + $flashvars = $jsp->parse($flashvars); + $self->debug("Parsed flashvars: %s", Dumper($flashvars)); + + $sequence = $flashvars->{'sequence'}; + $sequence =~ s/%(..)/chr(hex($1))/ge; + + $self->debug("Decoded sequence: %s", $sequence); + $sequence = $jsp->parse($sequence); + + unless(defined($sequence)) { + $self->error("Found sequence, but could not parse"); + return undef; + } else { + $self->debug("Parsed sequence: %s", Dumper($sequence)); + + $l = $self->_fetch_layer($sequence->{'sequence'}, "root/layerList", "background/sequenceList", "main/layerList", "video/param"); + unless(defined($l)) { + $self->error("Could not find video layer"); + return undef; + } + + # Found video section + if (exists($l->{'hd1080URL'})) { + $metadata->{'DLURL'} = $l->{'hd1080URL'}; + } elsif (exists($l->{'hd720URL'})) { + $metadata->{'DLURL'} = $l->{'hd720URL'}; + } elsif (exists($l->{'hqURL'})) { + $metadata->{'DLURL'} = $l->{'hqURL'}; + } elsif (exists($l->{'sdURL'})) { + $metadata->{'DLURL'} = $l->{'sdURL'}; + } elsif (exists($l->{'ldURL'})) { + $metadata->{'DLURL'} = $l->{'ldURL'}; + } else { + $self->error("Video section found, but no URLs"); + return undef; + } + } + } } } @@ -87,4 +125,37 @@ sub _parse { return $metadata; } +sub _fetch_layer { + my $self = shift; + my $sequence = shift; + my $point = shift; + my $next; + my @points = @_; + my $l; + + $self->debug("Looking for %s in %s", $point, Dumper($sequence)); + + unless(defined($point)) { + $self->debug("Reached last point"); + return $sequence; + } + ($point, $next) = split(/\//, $point, 2); + + foreach (@{$sequence}) { + if (exists($_->{'name'}) and ($_->{'name'} eq $point)) { + if (exists($_->{$next})) { + $self->debug("Using %s in %s", $next, $point); + return $self->_fetch_layer($_->{$next}, @points); + } else { + $self->debug("%s found, but no %s", $point, $next); + return undef; + } + + } + } + + $self->debug("Could not find entry named %s", $point); + return undef; +} + 1;