From: Ralf Ertzinger Date: Tue, 30 Apr 2013 13:28:38 +0000 (+0200) Subject: Fix Dailymotion grabber X-Git-Url: https://git.camperquake.de/gitweb.cgi?p=videosite.git;a=commitdiff_plain;h=9bb78db68a79470e45dd6400321e3dd96ffcb20b Fix Dailymotion grabber --- diff --git a/videosite/DailyMotionGrabber.pm b/videosite/DailyMotionGrabber.pm index 6544882..fc1f08c 100644 --- a/videosite/DailyMotionGrabber.pm +++ b/videosite/DailyMotionGrabber.pm @@ -8,7 +8,7 @@ package videosite::DailyMotionGrabber; use videosite::GrabberBase; @ISA = qw(videosite::GrabberBase); -use HTML::Parser; +use HTML::TokeParser; use videosite::JSArrayParser; use Data::Dumper; @@ -19,6 +19,8 @@ sub new { my $self = $class->SUPER::new(); $self->{'NAME'} = 'dailymotion'; + $self->{_SELFTESTURL} = 'http://www.dailymotion.com/video/xylv6u_moon-duo-sleepwalker_music'; + $self->{_SELFTESTTITLE} = 'Moon Duo - Sleepwalker'; $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*dailymotion.com/(?:[^/]+/)*video/([-a-zA-Z0-9_]+))']; bless($self, $class); @@ -34,7 +36,7 @@ sub _parse { my $pattern = shift; my $content; my $metadata = {}; - my $p = HTML::Parser->new(api_version => 3); + my $p; my @accum; my @text; my $e; @@ -54,60 +56,65 @@ sub _parse { return undef; } - $p->handler(start => \@accum, "tagname, attr"); - $p->handler(text => \@text, "text"); - $p->report_tags(qw(meta script)); - $p->utf8_mode(1); - $p->parse($content); + $p = HTML::TokeParser->new(\$content); # Look for the title in the meta tags - foreach $e (@accum) { + while ($e = $p->get_tag('meta', 'script')) { if ('meta' eq $e->[0]) { - if ('title' eq $e->[1]->{'name'}) { + if (exists($e->[1]->{'property'}) && ('og:title' eq $e->[1]->{'property'})) { $metadata->{'TITLE'} = $e->[1]->{'content'}; - $metadata->{'TITLE'} =~ s/^Dailymotion\s+-\s+//; - $metadata->{'TITLE'} =~ s/(?:\s+-\s+.*)?$//; } - } - } + } elsif ('script' eq $e->[0]) { + my $c = $p->get_text(); - # Look for the download URL - foreach $e (@text) { - if ($e->[0] =~ m|\.addVariable\("sequence",\s*"([^\"]+)"|) { - my $sequence = $1; - my $jsp = videosite::JSArrayParser->new(); - my $l; - my $s; + $self->debug("Found script: %s", $c); - $sequence =~ s/%(..)/chr(hex($1))/ge; - $self->debug("Found sequence: %s", $sequence); + if ($c =~ m|flashvars = (.+);$|m) { + my $flashvars = $1; + my $jsp = videosite::JSArrayParser->new(); + my $l; + my $s; + my $sequence; - $self->debug("Using %s to parse", ref($jsp)); - $sequence = $jsp->parse($sequence); - $self->debug(Dumper($sequence)); + $self->debug("Found flashvars: %s", $flashvars); - unless(defined($sequence)) { - $self->error("Found sequence, but could not parse"); - return undef; - } else { - $self->debug("Parsed sequence: %s", Dumper($sequence)); + $self->debug("Using %s to parse", ref($jsp)); + $flashvars = $jsp->parse($flashvars); + $self->debug("Parsed flashvars: %s", Dumper($flashvars)); - $l = $self->_fetch_layer($sequence, "root/layerList", "background/sequenceList", "main/layerList", "video/param"); - unless(defined($l)) { - $self->error("Could not find video layer"); - return undef; - } + $sequence = $flashvars->{'sequence'}; + $sequence =~ s/%(..)/chr(hex($1))/ge; - # Found video section - if (exists($l->{'videoPluginParameters'}->{'hdURL'})) { - $metadata->{'DLURL'} = $l->{'videoPluginParameters'}->{'hdURL'}; - } elsif (exists($l->{'videoPluginParameters'}->{'hqURL'})) { - $metadata->{'DLURL'} = $l->{'videoPluginParameters'}->{'hqURL'}; - } elsif (exists($l->{'videoPluginParameters'}->{'hqURL'})) { - $metadata->{'DLURL'} = $l->{'videoPluginParameters'}->{'sdURL'}; - } else { - $self->error("Video section found, but no URLs"); + $self->debug("Decoded sequence: %s", $sequence); + $sequence = $jsp->parse($sequence); + + unless(defined($sequence)) { + $self->error("Found sequence, but could not parse"); return undef; + } else { + $self->debug("Parsed sequence: %s", Dumper($sequence)); + + $l = $self->_fetch_layer($sequence->{'sequence'}, "root/layerList", "background/sequenceList", "main/layerList", "video/param"); + unless(defined($l)) { + $self->error("Could not find video layer"); + return undef; + } + + # Found video section + if (exists($l->{'hd1080URL'})) { + $metadata->{'DLURL'} = $l->{'hd1080URL'}; + } elsif (exists($l->{'hd720URL'})) { + $metadata->{'DLURL'} = $l->{'hd720URL'}; + } elsif (exists($l->{'hqURL'})) { + $metadata->{'DLURL'} = $l->{'hqURL'}; + } elsif (exists($l->{'sdURL'})) { + $metadata->{'DLURL'} = $l->{'sdURL'}; + } elsif (exists($l->{'ldURL'})) { + $metadata->{'DLURL'} = $l->{'ldURL'}; + } else { + $self->error("Video section found, but no URLs"); + return undef; + } } } }