From: Ralf Ertzinger Date: Tue, 30 Apr 2013 13:51:23 +0000 (+0200) Subject: Manual merge for DailyMotionGrabber.pm X-Git-Url: https://git.camperquake.de/gitweb.cgi?p=videosite.git;a=commitdiff_plain;h=afe9b4975a8a82cec484ac15c73e7fd0c3c1dbbf;hp=-c Manual merge for DailyMotionGrabber.pm --- afe9b4975a8a82cec484ac15c73e7fd0c3c1dbbf diff --combined videosite/DailyMotionGrabber.pm index 3e1e77f,fc1f08c..74e5030 --- a/videosite/DailyMotionGrabber.pm +++ b/videosite/DailyMotionGrabber.pm @@@ -8,7 -8,7 +8,7 @@@ package videosite::DailyMotionGrabber use videosite::GrabberBase; @ISA = qw(videosite::GrabberBase); - use HTML::Parser; + use HTML::TokeParser; use videosite::JSArrayParser; use Data::Dumper; @@@ -16,13 -16,18 +16,15 @@@ use strict sub new { my $class = shift; - my $self = $class->SUPER::new(); - - $self->{'NAME'} = 'dailymotion'; - $self->{_SELFTESTURL} = 'http://www.dailymotion.com/video/xylv6u_moon-duo-sleepwalker_music'; - $self->{_SELFTESTTITLE} = 'Moon Duo - Sleepwalker'; - $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*dailymotion.com/(?:[^/]+/)*video/([-a-zA-Z0-9_]+))']; - - bless($self, $class); - - $self->_prepare_parameters(); - - return $self; + my $self = $class->SUPER::new( + NAME => 'dailymotion', ++ _SELFTESTURL => 'http://www.dailymotion.com/video/xylv6u_moon-duo-sleepwalker_music', ++ _SELFTESTTITLE => 'Moon Duo - Sleepwalker', + PATTERNS => ['(http://(?:[-a-zA-Z0-9_.]+\.)*dailymotion.com/(?:[^/]+/)*video/([-a-zA-Z0-9_]+))'], + @_, + ); + + return bless($self, $class); } sub _parse { @@@ -31,7 -36,7 +33,7 @@@ my $pattern = shift; my $content; my $metadata = {}; - my $p = HTML::Parser->new(api_version => 3); + my $p; my @accum; my @text; my $e; @@@ -51,60 -56,65 +53,65 @@@ return undef; } - $p->handler(start => \@accum, "tagname, attr"); - $p->handler(text => \@text, "text"); - $p->report_tags(qw(meta script)); - $p->utf8_mode(1); - $p->parse($content); + $p = HTML::TokeParser->new(\$content); # Look for the title in the meta tags - foreach $e (@accum) { + while ($e = $p->get_tag('meta', 'script')) { if ('meta' eq $e->[0]) { - if ('title' eq $e->[1]->{'name'}) { + if (exists($e->[1]->{'property'}) && ('og:title' eq $e->[1]->{'property'})) { $metadata->{'TITLE'} = $e->[1]->{'content'}; - $metadata->{'TITLE'} =~ s/^Dailymotion\s+-\s+//; - $metadata->{'TITLE'} =~ s/(?:\s+-\s+.*)?$//; } - } - } + } elsif ('script' eq $e->[0]) { + my $c = $p->get_text(); - # Look for the download URL - foreach $e (@text) { - if ($e->[0] =~ m|\.addVariable\("sequence",\s*"([^\"]+)"|) { - my $sequence = $1; - my $jsp = videosite::JSArrayParser->new(); - my $l; - my $s; - - $sequence =~ s/%(..)/chr(hex($1))/ge; - $self->debug("Found sequence: %s", $sequence); - - $self->debug("Using %s to parse", ref($jsp)); - $sequence = $jsp->parse($sequence); - $self->debug(Dumper($sequence)); - - unless(defined($sequence)) { - $self->error("Found sequence, but could not parse"); - return undef; - } else { - $self->debug("Parsed sequence: %s", Dumper($sequence)); - - $l = $self->_fetch_layer($sequence, "root/layerList", "background/sequenceList", "main/layerList", "video/param"); - unless(defined($l)) { - $self->error("Could not find video layer"); - return undef; - } + $self->debug("Found script: %s", $c); - # Found video section - if (exists($l->{'videoPluginParameters'}->{'hdURL'})) { - $metadata->{'DLURL'} = $l->{'videoPluginParameters'}->{'hdURL'}; - } elsif (exists($l->{'videoPluginParameters'}->{'hqURL'})) { - $metadata->{'DLURL'} = $l->{'videoPluginParameters'}->{'hqURL'}; - } elsif (exists($l->{'videoPluginParameters'}->{'hqURL'})) { - $metadata->{'DLURL'} = $l->{'videoPluginParameters'}->{'sdURL'}; - } else { - $self->error("Video section found, but no URLs"); + if ($c =~ m|flashvars = (.+);$|m) { + my $flashvars = $1; + my $jsp = videosite::JSArrayParser->new(); + my $l; + my $s; + my $sequence; + + $self->debug("Found flashvars: %s", $flashvars); + + $self->debug("Using %s to parse", ref($jsp)); + $flashvars = $jsp->parse($flashvars); + $self->debug("Parsed flashvars: %s", Dumper($flashvars)); + + $sequence = $flashvars->{'sequence'}; + $sequence =~ s/%(..)/chr(hex($1))/ge; + + $self->debug("Decoded sequence: %s", $sequence); + $sequence = $jsp->parse($sequence); + + unless(defined($sequence)) { + $self->error("Found sequence, but could not parse"); return undef; + } else { + $self->debug("Parsed sequence: %s", Dumper($sequence)); + + $l = $self->_fetch_layer($sequence->{'sequence'}, "root/layerList", "background/sequenceList", "main/layerList", "video/param"); + unless(defined($l)) { + $self->error("Could not find video layer"); + return undef; + } + + # Found video section + if (exists($l->{'hd1080URL'})) { + $metadata->{'DLURL'} = $l->{'hd1080URL'}; + } elsif (exists($l->{'hd720URL'})) { + $metadata->{'DLURL'} = $l->{'hd720URL'}; + } elsif (exists($l->{'hqURL'})) { + $metadata->{'DLURL'} = $l->{'hqURL'}; + } elsif (exists($l->{'sdURL'})) { + $metadata->{'DLURL'} = $l->{'sdURL'}; + } elsif (exists($l->{'ldURL'})) { + $metadata->{'DLURL'} = $l->{'ldURL'}; + } else { + $self->error("Video section found, but no URLs"); + return undef; + } } } }