X-Git-Url: https://git.camperquake.de/gitweb.cgi?p=videosite.git;a=blobdiff_plain;f=videosite%2FDailyMotionGrabber.pm;h=74e5030f8ef2e76272e0eae5408fd7fe7b1594f5;hp=3e1e77fcc9bfd85b5089abb42ae23b89155ed7b0;hb=afe9b4975a8a82cec484ac15c73e7fd0c3c1dbbf;hpb=fc449da24ace2ba9ec5304afaae579d9fe55e967 diff --git a/videosite/DailyMotionGrabber.pm b/videosite/DailyMotionGrabber.pm index 3e1e77f..74e5030 100644 --- a/videosite/DailyMotionGrabber.pm +++ b/videosite/DailyMotionGrabber.pm @@ -8,7 +8,7 @@ package videosite::DailyMotionGrabber; use videosite::GrabberBase; @ISA = qw(videosite::GrabberBase); -use HTML::Parser; +use HTML::TokeParser; use videosite::JSArrayParser; use Data::Dumper; @@ -18,6 +18,8 @@ sub new { my $class = shift; my $self = $class->SUPER::new( NAME => 'dailymotion', + _SELFTESTURL => 'http://www.dailymotion.com/video/xylv6u_moon-duo-sleepwalker_music', + _SELFTESTTITLE => 'Moon Duo - Sleepwalker', PATTERNS => ['(http://(?:[-a-zA-Z0-9_.]+\.)*dailymotion.com/(?:[^/]+/)*video/([-a-zA-Z0-9_]+))'], @_, ); @@ -31,7 +33,7 @@ sub _parse { my $pattern = shift; my $content; my $metadata = {}; - my $p = HTML::Parser->new(api_version => 3); + my $p; my @accum; my @text; my $e; @@ -51,60 +53,65 @@ sub _parse { return undef; } - $p->handler(start => \@accum, "tagname, attr"); - $p->handler(text => \@text, "text"); - $p->report_tags(qw(meta script)); - $p->utf8_mode(1); - $p->parse($content); + $p = HTML::TokeParser->new(\$content); # Look for the title in the meta tags - foreach $e (@accum) { + while ($e = $p->get_tag('meta', 'script')) { if ('meta' eq $e->[0]) { - if ('title' eq $e->[1]->{'name'}) { + if (exists($e->[1]->{'property'}) && ('og:title' eq $e->[1]->{'property'})) { $metadata->{'TITLE'} = $e->[1]->{'content'}; - $metadata->{'TITLE'} =~ s/^Dailymotion\s+-\s+//; - $metadata->{'TITLE'} =~ s/(?:\s+-\s+.*)?$//; } - } - } + } elsif ('script' eq $e->[0]) { + my $c = $p->get_text(); - # Look for the download URL - foreach $e (@text) { - if ($e->[0] =~ m|\.addVariable\("sequence",\s*"([^\"]+)"|) { - my $sequence = $1; - my $jsp = videosite::JSArrayParser->new(); - my $l; - my $s; + $self->debug("Found script: %s", $c); - $sequence =~ s/%(..)/chr(hex($1))/ge; - $self->debug("Found sequence: %s", $sequence); + if ($c =~ m|flashvars = (.+);$|m) { + my $flashvars = $1; + my $jsp = videosite::JSArrayParser->new(); + my $l; + my $s; + my $sequence; - $self->debug("Using %s to parse", ref($jsp)); - $sequence = $jsp->parse($sequence); - $self->debug(Dumper($sequence)); + $self->debug("Found flashvars: %s", $flashvars); - unless(defined($sequence)) { - $self->error("Found sequence, but could not parse"); - return undef; - } else { - $self->debug("Parsed sequence: %s", Dumper($sequence)); + $self->debug("Using %s to parse", ref($jsp)); + $flashvars = $jsp->parse($flashvars); + $self->debug("Parsed flashvars: %s", Dumper($flashvars)); - $l = $self->_fetch_layer($sequence, "root/layerList", "background/sequenceList", "main/layerList", "video/param"); - unless(defined($l)) { - $self->error("Could not find video layer"); - return undef; - } + $sequence = $flashvars->{'sequence'}; + $sequence =~ s/%(..)/chr(hex($1))/ge; - # Found video section - if (exists($l->{'videoPluginParameters'}->{'hdURL'})) { - $metadata->{'DLURL'} = $l->{'videoPluginParameters'}->{'hdURL'}; - } elsif (exists($l->{'videoPluginParameters'}->{'hqURL'})) { - $metadata->{'DLURL'} = $l->{'videoPluginParameters'}->{'hqURL'}; - } elsif (exists($l->{'videoPluginParameters'}->{'hqURL'})) { - $metadata->{'DLURL'} = $l->{'videoPluginParameters'}->{'sdURL'}; - } else { - $self->error("Video section found, but no URLs"); + $self->debug("Decoded sequence: %s", $sequence); + $sequence = $jsp->parse($sequence); + + unless(defined($sequence)) { + $self->error("Found sequence, but could not parse"); return undef; + } else { + $self->debug("Parsed sequence: %s", Dumper($sequence)); + + $l = $self->_fetch_layer($sequence->{'sequence'}, "root/layerList", "background/sequenceList", "main/layerList", "video/param"); + unless(defined($l)) { + $self->error("Could not find video layer"); + return undef; + } + + # Found video section + if (exists($l->{'hd1080URL'})) { + $metadata->{'DLURL'} = $l->{'hd1080URL'}; + } elsif (exists($l->{'hd720URL'})) { + $metadata->{'DLURL'} = $l->{'hd720URL'}; + } elsif (exists($l->{'hqURL'})) { + $metadata->{'DLURL'} = $l->{'hqURL'}; + } elsif (exists($l->{'sdURL'})) { + $metadata->{'DLURL'} = $l->{'sdURL'}; + } elsif (exists($l->{'ldURL'})) { + $metadata->{'DLURL'} = $l->{'ldURL'}; + } else { + $self->error("Video section found, but no URLs"); + return undef; + } } } }