X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=videosite%2FVimeoGrabber.pm;h=19dfe94e56f6380eef342f5f5eef1d3066c63733;hb=8052f22003809d346863ad01dab1a1c9dad507b8;hp=3e8ce6a4a1277abbc2f71d0dea2c532046afcb9c;hpb=d5eef9cc4f276f621e37618a09e9b304b378a2df;p=videosite.git diff --git a/videosite/VimeoGrabber.pm b/videosite/VimeoGrabber.pm index 3e8ce6a..19dfe94 100644 --- a/videosite/VimeoGrabber.pm +++ b/videosite/VimeoGrabber.pm @@ -3,29 +3,28 @@ # # Grabber for vimeo.com -package VimeoGrabber; +package videosite::VimeoGrabber; -use GrabberBase; -@ISA = qw(GrabberBase); +use videosite::GrabberBase; +@ISA = qw(videosite::GrabberBase); -use LWP::Simple qw(!get); -use XML::Simple; -use Digest::MD5 qw(md5_hex); +use HTML::TokeParser; +use videosite::JSArrayParser; use Data::Dumper; use strict; sub new { my $class = shift; - my $self = $class->SUPER::new(); - - $self->{'NAME'} = 'vimeo'; - $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*vimeo.com/(\d+))']; - - bless($self, $class); - $self->_prepare_parameters(); - - return $self; + my $self = $class->SUPER::new( + NAME => 'vimeo', + _SELFTESTURL => 'http://vimeo.com/35055590', + _SELFTESTTITLE => 'Hello', + PATTERNS => ['(http://(?:[-a-zA-Z0-9_.]+\.)*vimeo.com/(?:m/)?(\d+))'], + @_, + ); + + return bless($self, $class); } sub _parse { @@ -34,9 +33,10 @@ sub _parse { my $pattern = shift; my $content; my $metadata = {}; - my $p = XML::Simple->new(); - my $t; + my $p; + my $e; my $dlurl; + my $hd; my $dlpath; my $timestamp; my $hash; @@ -52,36 +52,52 @@ sub _parse { $metadata->{'DLURL'} = undef; # Get the XML file containing the video metadata - unless(defined($content = LWP::Simple::get(sprintf('http://www.vimeo.com/moogaloop/load/clip:%s/local?context=default&context_id=undefined', $2)))) { - $self->error('Could not download XML metadata'); - return undef; - } - - unless(defined($t = $p->XMLin($content, KeepRoot => 1))) { - $self->error('Could not parse XML metadata'); + unless(defined($content = $self->simple_get(sprintf('http://vimeo.com/%s', $2)))) { + $self->error('Could not download site'); return undef; } - $dlurl = $t->{'xml'}->{'video'}->{'hd_file'} || $t->{'xml'}->{'video'}->{'file'}; - $timestamp = $t->{'xml'}->{'timestamp'}; - - unless(defined($dlurl)) { - return undef; - } - - # Vimeo appends a hash to the download URL, in order to thwart people like me. - # Unfortunately the algorithm isn't that complicated :) - if ($dlurl =~ m|http://bitcast.vimeo.com(.+)|) { - $dlpath = $1; - $timestamp += 1800; - $hash = md5_hex(sprintf('redFiretruck%s?e=%d', $dlpath, $timestamp)); - } else { - return undef; + $p = HTML::TokeParser->new(\$content); + + while ($e = $p->get_tag('script')) { + if ($e->[0] eq 'script') { + my $t = $p->get_text(); + + if ($t =~ m|clip\d+_\d+ = (.*\});Player|s) { + my $jsp = videosite::JSArrayParser->new(); + my $r; + + $self->debug("Found raw config: %s", $1); + $r = $jsp->parse($1); + + unless(defined($r)) { + $self->error("Found information hash, but could not parse"); + return undef; + } + + $self->debug("Found parsed config: %s", Dumper($r)); + + unless(exists($r->{'config'}->{'request'})) { + $self->error("Required information not found in hash"); + return undef; + } + + $metadata->{'TITLE'} = $r->{'config'}->{'video'}->{'title'}; + $hd = grep { $_ eq 'hd' } @{$r->{'config'}->{'video'}->{'files'}->{'h264'}}; + $self->debug("HD: %d", $hd); + $r = $r->{'config'}->{'request'}; + + $metadata->{'DLURL'} = sprintf("http://%s/play_redirect?clip_id=%d&sig=%s&time=%d&quality=%s&codecs=H264,VP8,VP6", + $r->{'player_url'}, + $metadata->{'ID'}, + $r->{'signature'}, + $r->{'timestamp'}, + $hd?'hd':'sd', + ); + } + } } - $metadata->{'DLURL'} = sprintf('%s?e=%d&h=%s', $dlurl, $timestamp, $hash); - $metadata->{'TITLE'} = $t->{'xml'}->{'video'}->{'caption'}; - unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { $self->error('Could not extract download URL and title'); return undef;