X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=videosite%2FVimeoGrabber.pm;h=19dfe94e56f6380eef342f5f5eef1d3066c63733;hb=8d918b07ad100a4d7b96cf5df640cef0a8a0411d;hp=ba76cff9aff0032c82d0c6372429ff02a003249c;hpb=70217a8a3216bb95f5a45740cb92df22086d32b8;p=videosite.git diff --git a/videosite/VimeoGrabber.pm b/videosite/VimeoGrabber.pm index ba76cff..19dfe94 100644 --- a/videosite/VimeoGrabber.pm +++ b/videosite/VimeoGrabber.pm @@ -8,24 +8,23 @@ package videosite::VimeoGrabber; use videosite::GrabberBase; @ISA = qw(videosite::GrabberBase); -use LWP::UserAgent; -use XML::Simple; -use Digest::MD5 qw(md5_hex); +use HTML::TokeParser; +use videosite::JSArrayParser; use Data::Dumper; use strict; sub new { my $class = shift; - my $self = $class->SUPER::new(); - - $self->{'NAME'} = 'vimeo'; - $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*vimeo.com/(\d+))']; - - bless($self, $class); - $self->_prepare_parameters(); - - return $self; + my $self = $class->SUPER::new( + NAME => 'vimeo', + _SELFTESTURL => 'http://vimeo.com/35055590', + _SELFTESTTITLE => 'Hello', + PATTERNS => ['(http://(?:[-a-zA-Z0-9_.]+\.)*vimeo.com/(?:m/)?(\d+))'], + @_, + ); + + return bless($self, $class); } sub _parse { @@ -34,14 +33,13 @@ sub _parse { my $pattern = shift; my $content; my $metadata = {}; - my $p = XML::Simple->new(); - my $t; + my $p; + my $e; my $dlurl; my $hd; my $dlpath; my $timestamp; my $hash; - my $ua = LWP::UserAgent->new(agent => 'Mozilla'); $url =~ m|$pattern|; $url = $1; @@ -54,50 +52,52 @@ sub _parse { $metadata->{'DLURL'} = undef; # Get the XML file containing the video metadata - $content = $ua->get(sprintf('http://www.vimeo.com/moogaloop/load/clip:%s', $2)); - unless ($content->is_success()) { - $self->error('Could not download XML metadata'); + unless(defined($content = $self->simple_get(sprintf('http://vimeo.com/%s', $2)))) { + $self->error('Could not download site'); return undef; } - $content = $content->decoded_content(); - - unless(defined($t = $p->XMLin($content, KeepRoot => 1))) { - $self->error('Could not parse XML metadata'); - return undef; + $p = HTML::TokeParser->new(\$content); + + while ($e = $p->get_tag('script')) { + if ($e->[0] eq 'script') { + my $t = $p->get_text(); + + if ($t =~ m|clip\d+_\d+ = (.*\});Player|s) { + my $jsp = videosite::JSArrayParser->new(); + my $r; + + $self->debug("Found raw config: %s", $1); + $r = $jsp->parse($1); + + unless(defined($r)) { + $self->error("Found information hash, but could not parse"); + return undef; + } + + $self->debug("Found parsed config: %s", Dumper($r)); + + unless(exists($r->{'config'}->{'request'})) { + $self->error("Required information not found in hash"); + return undef; + } + + $metadata->{'TITLE'} = $r->{'config'}->{'video'}->{'title'}; + $hd = grep { $_ eq 'hd' } @{$r->{'config'}->{'video'}->{'files'}->{'h264'}}; + $self->debug("HD: %d", $hd); + $r = $r->{'config'}->{'request'}; + + $metadata->{'DLURL'} = sprintf("http://%s/play_redirect?clip_id=%d&sig=%s&time=%d&quality=%s&codecs=H264,VP8,VP6", + $r->{'player_url'}, + $metadata->{'ID'}, + $r->{'signature'}, + $r->{'timestamp'}, + $hd?'hd':'sd', + ); + } + } } - if (exists($t->{'xml'}->{'video'}->{'isHD'}) and (0 != $t->{'xml'}->{'video'}->{'isHD'})) { - $self->debug('Selecting HD video'); - $hd = '/?q=hd'; - } else { - $self->debug('Selecting SD video'); - $hd = ''; - } - $timestamp = $t->{'xml'}->{'request_signature_expires'}; - $hash = $t->{'xml'}->{'request_signature'}; - $dlurl = sprintf('http://vimeo.com/moogaloop/play/clip:%s/%s/%d%s', $metadata->{'ID'}, $hash, $timestamp, $hd); - - unless(defined($dlurl)) { - $self->error('No dlurl found in XML'); - return undef; - } - - # # Vimeo appends a hash to the download URL, in order to thwart people like me. - # # Unfortunately the algorithm isn't that complicated :) - # if ($dlurl =~ m|http://bitcast.vimeo.com(.+)|) { - # $dlpath = $1; - # $timestamp += 1800; - # $hash = md5_hex(sprintf('redFiretruck%s?e=%d', $dlpath, $timestamp)); - # } else { - # $self->error('Unknown dlurl scheme: %s', $dlurl); - # return undef; - # } - - # $metadata->{'DLURL'} = sprintf('%s?e=%d&h=%s', $dlurl, $timestamp, $hash); - $metadata->{'DLURL'} = $dlurl; - $metadata->{'TITLE'} = $t->{'xml'}->{'video'}->{'caption'}; - unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { $self->error('Could not extract download URL and title'); return undef;