From: Ralf Ertzinger Date: Sat, 28 Jul 2012 19:32:19 +0000 (+0200) Subject: VimeoGrabber: rework to new site layout X-Git-Url: https://git.camperquake.de/gitweb.cgi?p=videosite.git;a=commitdiff_plain;h=424740b4bc310d7266236c12505787159d26613d VimeoGrabber: rework to new site layout --- diff --git a/videosite/VimeoGrabber.pm b/videosite/VimeoGrabber.pm index 6b1dd60..68295ce 100644 --- a/videosite/VimeoGrabber.pm +++ b/videosite/VimeoGrabber.pm @@ -8,8 +8,8 @@ package videosite::VimeoGrabber; use videosite::GrabberBase; @ISA = qw(videosite::GrabberBase); -use XML::Simple; -use Digest::MD5 qw(md5_hex); +use HTML::TokeParser; +use videosite::JSArrayParser; use Data::Dumper; use strict; @@ -35,8 +35,8 @@ sub _parse { my $pattern = shift; my $content; my $metadata = {}; - my $p = XML::Simple->new(); - my $t; + my $p; + my $e; my $dlurl; my $hd; my $dlpath; @@ -54,46 +54,48 @@ sub _parse { $metadata->{'DLURL'} = undef; # Get the XML file containing the video metadata - unless(defined($content = $self->simple_get(sprintf('http://www.vimeo.com/moogaloop/load/clip:%s', $2)))) { - $self->error('Could not download XML metadata'); + unless(defined($content = $self->simple_get(sprintf('http://vimeo.com/%s', $2)))) { + $self->error('Could not download site'); return undef; } - unless(defined($t = $p->XMLin($content, KeepRoot => 1))) { - $self->error('Could not parse XML metadata'); - return undef; - } + $p = HTML::TokeParser->new(\$content); - if (exists($t->{'xml'}->{'video'}->{'isHD'}) and (0 != $t->{'xml'}->{'video'}->{'isHD'})) { - $self->debug('Selecting HD video'); - $hd = '/?q=hd'; - } else { - $self->debug('Selecting SD video'); - $hd = ''; - } - $timestamp = $t->{'xml'}->{'request_signature_expires'}; - $hash = $t->{'xml'}->{'request_signature'}; - $dlurl = sprintf('http://vimeo.com/moogaloop/play/clip:%s/%s/%d%s', $metadata->{'ID'}, $hash, $timestamp, $hd); + while ($e = $p->get_tag('script')) { + if ($e->[0] eq 'script') { + my $t = $p->get_text(); - unless(defined($dlurl)) { - $self->error('No dlurl found in XML'); - return undef; - } + if ($t =~ m|clip\d+_\d+ = (.*\});Player|) { + my $jsp = videosite::JSArrayParser->new(); + my $r; + + $self->debug("Found raw config: %s", $1); + $r = $jsp->parse($1); - # # Vimeo appends a hash to the download URL, in order to thwart people like me. - # # Unfortunately the algorithm isn't that complicated :) - # if ($dlurl =~ m|http://bitcast.vimeo.com(.+)|) { - # $dlpath = $1; - # $timestamp += 1800; - # $hash = md5_hex(sprintf('redFiretruck%s?e=%d', $dlpath, $timestamp)); - # } else { - # $self->error('Unknown dlurl scheme: %s', $dlurl); - # return undef; - # } - - # $metadata->{'DLURL'} = sprintf('%s?e=%d&h=%s', $dlurl, $timestamp, $hash); - $metadata->{'DLURL'} = $dlurl; - $metadata->{'TITLE'} = $t->{'xml'}->{'video'}->{'caption'}; + unless(defined($r)) { + $self->error("Found information hash, but could not parse"); + return undef; + } + + $self->debug("Found parsed config: %s", Dumper($r)); + + unless(exists($r->{'config'}->{'request'})) { + $self->error("Required information not found in hash"); + return undef; + } + + $metadata->{'TITLE'} = $r->{'config'}->{'video'}->{'title'}; + $r = $r->{'config'}->{'request'}; + + $metadata->{'DLURL'} = sprintf("http://%s/play_redirect?clip_id=%d&sig=%s&time=%d&quality=hd&codecs=H264,VP8,VP6", + $r->{'player_url'}, + $metadata->{'ID'}, + $r->{'signature'}, + $r->{'timestamp'}, + ); + } + } + } unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { $self->error('Could not extract download URL and title');