VimeoGrabber: rework to new site layout
author Ralf Ertzinger <ralf@skytale.net>
Sat, 28 Jul 2012 19:32:19 +0000 (21:32 +0200)
committer Ralf Ertzinger <ralf@skytale.net>
Sat, 28 Jul 2012 19:32:19 +0000 (21:32 +0200)
videosite/VimeoGrabber.pm

index 6b1dd60..68295ce 100644 (file)
@@ -8,8 +8,8 @@ package videosite::VimeoGrabber;
 use videosite::GrabberBase;
 @ISA = qw(videosite::GrabberBase);
 
-use XML::Simple;
-use Digest::MD5 qw(md5_hex);
+use HTML::TokeParser;
+use videosite::JSArrayParser;
 use Data::Dumper;
 
 use strict;
@@ -35,8 +35,8 @@ sub _parse {
     my $pattern = shift;
     my $content;
     my $metadata = {};
-    my $p = XML::Simple->new();
-    my $t;
+    my $p;
+    my $e;
     my $dlurl;
     my $hd;
     my $dlpath;
@@ -54,46 +54,48 @@ sub _parse {
     $metadata->{'DLURL'} = undef;
 
     # Get the XML file containing the video metadata
-    unless(defined($content = $self->simple_get(sprintf('http://www.vimeo.com/moogaloop/load/clip:%s', $2)))) {
-        $self->error('Could not download XML metadata');
+    unless(defined($content = $self->simple_get(sprintf('http://vimeo.com/%s', $2)))) {
+        $self->error('Could not download site');
         return undef;
     }
 
-    unless(defined($t = $p->XMLin($content, KeepRoot => 1))) {
-        $self->error('Could not parse XML metadata');
-        return undef;
-    }
+    $p = HTML::TokeParser->new(\$content);
 
-    if (exists($t->{'xml'}->{'video'}->{'isHD'}) and (0 != $t->{'xml'}->{'video'}->{'isHD'})) {
-        $self->debug('Selecting HD video');
-        $hd = '/?q=hd';
-    } else {
-        $self->debug('Selecting SD video');
-        $hd = '';
-    }
-    $timestamp = $t->{'xml'}->{'request_signature_expires'};
-    $hash = $t->{'xml'}->{'request_signature'};
-    $dlurl = sprintf('http://vimeo.com/moogaloop/play/clip:%s/%s/%d%s', $metadata->{'ID'}, $hash, $timestamp, $hd);
+    while ($e = $p->get_tag('script')) {
+        if ($e->[0] eq 'script') {
+            my $t = $p->get_text();
 
-    unless(defined($dlurl)) {
-        $self->error('No dlurl found in XML');
-        return undef;
-    }
+            if ($t =~ m|clip\d+_\d+ = (.*\});Player|) {
+                my $jsp = videosite::JSArrayParser->new();
+                my $r;
+
+                $self->debug("Found raw config: %s", $1);
+                $r = $jsp->parse($1);
 
-    # # Vimeo appends a hash to the download URL, in order to thwart people like me.
-    # # Unfortunately the algorithm isn't that complicated :)
-    # if ($dlurl =~ m|http://bitcast.vimeo.com(.+)|) {
-    #     $dlpath = $1;
-    #     $timestamp += 1800;
-    #     $hash = md5_hex(sprintf('redFiretruck%s?e=%d', $dlpath, $timestamp));
-    # } else {
-    #     $self->error('Unknown dlurl scheme: %s', $dlurl);
-    #     return undef;
-    # }
-
-    # $metadata->{'DLURL'} = sprintf('%s?e=%d&h=%s', $dlurl, $timestamp, $hash);
-    $metadata->{'DLURL'} = $dlurl;
-    $metadata->{'TITLE'} = $t->{'xml'}->{'video'}->{'caption'};
+                unless(defined($r)) {
+                    $self->error("Found information hash, but could not parse");
+                    return undef;
+                }
+
+                $self->debug("Found parsed config: %s", Dumper($r));
+
+                unless(exists($r->{'config'}->{'request'})) {
+                    $self->error("Required information not found in hash");
+                    return undef;
+                }
+
+                $metadata->{'TITLE'} = $r->{'config'}->{'video'}->{'title'};
+                $r = $r->{'config'}->{'request'};
+
+                $metadata->{'DLURL'} = sprintf("http://%s/play_redirect?clip_id=%d&sig=%s&time=%d&quality=hd&codecs=H264,VP8,VP6",
+                        $r->{'player_url'},
+                        $metadata->{'ID'},
+                        $r->{'signature'},
+                        $r->{'timestamp'},
+                        );
+            }
+        }
+    }
 
     unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) {
         $self->error('Could not extract download URL and title');