Add RedTube grabber

author Ralf Ertzinger <sun@ryoko-darknet.camperquake.de>

Thu, 23 Oct 2008 12:56:21 +0000 (14:56 +0200)

committer Ralf Ertzinger <sun@ryoko-darknet.camperquake.de>

Thu, 23 Oct 2008 12:56:21 +0000 (14:56 +0200)
author Ralf Ertzinger <sun@ryoko-darknet.camperquake.de>
Thu, 23 Oct 2008 12:56:21 +0000 (14:56 +0200)
committer Ralf Ertzinger <sun@ryoko-darknet.camperquake.de>
Thu, 23 Oct 2008 12:56:21 +0000 (14:56 +0200)
diff --git a/videosite/RedTubeGrabber.pm b/videosite/RedTubeGrabber.pm

new file mode 100644 (file)

index 0000000..a67a755
--- /dev/null
+++ b/videosite/RedTubeGrabber.pm
@@ -0,0 +1,141 @@
+# (c) 2008 by Ralf Ertzinger <ralf@camperquake.de>
+# licensed under GNU GPL v2
+#
+# Grabber for redtube.com
+#
+# Algorithm for the file name hash reverse engineered by
+# Maximilian Rehkopf  <otakon at gmx dot net>
+
+package RedTubeGrabber;
+
+use GrabberBase;
+@ISA = qw(GrabberBase);
+
+use LWP::UserAgent;
+use HTTP::Cookies;
+use HTML::TokeParser;
+use Data::Dumper;
+
+use strict;
+
+sub new {
+    my $class = shift;
+    my $self = $class->SUPER::new();
+
+    $self->{'NAME'} = 'redtube';
+    $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*redtube.com/(\d+))'];
+
+    bless($self, $class);
+    $self->_prepare_parameters();
+
+    return $self;
+}
+
+sub check_cookie {
+
+    my $jar = shift;
+    my $key = shift;
+    my $found = undef;
+
+    $jar->scan(sub { $found = 1 if ( $key eq $_[1]) });
+
+    return $found;
+}
+
+sub div($$) {
+    return ($_[0] - ($_[0] % $_[1])) / $_[1];
+}
+
+sub digitatindex($$) {
+    return div($_[0], 10**$_[1]) % 10;
+}
+
+sub mkfilename($) {
+    my $id = shift;
+    my $i = 7;
+    my $q = 0;
+    my $q2 = 0;
+    my @key = split(//, "R15342O7K9HBCDXFGAIJ8LMZ6PQ0STUVWEYN");
+    my $hash = "";
+
+    # Calculate a weighed digit sum of the id
+    $q += ($_*($i--)) for reverse(split(//, $id));
+
+    # Now calculate the digit sum of the digit sum
+    $q2 += $_ for split(//, $q);
+
+    # The rest are lookups into @key and the second digit sum,
+    # based on the second digit sum and the original id
+    $hash .= $key[digitatindex($id,3)+$q2+3];
+    $hash .= digitatindex($q2, 0);
+    $hash .= $key[digitatindex($id,6)+$q2+2];
+    $hash .= $key[digitatindex($id,4)+$q2+1];
+    $hash .= $key[digitatindex($id,1)+$q2+6];
+    $hash .= $key[digitatindex($id,5)+$q2+5];
+    $hash .= digitatindex($q2, 1);
+    $hash .= $key[digitatindex($id,2)+$q2+7];
+    $hash .= $key[digitatindex($id,0)+$q2+4];
+
+    return (sprintf("%07d", $id/1000), $hash);
+}
+
+sub _parse {
+    my $self = shift;
+    my $url = shift;
+    my $pattern = shift;
+    my $jar = HTTP::Cookies->new();
+    my $ua = LWP::UserAgent->new('agent' => 'Mozilla/5.0');
+    my $content;
+    my $metadata = {};
+    my $p;
+    my $r;
+    my $dir;
+    my $hash;
+
+    $url =~ m|$pattern|;
+    $url = $1;
+
+    $metadata->{'URL'} = $url;
+    $metadata->{'ID'} = $2;
+    $metadata->{'TYPE'} = 'video';
+    $metadata->{'SOURCE'} = $self->{'NAME'};
+    $metadata->{'TITLE'} = undef;
+    $metadata->{'DLURL'} = undef;
+
+    # Set the cookies necessary to get the video data
+    $jar->set_cookie(undef, 'pp', '1', '/', '.redtube.com');
+    $ua->cookie_jar($jar);
+
+    unless(defined($r = $ua->get(sprintf("http://www.redtube.com/%s", $2)))) {
+        $self->error('Could not download page');
+        return undef;
+    }
+
+    # Get the site to extract the title
+    $content = $r->decoded_content();
+
+    $p = HTML::TokeParser->new(\$content);
+
+    # Look for the title
+    if ($p->get_tag('title')) {
+        my $t = $p->get_text();
+        if ($t =~ /\xa0RedTube - /) {
+            $metadata->{'TITLE'} = $t;
+            $metadata->{'TITLE'} =~ s/\xa0RedTube - //;
+        }
+    }
+
+    # Redtube uses a selfmade hash system to create the filename
+    ($dir, $hash) = mkfilename($metadata->{'ID'});
+
+    $metadata->{'DLURL'} = sprintf('http://dl.redtube.com/_videos_t4vn23s9jc5498tgj49icfj4678/%s/%s.flv', $dir, $hash);
+
+    unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) {
+        $self->error('Could not extract download URL and title');
+        return undef;
+    }
+
+    return $metadata;
+}
+
+1;
author	Ralf Ertzinger <sun@ryoko-darknet.camperquake.de>
	Thu, 23 Oct 2008 12:56:21 +0000 (14:56 +0200)
committer	Ralf Ertzinger <sun@ryoko-darknet.camperquake.de>
	Thu, 23 Oct 2008 12:56:21 +0000 (14:56 +0200)