videosite/RedTubeGrabber.pm

   1 # (c) 2008 by Ralf Ertzinger <ralf@camperquake.de>
   2 # licensed under GNU GPL v2
   3 #
   4 # Grabber for redtube.com
   5 #
   6 # Algorithm for the file name hash reverse engineered by
   7 # Maximilian Rehkopf  <otakon at gmx dot net>
   8
   9 package videosite::RedTubeGrabber;
  10
  11 use videosite::GrabberBase;
  12 @ISA = qw(videosite::GrabberBase);
  13
  14 use HTML::TokeParser;
  15 use Data::Dumper;
  16
  17 use strict;
  18
  # Constructor. Builds the object through the parent class constructor
  # (videosite::GrabberBase, see @ISA) and fills in the grabber name and
  # the URL patterns this grabber handles.
  #
  # Returns the blessed grabber object.
  sub new {
      my $class = shift;
      my $self = $class->SUPER::new();

      $self->{'NAME'} = 'redtube';

      # Capture group 1 is the complete video URL, capture group 2 the
      # numeric video id; _parse reads them as $1 and $2. The dot in
      # "redtube.com" is escaped so it matches only a literal dot instead
      # of any character.
      $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*redtube\.com/(\d+))'];

      # Rebless into this class before calling _prepare_parameters
      # (not defined in this file, presumably inherited -- verify).
      bless($self, $class);
      $self->_prepare_parameters();

      return $self;
  }
  31
  # Integer division for whole-number operands: the remainder is removed
  # from the dividend before dividing, so the quotient has no fraction.
  #
  #   first argument   dividend
  #   second argument  divisor
  #
  # Returns the quotient.
  sub div($$) {
      my ($dividend, $divisor) = @_;
      my $remainder = $dividend % $divisor;

      return ($dividend - $remainder) / $divisor;
  }
  35
  # Returns one decimal digit of a number.
  #
  #   first argument   number to inspect
  #   second argument  digit position, counted as a power of ten
  #                    (0 = ones, 1 = tens, ...)
  sub digitatindex($$) {
      my ($number, $position) = @_;

      # Drop everything below the wanted digit, then keep the last digit
      my $shifted = div($number, 10 ** $position);

      return $shifted % 10;
  }
  39
  # Computes the directory name and the hashed file name for a video id.
  #
  #   first argument  numeric video id
  #
  # Returns a two-element list: the directory name (the id divided by
  # 1000, formatted with %07d) and the nine-character file name hash.
  sub mkfilename($) {
      my ($id) = @_;
      my @alphabet = split(//, "R15342O7K9HBCDXFGAIJ8LMZ6PQ0STUVWEYN");

      # Weighted digit sum of the id: the last digit is multiplied by 7,
      # the one before it by 6, and so on
      my $weight = 7;
      my $weighted_sum = 0;
      foreach my $digit (reverse(split(//, $id))) {
          $weighted_sum += $digit * $weight;
          $weight--;
      }

      # Digit sum of the weighted digit sum
      my $checksum = 0;
      foreach my $digit (split(//, $weighted_sum)) {
          $checksum += $digit;
      }

      # Recipe for the hash characters, in output order:
      #   [ id  => position, offset ]  alphabet lookup at
      #                                (digit of the id at position)
      #                                + checksum + offset
      #   [ sum => position ]          digit of the checksum at position
      my @recipe = (
          [ id  => 3, 3 ],
          [ sum => 0 ],
          [ id  => 6, 2 ],
          [ id  => 4, 1 ],
          [ id  => 1, 6 ],
          [ id  => 5, 5 ],
          [ sum => 1 ],
          [ id  => 2, 7 ],
          [ id  => 0, 4 ],
      );

      my $hash = "";
      foreach my $step (@recipe) {
          my ($source, $position, $offset) = @$step;
          if ($source eq 'id') {
              $hash .= $alphabet[digitatindex($id, $position) + $checksum + $offset];
          }
          else {
              $hash .= digitatindex($checksum, $position);
          }
      }

      my $dir = sprintf("%07d", $id/1000);

      return ($dir, $hash);
  }
  68
  # Collects the metadata of a redtube video.
  #
  # Parameters:
  #   $url      text containing the video URL
  #   $pattern  regular expression to apply to $url; capture group 1 must
  #             be the video URL and capture group 2 the video id
  #
  # Returns a hash reference with the keys URL, ID, TYPE, SOURCE, TITLE and
  # DLURL. Returns undef, after reporting the reason through $self->error,
  # when $url does not match $pattern, when the video page cannot be
  # downloaded or when no title is found on the page.
  sub _parse {
      my $self = shift;
      my $url = shift;
      my $pattern = shift;
      my $ua = $self->ua();
      my $content;
      my $metadata = {};
      my $p;
      my $dir;
      my $hash;

      # After a failed match $1 and $2 still hold the values of an earlier
      # successful match, so stop here instead of building metadata from
      # stale captures
      unless ($url =~ m|$pattern|) {
          $self->error('URL does not match pattern');
          return undef;
      }

      # Copy the captures immediately so later code does not depend on the
      # capture variables staying untouched
      my $id = $2;
      $url = $1;

      $metadata->{'URL'} = $url;
      $metadata->{'ID'} = $id;
      $metadata->{'TYPE'} = 'video';
      $metadata->{'SOURCE'} = $self->{'NAME'};
      $metadata->{'TITLE'} = undef;
      $metadata->{'DLURL'} = undef;

      # Set the cookies necessary to get the video data
      $ua->cookie_jar->set_cookie(undef, 'pp', '1', '/', '.redtube.com');

      unless(defined($content = $self->simple_get(sprintf("http://www.redtube.com/%s", $id), $ua))) {
          $self->error('Could not download page');
          return undef;
      }

      $p = HTML::TokeParser->new(\$content);

      # Look for the title: it is only accepted when the text of the <title>
      # tag contains the site marker, which is removed from the stored title
      if ($p->get_tag('title')) {
          my $t = $p->get_text();
          if ($t =~ s/\xa0RedTube - //) {
              $metadata->{'TITLE'} = $t;
          }
      }

      # Redtube uses a selfmade hash system to create the filename
      ($dir, $hash) = mkfilename($metadata->{'ID'});

      $metadata->{'DLURL'} = sprintf('http://dl.redtube.com/_videos_t4vn23s9jc5498tgj49icfj4678/%s/%s.flv', $dir, $hash);

      unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) {
          $self->error('Could not extract download URL and title');
          return undef;
      }

      return $metadata;
  }
 122
 123 1;