videosite/RedTubeGrabber.pm

   1 # (c) 2008 by Ralf Ertzinger <ralf@camperquake.de>
   2 # licensed under GNU GPL v2
   3 #
   4 # Grabber for redtube.com
   5 #
   6 # Algorithm for the file name hash reverse engineered by
   7 # Maximilian Rehkopf  <otakon at gmx dot net>
   8
   9 package RedTubeGrabber;
  10
  11 use GrabberBase;
  12 @ISA = qw(GrabberBase);
  13
  14 use LWP::UserAgent;
  15 use HTTP::Cookies;
  16 use HTML::TokeParser;
  17 use Data::Dumper;
  18
  19 use strict;
  20
  21 sub new {
  22     my $class = shift;
  23     my $self = $class->SUPER::new();
  24
  25     $self->{'NAME'} = 'redtube';
  26     $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*redtube.com/(\d+))'];
  27
  28     bless($self, $class);
  29     $self->_prepare_parameters();
  30
  31     return $self;
  32 }
  33
  34 sub check_cookie {
  35
  36     my $jar = shift;
  37     my $key = shift;
  38     my $found = undef;
  39
  40     $jar->scan(sub { $found = 1 if ( $key eq $_[1]) });
  41
  42     return $found;
  43 }
  44
  45 sub div($$) {
  46     return ($_[0] - ($_[0] % $_[1])) / $_[1];
  47 }
  48
  49 sub digitatindex($$) {
  50     return div($_[0], 10**$_[1]) % 10;
  51 }
  52
  53 sub mkfilename($) {
  54     my $id = shift;
  55     my $i = 7;
  56     my $q = 0;
  57     my $q2 = 0;
  58     my @key = split(//, "R15342O7K9HBCDXFGAIJ8LMZ6PQ0STUVWEYN");
  59     my $hash = "";
  60
  61     # Calculate a weighed digit sum of the id
  62     $q += ($_*($i--)) for reverse(split(//, $id));
  63
  64     # Now calculate the digit sum of the digit sum
  65     $q2 += $_ for split(//, $q);
  66
  67     # The rest are lookups into @key and the second digit sum,
  68     # based on the second digit sum and the original id
  69     $hash .= $key[digitatindex($id,3)+$q2+3];
  70     $hash .= digitatindex($q2, 0);
  71     $hash .= $key[digitatindex($id,6)+$q2+2];
  72     $hash .= $key[digitatindex($id,4)+$q2+1];
  73     $hash .= $key[digitatindex($id,1)+$q2+6];
  74     $hash .= $key[digitatindex($id,5)+$q2+5];
  75     $hash .= digitatindex($q2, 1);
  76     $hash .= $key[digitatindex($id,2)+$q2+7];
  77     $hash .= $key[digitatindex($id,0)+$q2+4];
  78
  79     return (sprintf("%07d", $id/1000), $hash);
  80 }
  81
  82 sub _parse {
  83     my $self = shift;
  84     my $url = shift;
  85     my $pattern = shift;
  86     my $jar = HTTP::Cookies->new();
  87     my $ua = LWP::UserAgent->new('agent' => 'Mozilla/5.0');
  88     my $content;
  89     my $metadata = {};
  90     my $p;
  91     my $r;
  92     my $dir;
  93     my $hash;
  94
  95     $url =~ m|$pattern|;
  96     $url = $1;
  97
  98     $metadata->{'URL'} = $url;
  99     $metadata->{'ID'} = $2;
 100     $metadata->{'TYPE'} = 'video';
 101     $metadata->{'SOURCE'} = $self->{'NAME'};
 102     $metadata->{'TITLE'} = undef;
 103     $metadata->{'DLURL'} = undef;
 104
 105     # Set the cookies necessary to get the video data
 106     $jar->set_cookie(undef, 'pp', '1', '/', '.redtube.com');
 107     $ua->cookie_jar($jar);
 108
 109     unless(defined($r = $ua->get(sprintf("http://www.redtube.com/%s", $2)))) {
 110         $self->error('Could not download page');
 111         return undef;
 112     }
 113
 114     # Get the site to extract the title
 115     $content = $r->decoded_content();
 116
 117     $p = HTML::TokeParser->new(\$content);
 118
 119     # Look for the title
 120     if ($p->get_tag('title')) {
 121         my $t = $p->get_text();
 122         if ($t =~ /\xa0RedTube - /) {
 123             $metadata->{'TITLE'} = $t;
 124             $metadata->{'TITLE'} =~ s/\xa0RedTube - //;
 125         }
 126     }
 127
 128     # Redtube uses a selfmade hash system to create the filename
 129     ($dir, $hash) = mkfilename($metadata->{'ID'});
 130
 131     $metadata->{'DLURL'} = sprintf('http://dl.redtube.com/_videos_t4vn23s9jc5498tgj49icfj4678/%s/%s.flv', $dir, $hash);
 132
 133     unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) {
 134         $self->error('Could not extract download URL and title');
 135         return undef;
 136     }
 137
 138     return $metadata;
 139 }
 140
 141 1;