# (c) 2008 by Ralf Ertzinger # licensed under GNU GPL v2 # # Grabber for redtube.com # # Algorithm for the file name hash reverse engineered by # Maximilian Rehkopf package videosite::RedTubeGrabber; use videosite::GrabberBase; @ISA = qw(videosite::GrabberBase); use LWP::UserAgent; use HTTP::Cookies; use HTML::TokeParser; use Data::Dumper; use strict; sub new { my $class = shift; my $self = $class->SUPER::new(); $self->{'NAME'} = 'redtube'; $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*redtube.com/(\d+))']; bless($self, $class); $self->_prepare_parameters(); return $self; } sub div($$) { return ($_[0] - ($_[0] % $_[1])) / $_[1]; } sub digitatindex($$) { return div($_[0], 10**$_[1]) % 10; } sub mkfilename($) { my $id = shift; my $i = 7; my $q = 0; my $q2 = 0; my @key = split(//, "R15342O7K9HBCDXFGAIJ8LMZ6PQ0STUVWEYN"); my $hash = ""; # Calculate a weighed digit sum of the id $q += ($_*($i--)) for reverse(split(//, $id)); # Now calculate the digit sum of the digit sum $q2 += $_ for split(//, $q); # The rest are lookups into @key and the second digit sum, # based on the second digit sum and the original id $hash .= $key[digitatindex($id,3)+$q2+3]; $hash .= digitatindex($q2, 0); $hash .= $key[digitatindex($id,6)+$q2+2]; $hash .= $key[digitatindex($id,4)+$q2+1]; $hash .= $key[digitatindex($id,1)+$q2+6]; $hash .= $key[digitatindex($id,5)+$q2+5]; $hash .= digitatindex($q2, 1); $hash .= $key[digitatindex($id,2)+$q2+7]; $hash .= $key[digitatindex($id,0)+$q2+4]; return (sprintf("%07d", $id/1000), $hash); } sub _parse { my $self = shift; my $url = shift; my $pattern = shift; my $jar = HTTP::Cookies->new(); my $ua = LWP::UserAgent->new('agent' => 'Mozilla/5.0'); my $content; my $metadata = {}; my $p; my $r; my $dir; my $hash; $url =~ m|$pattern|; $url = $1; $metadata->{'URL'} = $url; $metadata->{'ID'} = $2; $metadata->{'TYPE'} = 'video'; $metadata->{'SOURCE'} = $self->{'NAME'}; $metadata->{'TITLE'} = undef; $metadata->{'DLURL'} = undef; # Set the cookies necessary to get the video data $jar->set_cookie(undef, 'pp', '1', '/', '.redtube.com'); $ua->cookie_jar($jar); unless(defined($r = $ua->get(sprintf("http://www.redtube.com/%s", $2)))) { $self->error('Could not download page'); return undef; } # Get the site to extract the title $content = $r->decoded_content(); $p = HTML::TokeParser->new(\$content); # Look for the title if ($p->get_tag('title')) { my $t = $p->get_text(); if ($t =~ /\xa0RedTube - /) { $metadata->{'TITLE'} = $t; $metadata->{'TITLE'} =~ s/\xa0RedTube - //; } } # Redtube uses a selfmade hash system to create the filename ($dir, $hash) = mkfilename($metadata->{'ID'}); $metadata->{'DLURL'} = sprintf('http://dl.redtube.com/_videos_t4vn23s9jc5498tgj49icfj4678/%s/%s.flv', $dir, $hash); unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { $self->error('Could not extract download URL and title'); return undef; } return $metadata; } 1;