X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=videosite%2FVeohGrabber.pm;h=c33f1d8b5508ab80ca7af39b1e620b4bab430709;hb=a644eb433aca424a85dcbfda6e42b0675dfbfd86;hp=4cac1ea99a438a6ffc374a516f6fadb9a1768882;hpb=771fcc7e7309dcb054d652812cead6b6bf47be5f;p=videosite.git diff --git a/videosite/VeohGrabber.pm b/videosite/VeohGrabber.pm index 4cac1ea..c33f1d8 100644 --- a/videosite/VeohGrabber.pm +++ b/videosite/VeohGrabber.pm @@ -8,22 +8,21 @@ package videosite::VeohGrabber; use videosite::GrabberBase; @ISA = qw(videosite::GrabberBase); -use XML::Simple; +use HTML::TokeParser; use Data::Dumper; - use strict; sub new { my $class = shift; - my $self = $class->SUPER::new(); - - $self->{'NAME'} = 'veoh'; - $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*veoh.com/browse/videos/category/[^/]+/watch/([^/]+))']; - - bless($self, $class); - $self->_prepare_parameters(); - - return $self; + my $self = $class->SUPER::new( + NAME => 'veoh', + _SELFTESTURL => 'http://www.veoh.com/watch/v18348952fyn2twbe', + _SELFTESTTITLE => '518_2 kureyon shinchan', + PATTERNS => ['(http://(?:[-a-zA-Z0-9_.]+\.)*veoh.com/+watch/(\w+)\??)'], + @_, + ); + + return bless($self, $class); } sub _parse { @@ -32,9 +31,11 @@ sub _parse { my $pattern = shift; my $content; my $metadata = {}; - my $p = XML::Simple->new(); my $ua = $self->ua(); + my $p; my $t; + my @text; + my @accum; my $dlurl; my $r; @@ -48,41 +49,40 @@ sub _parse { $metadata->{'TITLE'} = undef; $metadata->{'DLURL'} = undef; - # Get the XML file containing the video metadata - unless(defined($content = $self->simple_get(sprintf('http://www.veoh.com/rest/v2/execute.xml?apiKey=5697781E-1C60-663B-FFD8-9B49D2B56D36&method=veoh.search.search&type=video&maxResults=1&permalink=%s&contentRatingId=1&', $2), $ua))) { - $self->error('Could not download XML metadata'); + unless(defined($content = $self->simple_get(sprintf('http://www.veoh.com/watch/%s', $2), $ua))) { + $self->error('Could not download'); return undef; } - unless(defined($t = $p->XMLin($content, KeepRoot => 1))) { - $self->error('Could not parse XML metadata'); - return undef; - } + $p = HTML::TokeParser->new(\$content); - if (exists($t->{'rsp'}->{'videoList'}->{'video'}->{'fullPreviewHashPath'})) { - $dlurl = $t->{'rsp'}->{'videoList'}->{'video'}->{'fullPreviewHashPath'}; - } else { - $dlurl = $t->{'rsp'}->{'videoList'}->{'video'}->{'fullPreviewHashLowPath'} - } + while ($t = $p->get_tag('script')) { + if ($t->[0] eq 'script') { + my $e = $p->get_text(); + my $jsp; - unless(defined($dlurl)) { - $self->error('No dlurl found in XML'); - return undef; - } + if ($e =~ m|__watch.videoDetailsJSON = '([^\x27]+)|) { + $self->debug("Found JSON: %s", $1); + $jsp = videosite::JSArrayParser->new(); + $r = $jsp->parse($1); - # We now have to fetch the dlurl to get the redirect target after it, - # because the dlurl itself must be called with the right referer set + unless(defined($r)) { + $self->error("Found information hash, but could not parse"); + return undef; + } - $ua->max_redirect(0); - $r = $ua->get($dlurl, 'referer' => 'http://www.veoh.com'); + $self->debug("Parsed JSON: %s", Dumper($r)); - unless ($r->is_redirect) { - $self->error('Expected redirect, got %s', $r->code); - return undef; - } + unless(exists($r->{previewUrl})) { + $self->error("previewUrl not found in information hash"); + return undef; + } - $metadata->{'DLURL'} = $r->header('Location'); - $metadata->{'TITLE'} = $t->{'rsp'}->{'videoList'}->{'video'}->{'title'}; + $metadata->{'DLURL'} = $r->{previewUrl}; + $metadata->{'TITLE'} = $r->{title}; + } + } + } unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { $self->error('Could not extract download URL and title');