From: Ralf Ertzinger
Date: Sun, 12 Feb 2012 16:15:23 +0000 (+0100)
Subject: Veoh: Fix grabber
X-Git-Url: https://git.camperquake.de/gitweb.cgi?p=videosite.git;a=commitdiff_plain;h=2d23e11ae4900b1be59bb26fbae73397161bccd7

Veoh: Fix grabber
---

diff --git a/videosite/VeohGrabber.pm b/videosite/VeohGrabber.pm
index 4cac1ea..dda2b5d 100644
--- a/videosite/VeohGrabber.pm
+++ b/videosite/VeohGrabber.pm
@@ -8,9 +8,8 @@ package videosite::VeohGrabber;
 use videosite::GrabberBase;
 @ISA = qw(videosite::GrabberBase);
 
-use XML::Simple;
+use HTML::TokeParser;
 use Data::Dumper;
-
 use strict;
 
 sub new {
@@ -18,7 +17,9 @@ sub new {
     my $self = $class->SUPER::new();
 
     $self->{'NAME'} = 'veoh';
-    $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*veoh.com/browse/videos/category/[^/]+/watch/([^/]+))'];
+    $self->{_SELFTESTURL} = 'http://www.veoh.com/watch/v18348952fyn2twbe';
+    $self->{_SELFTESTTITLE} = '518_2 kureyon shinchan';
+    $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*veoh.com/+watch/(\w+)\??)'];
 
     bless($self, $class);
     $self->_prepare_parameters();
@@ -32,9 +33,11 @@ sub _parse {
     my $pattern = shift;
     my $content;
     my $metadata = {};
-    my $p = XML::Simple->new();
     my $ua = $self->ua();
+    my $p;
     my $t;
+    my @text;
+    my @accum;
     my $dlurl;
     my $r;
 
@@ -48,41 +51,40 @@ sub _parse {
     $metadata->{'TITLE'} = undef;
     $metadata->{'DLURL'} = undef;
 
-    # Get the XML file containing the video metadata
-    unless(defined($content = $self->simple_get(sprintf('http://www.veoh.com/rest/v2/execute.xml?apiKey=5697781E-1C60-663B-FFD8-9B49D2B56D36&method=veoh.search.search&type=video&maxResults=1&permalink=%s&contentRatingId=1&', $2), $ua))) {
-        $self->error('Could not download XML metadata');
+    unless(defined($content = $self->simple_get(sprintf('http://www.veoh.com/watch/%s', $2), $ua))) {
+        $self->error('Could not download');
         return undef;
     }
 
-    unless(defined($t = $p->XMLin($content, KeepRoot => 1))) {
-        $self->error('Could not parse XML metadata');
-        return undef;
-    }
+    $p = HTML::TokeParser->new(\$content);
 
-    if (exists($t->{'rsp'}->{'videoList'}->{'video'}->{'fullPreviewHashPath'})) {
-        $dlurl = $t->{'rsp'}->{'videoList'}->{'video'}->{'fullPreviewHashPath'};
-    } else {
-        $dlurl = $t->{'rsp'}->{'videoList'}->{'video'}->{'fullPreviewHashLowPath'}
-    }
+    while ($t = $p->get_tag('script')) {
+        if ($t->[0] eq 'script') {
+            my $e = $p->get_text();
+            my $jsp;
 
-    unless(defined($dlurl)) {
-        $self->error('No dlurl found in XML');
-        return undef;
-    }
+            if ($e =~ m|__watch.videoDetailsJSON = '([^\x27]+)|) {
+                $self->debug("Found JSON: %s", $1);
+                $jsp = videosite::JSArrayParser->new();
+                $r = $jsp->parse($1);
 
-    # We now have to fetch the dlurl to get the redirect target after it,
-    # because the dlurl itself must be called with the right referer set
+                unless(defined($r)) {
+                    $self->error("Found information hash, but could not parse");
+                    return undef;
+                }
 
-    $ua->max_redirect(0);
-    $r = $ua->get($dlurl, 'referer' => 'http://www.veoh.com');
+                $self->debug("Parsed JSON: %s", Dumper($r));
 
-    unless ($r->is_redirect) {
-        $self->error('Expected redirect, got %s', $r->code);
-        return undef;
-    }
+                unless(exists($r->{previewUrl})) {
+                    $self->error("previewUrl not found in information hash");
+                    return undef;
+                }
 
-    $metadata->{'DLURL'} = $r->header('Location');
-    $metadata->{'TITLE'} = $t->{'rsp'}->{'videoList'}->{'video'}->{'title'};
+                $metadata->{'DLURL'} = $r->{previewUrl};
+                $metadata->{'TITLE'} = $r->{title};
+            }
+        }
+    }
 
     unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) {
         $self->error('Could not extract download URL and title');