X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=videosite%2FGoogleGrabber.pm;h=51d0e2e1f9180705b2d679eee69aa8a5129849bd;hb=2793bf0eb588c2ad4c76a4e2e38793612b69757f;hp=c1fe9eb26e286a4518a83c5a218fc77e6ea406b7;hpb=4b79658ad5745fd99983221b219619c22658d771;p=videosite.git diff --git a/videosite/GoogleGrabber.pm b/videosite/GoogleGrabber.pm index c1fe9eb..51d0e2e 100644 --- a/videosite/GoogleGrabber.pm +++ b/videosite/GoogleGrabber.pm @@ -3,28 +3,28 @@ # # Grabber for video.google.com -package GoogleGrabber; +package videosite::GoogleGrabber; -use GrabberBase; -@ISA = qw(GrabberBase); +use videosite::GrabberBase; +@ISA = qw(videosite::GrabberBase); -use LWP::Simple qw(!get); -use HTML::Parser; +use HTML::TokeParser; use Data::Dumper; use strict; sub new { my $class = shift; - my $self = $class->SUPER::new(); - - $self->{'NAME'} = 'google'; - $self->{'PATTERNS'} = ['(http://video\.google\.com/videoplay\?docid=([-\d]+))']; - - bless($self, $class); - $self->_prepare_parameters(); - - return $self; + my $self = $class->SUPER::new( + NAME => 'google', + PATTERNS => ['(http://video\.google\.com/videoplay\?docid=([-\d]+))'], + _PARAMS => { + QUALITY => ['normal', 'Quality of the video to download. normal = standard resolution flash video, h264 = high resolution MPEG4 video'] + }, + @_, + ); + + return bless($self, $class); } sub _parse { @@ -33,10 +33,9 @@ sub _parse { my $pattern = shift; my $content; my $metadata = {}; - my $p = HTML::Parser->new(api_version => 3); - my @accum; - my @text; + my $p; my $e; + my $quality = $self->_getval('QUALITY'); $url =~ m|$pattern|; $url = $1; @@ -48,29 +47,33 @@ sub _parse { $metadata->{'TITLE'} = undef; $metadata->{'DLURL'} = undef; - unless(defined($content = LWP::Simple::get(sprintf('http://video.google.com/videoplay?docid=%s', $2)))) { + unless(defined($content = $self->simple_get(sprintf('http://video.google.com/videohosted?docid=%s', $2)))) { $self->error('Could not download %s', $url); return undef; } - $p->handler(start => \@accum, "tagname, attr"); - $p->handler(text => \@text, "text"); - $p->report_tags(qw(embed div)); - $p->utf8_mode(1); - $p->parse($content); - - # Look for the title in the div tags - foreach $e (@accum) { - if ('div' eq $e->[0]) { - if ((exists($e->[1]->{'class'})) and ('title' eq $e->[1]->{'class'})) { - $metadata->{'TITLE'} = $e->[1]->{'title'}; + $p = HTML::TokeParser->new(\$content); + + # Look for the title + if ($p->get_tag('title')) { + $metadata->{'TITLE'} = $p->get_text(); + $metadata->{'TITLE'} =~ s/\s?- Google Video$//s; + } + + if ($quality eq 'h264') { + while ($e = $p->get_tag('a')) { + if ((exists($e->[1]{'id'})) and ('ipoddownloadlink' eq $e->[1]{'id'})) { + $metadata->{'DLURL'} = $e->[1]{'href'}; + last; } } - - if ('embed' eq $e->[0]) { - if ((exists($e->[1]->{'src'})) and ($e->[1]->{'src'} =~ m|^/googleplayer.swf\?\&videoUrl=([^&]+)\&|)) { - $metadata->{'DLURL'} = $1; - $metadata->{'DLURL'} =~ s/%(..)/chr(hex($1))/ge; + } else { + while ($e = $p->get_tag('script')) { + if ($p->get_text() =~ m|googleplayer\.swf\?\\46videoUrl\\75(.+?)\\46|s) { + my $u = $1; + $u =~ s/%(..)/chr(hex($1))/ge; + $metadata->{'DLURL'} = $u; + last; } } }