From a9bb044fe82d7093b94eca821ef1e72ac9e57607 Mon Sep 17 00:00:00 2001 From: Ralf Ertzinger Date: Thu, 17 Apr 2008 17:45:00 +0200 Subject: [PATCH] - Change grabber to get the MO4 download link --- videosite/GoogleGrabber.pm | 37 ++++++++++++++----------------------- 1 file changed, 14 insertions(+), 23 deletions(-) diff --git a/videosite/GoogleGrabber.pm b/videosite/GoogleGrabber.pm index c1fe9eb..7d9cad6 100644 --- a/videosite/GoogleGrabber.pm +++ b/videosite/GoogleGrabber.pm @@ -9,7 +9,7 @@ use GrabberBase; @ISA = qw(GrabberBase); use LWP::Simple qw(!get); -use HTML::Parser; +use HTML::TokeParser; use Data::Dumper; use strict; @@ -33,9 +33,7 @@ sub _parse { my $pattern = shift; my $content; my $metadata = {}; - my $p = HTML::Parser->new(api_version => 3); - my @accum; - my @text; + my $p; my $e; $url =~ m|$pattern|; @@ -48,30 +46,23 @@ sub _parse { $metadata->{'TITLE'} = undef; $metadata->{'DLURL'} = undef; - unless(defined($content = LWP::Simple::get(sprintf('http://video.google.com/videoplay?docid=%s', $2)))) { + unless(defined($content = LWP::Simple::get(sprintf('http://video.google.com/videohosted?docid=%s', $2)))) { $self->error('Could not download %s', $url); return undef; } - $p->handler(start => \@accum, "tagname, attr"); - $p->handler(text => \@text, "text"); - $p->report_tags(qw(embed div)); - $p->utf8_mode(1); - $p->parse($content); - - # Look for the title in the div tags - foreach $e (@accum) { - if ('div' eq $e->[0]) { - if ((exists($e->[1]->{'class'})) and ('title' eq $e->[1]->{'class'})) { - $metadata->{'TITLE'} = $e->[1]->{'title'}; - } - } + $p = HTML::TokeParser->new(\$content); + + # Look for the title + if ($p->get_tag('title')) { + $metadata->{'TITLE'} = $p->get_text(); + $metadata->{'TITLE'} =~ s/ - Google Video$//; + } - if ('embed' eq $e->[0]) { - if ((exists($e->[1]->{'src'})) and ($e->[1]->{'src'} =~ m|^/googleplayer.swf\?\&videoUrl=([^&]+)\&|)) { - $metadata->{'DLURL'} = $1; - $metadata->{'DLURL'} =~ s/%(..)/chr(hex($1))/ge; - } + 
while ($e = $p->get_tag('a')) { + if ((exists($e->[1]{'id'})) and ('ipoddownloadlink' eq $e->[1]{'id'})) { + $metadata->{'DLURL'} = $e->[1]{'href'}; + last; } } -- 1.8.3.1