From 818591f451224a1d974415d1d626923bd9637929 Mon Sep 17 00:00:00 2001 From: Ralf Ertzinger Date: Sat, 6 Dec 2008 21:34:11 +0100 Subject: [PATCH] - Modify BreakGrabber to use HTMLHelper - Modify BroadcasterGrabber to use HTMLHelper --- videosite/BreakGrabber.pm | 37 +++++++++++++------------------------ videosite/BroadcasterGrabber.pm | 37 ++++++++++++------------------------- 2 files changed, 25 insertions(+), 49 deletions(-) diff --git a/videosite/BreakGrabber.pm b/videosite/BreakGrabber.pm index 4870da8..0619da5 100644 --- a/videosite/BreakGrabber.pm +++ b/videosite/BreakGrabber.pm @@ -8,8 +8,7 @@ package BreakGrabber; use GrabberBase; @ISA = qw(GrabberBase); -use LWP::Simple qw(!get); -use HTML::Parser; +use HTMLHelper; use Data::Dumper; use strict; @@ -34,10 +33,8 @@ sub _parse { my $pattern = shift; my $content; my $metadata = {}; - my $p = HTML::Parser->new(api_version => 3); - my @accum; - my @text; - my $e; + my $p = HTMLHelper->new(); + my $n; $url =~ m|$pattern|; $url = $1; @@ -49,28 +46,20 @@ sub _parse { $metadata->{'TITLE'} = undef; $metadata->{'DLURL'} = undef; - unless(defined($content = LWP::Simple::get($1))) { + unless(defined($content = $p->load($1))) { $self->error('Could not download %s', $url); return undef; } - $p->handler(start => \@accum, "tagname, attr"); - $p->report_tags(qw(meta)); - $p->utf8_mode(1); - $p->parse($content); - - # Look for the title in the meta tags - foreach $e (@accum) { - if ('meta' eq $e->[0]) { - if ('embed_video_title' eq $e->[1]->{'name'}) { - $metadata->{'TITLE'} = $e->[1]->{'content'}; - } - - if ('embed_video_thumb_url' eq $e->[1]->{'name'}) { - $metadata->{'DLURL'} = $e->[1]->{'content'}; - $metadata->{'DLURL'} =~ s/\.jpg$/\.flv/; - } - } + $n = $p->findnodes('meta[@name="embed_video_title"]'); + if (defined($n)) { + $metadata->{'TITLE'} = $n->{'content'}; + } + + $n = $p->findnodes('meta[@name="embed_video_thumb_url"]'); + if (defined($n)) { + $metadata->{'DLURL'} = $n->{'content'}; + $metadata->{'DLURL'} =~ s/\.jpg$/\.flv/; } unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { diff --git a/videosite/BroadcasterGrabber.pm b/videosite/BroadcasterGrabber.pm index 8d0cda9..52d893d 100644 --- a/videosite/BroadcasterGrabber.pm +++ b/videosite/BroadcasterGrabber.pm @@ -8,8 +8,7 @@ package BroadcasterGrabber; use GrabberBase; @ISA = qw(GrabberBase); -use LWP::Simple qw(!get); -use HTML::Parser; +use HTMLHelper; use Data::Dumper; use strict; @@ -33,10 +32,8 @@ sub _parse { my $pattern = shift; my $content; my $metadata = {}; - my $p = HTML::Parser->new(api_version => 3); - my @accum; - my @text; - my $e; + my $p = HTMLHelper->new(); + my $n; $url =~ m|$pattern|; $url = $1; @@ -48,32 +45,22 @@ sub _parse { $metadata->{'TITLE'} = undef; $metadata->{'DLURL'} = undef; - unless(defined($content = LWP::Simple::get(sprintf('http://www.broadcaster.com/clip/%s', $2)))) { + unless(defined($content = $p->load(sprintf('http://www.broadcaster.com/clip/%s', $2)))) { $self->error('Could not download %s', $url); return undef; } - $p->handler(start => \@accum, "tagname, attr"); - $p->handler(text => \@text, "text"); - $p->report_tags(qw(script)); - $p->utf8_mode(1); - $p->parse($content); - # Look for the title - foreach $e (@text) { - if ($e->[0] =~ m|\&page_title=([^\x22]+)\x22|s) { - $metadata->{'TITLE'} = $1; - last; - } - } + ($metadata->{'TITLE'}) = grep { $_ =~ m|\&page_title=([^\x22]+)\x22|s && {$_ = $1} } + map { join("", @{$_->content()}) } + grep { defined($_->content()) } + $p->findnodes('script'); # Look for the download URL - foreach $e (@text) { - if ($e->[0] =~ m|\&clip_loc=.+?cache.broadcaster.com.+?escape\(\x22([^\x22]+)\x22\)|s) { - $metadata->{'DLURL'} = 'http://cache.broadcaster.com/peoplecaster/' . $1; - last; - } - } + ($metadata->{'DLURL'}) = grep { $_ =~ m|\&clip_loc=.+?cache.broadcaster.com.+?escape\(\x22([^\x22]+)\x22\)|s && {$_ = 'http://cache.broadcaster.com/peoplecaster/' .$1} } + map { join("", @{$_->content()}) } + grep { defined($_->content()) } + $p->findnodes('script'); unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { $self->error('Could not determine download URL'); -- 1.8.3.1