X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=videosite%2FBroadcasterGrabber.pm;h=4c371011cbc24f8fc71908b0b5b24e18fa95720e;hb=cc69ec1d9eb2ecf207b4c6b50a02062175e14314;hp=8d0cda9ef3ee25d7cda99ebc7606e8946e63ca34;hpb=24b7768511035a6793e01fc8da2fa245348e5d18;p=videosite.git diff --git a/videosite/BroadcasterGrabber.pm b/videosite/BroadcasterGrabber.pm index 8d0cda9..4c37101 100644 --- a/videosite/BroadcasterGrabber.pm +++ b/videosite/BroadcasterGrabber.pm @@ -3,28 +3,25 @@ # # Grabber for broadcaster.com -package BroadcasterGrabber; +package videosite::BroadcasterGrabber; -use GrabberBase; -@ISA = qw(GrabberBase); +use videosite::GrabberBase; +@ISA = qw(videosite::GrabberBase); -use LWP::Simple qw(!get); -use HTML::Parser; +use videosite::HTMLHelper; use Data::Dumper; use strict; sub new { my $class = shift; - my $self = $class->SUPER::new(); + my $self = $class->SUPER::new( + NAME => 'broadcaster', + PATTERNS => ['(http://(?:[-a-zA-Z0-9_.]+\.)*broadcaster\.com/clip/(\d+))'], + @_, + ); - $self->{'NAME'} = 'broadcaster'; - $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*broadcaster\.com/clip/(\d+))']; - - bless($self, $class); - $self->_prepare_parameters(); - - return $self; + return bless($self, $class); } sub _parse { @@ -33,10 +30,8 @@ sub _parse { my $pattern = shift; my $content; my $metadata = {}; - my $p = HTML::Parser->new(api_version => 3); - my @accum; - my @text; - my $e; + my $p = videosite::HTMLHelper->new(); + my $n; $url =~ m|$pattern|; $url = $1; @@ -48,32 +43,22 @@ sub _parse { $metadata->{'TITLE'} = undef; $metadata->{'DLURL'} = undef; - unless(defined($content = LWP::Simple::get(sprintf('http://www.broadcaster.com/clip/%s', $2)))) { + unless(defined($content = $p->load(sprintf('http://www.broadcaster.com/clip/%s', $2)))) { $self->error('Could not download %s', $url); return undef; } - $p->handler(start => \@accum, "tagname, attr"); - $p->handler(text => \@text, "text"); - $p->report_tags(qw(script)); - $p->utf8_mode(1); - $p->parse($content); - # Look for the title - foreach $e (@text) { - if ($e->[0] =~ m|\&page_title=([^\x22]+)\x22|s) { - $metadata->{'TITLE'} = $1; - last; - } - } + ($metadata->{'TITLE'}) = grep { $_ =~ m|\&page_title=([^\x22]+)\x22|s && {$_ = $1} } + map { join("", @{$_->content()}) } + grep { defined($_->content()) } + $p->findnodes('script'); # Look for the download URL - foreach $e (@text) { - if ($e->[0] =~ m|\&clip_loc=.+?cache.broadcaster.com.+?escape\(\x22([^\x22]+)\x22\)|s) { - $metadata->{'DLURL'} = 'http://cache.broadcaster.com/peoplecaster/' . $1; - last; - } - } + ($metadata->{'DLURL'}) = grep { $_ =~ m|\&clip_loc=.+?cache.broadcaster.com.+?escape\(\x22([^\x22]+)\x22\)|s && {$_ = 'http://cache.broadcaster.com/peoplecaster/' .$1} } + map { join("", @{$_->content()}) } + grep { defined($_->content()) } + $p->findnodes('script'); unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { $self->error('Could not determine download URL');