#
# Grabber for broadcaster.com
-package BroadcasterGrabber;
+package videosite::BroadcasterGrabber;
-use GrabberBase;
-@ISA = qw(GrabberBase);
+use videosite::GrabberBase;
+@ISA = qw(videosite::GrabberBase);
-use LWP::Simple qw(!get);
-use HTML::Parser;
+use videosite::HTMLHelper;
use Data::Dumper;
use strict;
# Constructor for the broadcaster.com grabber. This block is recorded as a
# diff: "-" lines are the previous implementation, "+" lines the replacement.
#
# Previous form: call the parent constructor without arguments, assign NAME
# and PATTERNS on the returned object, bless it into $class, call
# _prepare_parameters() on it, and return it.
#
# Replacement form: pass NAME, PATTERNS and whatever arguments the caller
# supplied (@_) to the parent constructor in a single call, then return that
# object blessed into $class. _prepare_parameters() is no longer called here.
#
# The single entry in PATTERNS has two capture groups: the outer one wraps the
# whole clip URL, the inner one the numeric clip id after "/clip/".
#
# NOTE(review): presumably the parent constructor now does the parameter
# preparation itself and treats its argument list as key/value pairs, so keys
# passed in via @_ would override the NAME/PATTERNS defaults listed before
# them -- confirm against videosite::GrabberBase.
sub new {
my $class = shift;
- my $self = $class->SUPER::new();
+ my $self = $class->SUPER::new(
+ NAME => 'broadcaster',
+ PATTERNS => ['(http://(?:[-a-zA-Z0-9_.]+\.)*broadcaster\.com/clip/(\d+))'],
+ @_,
+ );
- $self->{'NAME'} = 'broadcaster';
- $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*broadcaster\.com/clip/(\d+))'];
-
- bless($self, $class);
- $self->_prepare_parameters();
-
- return $self;
+ return bless($self, $class);
}
sub _parse {
my $pattern = shift;
my $content;
my $metadata = {};
- my $p = HTML::Parser->new(api_version => 3);
- my @accum;
- my @text;
- my $e;
+ my $p = videosite::HTMLHelper->new();
+ my $n;
$url =~ m|$pattern|;
$url = $1;
$metadata->{'TITLE'} = undef;
$metadata->{'DLURL'} = undef;
- unless(defined($content = LWP::Simple::get(sprintf('http://www.broadcaster.com/clip/%s', $2)))) {
+ unless(defined($content = $p->load(sprintf('http://www.broadcaster.com/clip/%s', $2)))) {
$self->error('Could not download %s', $url);
return undef;
}
- $p->handler(start => \@accum, "tagname, attr");
- $p->handler(text => \@text, "text");
- $p->report_tags(qw(script));
- $p->utf8_mode(1);
- $p->parse($content);
-
# Look for the title
- foreach $e (@text) {
- if ($e->[0] =~ m|\&page_title=([^\x22]+)\x22|s) {
- $metadata->{'TITLE'} = $1;
- last;
- }
- }
+ ($metadata->{'TITLE'}) = grep { $_ =~ m|\&page_title=([^\x22]+)\x22|s && {$_ = $1} }
+ map { join("", @{$_->content()}) }
+ grep { defined($_->content()) }
+ $p->findnodes('script');
# Look for the download URL
- foreach $e (@text) {
- if ($e->[0] =~ m|\&clip_loc=.+?cache.broadcaster.com.+?escape\(\x22([^\x22]+)\x22\)|s) {
- $metadata->{'DLURL'} = 'http://cache.broadcaster.com/peoplecaster/' . $1;
- last;
- }
- }
+ ($metadata->{'DLURL'}) = grep { $_ =~ m|\&clip_loc=.+?cache.broadcaster.com.+?escape\(\x22([^\x22]+)\x22\)|s && {$_ = 'http://cache.broadcaster.com/peoplecaster/' .$1} }
+ map { join("", @{$_->content()}) }
+ grep { defined($_->content()) }
+ $p->findnodes('script');
unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) {
$self->error('Could not determine download URL');