X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=videosite%2FCollegeHumorGrabber.pm;h=c7851e37ff87ed7eb735a452dcb05249c67e7aa8;hb=5160262f35ac74f803164ac4e734381c7cea1c74;hp=3bda5dd5a8c0d7ed0eeffd76505674824d1915ba;hpb=b0f58504e0f59d7d607d4ef7c06495180f047311;p=videosite.git diff --git a/videosite/CollegeHumorGrabber.pm b/videosite/CollegeHumorGrabber.pm index 3bda5dd..c7851e3 100644 --- a/videosite/CollegeHumorGrabber.pm +++ b/videosite/CollegeHumorGrabber.pm @@ -3,29 +3,29 @@ # (c) 2007 by Ralf Ertzinger # licensed under GNU GPL v2 -package CollegeHumorGrabber; +package videosite::CollegeHumorGrabber; -use GrabberBase; -@ISA = qw(GrabberBase); +use videosite::GrabberBase; +@ISA = qw(videosite::GrabberBase); -use LWP::Simple qw(!get); +use videosite::HTMLHelper; use XML::Simple; -use HTML::Parser; use Data::Dumper; use strict; sub new { my $class = shift; - my $self = $class->SUPER::new(); - - $self->{'NAME'} = 'collegehumor'; - $self->{'PATTERNS'} = ['(http://www.collegehumor.com/video:(\d+))']; - - bless($self, $class); - $self->_prepare_parameters(); - - return $self; + my $self = $class->SUPER::new( + NAME => 'collegehumor', + _SELFTESTURL => 'http://www.collegehumor.com/video/5635400/pixar-intro-parody', + _SELFTESTTITLE => 'Pixar Intro Parody', + PATTERNS => ['(http://www.collegehumor.com/video:(\d+))', + '(http://www.collegehumor.com/video/(\d+))'], + @_, + ); + + return bless($self, $class); } sub _parse { @@ -49,7 +49,7 @@ sub _parse { $metadata->{'DLURL'} = undef; # Get the XML file containing the video metadata - unless(defined($content = LWP::Simple::get(sprintf('http://www.collegehumor.com/moogaloop/video:%s', $2)))) { + unless(defined($content = $self->simple_get(sprintf('http://www.collegehumor.com/moogaloop/video/%s', $2)))) { $self->error('Could not download XML metadata'); return undef; } @@ -60,29 +60,7 @@ sub _parse { } $metadata->{'DLURL'} = $t->{'video'}->{'file'}; - - # The XML does not contain the full title of the video, for - # reasons possibly known to some jerk at CollegeHumor. - # So we'll have to parse the actual HTML, too. - unless(defined($content = LWP::Simple::get(sprintf('http://www.collegehumor.com/video:%s', $2)))) { - $self->error('Could not download HTML'); - return undef; - } - $p = HTML::Parser->new(api_version => 3); - - $p->handler(start => \@accum, "tagname, attr"); - $p->report_tags(qw(meta)); - $p->utf8_mode(1); - $p->parse($content); - - # Look for the title in the meta tags - foreach $t (@accum) { - if ('meta' eq $t->[0]) { - if ('title' eq $t->[1]->{'name'}) { - $metadata->{'TITLE'} = $t->[1]->{'content'}; - } - } - } + $metadata->{'TITLE'} = $t->{'video'}->{'caption'}; unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { $self->error('Could not extract download URL and title');