X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=videosite%2FCollegeHumorGrabber.pm;h=c7851e37ff87ed7eb735a452dcb05249c67e7aa8;hb=8d918b07ad100a4d7b96cf5df640cef0a8a0411d;hp=bbf54a688aac683fab225aca387df9641d98a041;hpb=6bfa02043de8e679102e4d3babd2cc1e6e49824b;p=videosite.git diff --git a/videosite/CollegeHumorGrabber.pm b/videosite/CollegeHumorGrabber.pm index bbf54a6..c7851e3 100644 --- a/videosite/CollegeHumorGrabber.pm +++ b/videosite/CollegeHumorGrabber.pm @@ -9,7 +9,6 @@ use videosite::GrabberBase; @ISA = qw(videosite::GrabberBase); use videosite::HTMLHelper; -use LWP::Simple qw(!get); use XML::Simple; use Data::Dumper; @@ -17,15 +16,16 @@ use strict; sub new { my $class = shift; - my $self = $class->SUPER::new(); - - $self->{'NAME'} = 'collegehumor'; - $self->{'PATTERNS'} = ['(http://www.collegehumor.com/video:(\d+))']; - - bless($self, $class); - $self->_prepare_parameters(); - - return $self; + my $self = $class->SUPER::new( + NAME => 'collegehumor', + _SELFTESTURL => 'http://www.collegehumor.com/video/5635400/pixar-intro-parody', + _SELFTESTTITLE => 'Pixar Intro Parody', + PATTERNS => ['(http://www.collegehumor.com/video:(\d+))', + '(http://www.collegehumor.com/video/(\d+))'], + @_, + ); + + return bless($self, $class); } sub _parse { @@ -49,7 +49,7 @@ sub _parse { $metadata->{'DLURL'} = undef; # Get the XML file containing the video metadata - unless(defined($content = LWP::Simple::get(sprintf('http://www.collegehumor.com/moogaloop/video:%s', $2)))) { + unless(defined($content = $self->simple_get(sprintf('http://www.collegehumor.com/moogaloop/video/%s', $2)))) { $self->error('Could not download XML metadata'); return undef; } @@ -60,20 +60,7 @@ sub _parse { } $metadata->{'DLURL'} = $t->{'video'}->{'file'}; - - # The XML does not contain the full title of the video, for - # reasons possibly known to some jerk at CollegeHumor. - # So we'll have to parse the actual HTML, too. - $p = videosite::HTMLHelper->new(); - unless(defined($content = $p->load(sprintf('http://www.collegehumor.com/video:%s', $2)))) { - $self->error('Could not download HTML'); - return undef; - } - - $t = $p->findnodes('h1[@id="item_title"]'); - if (defined($t)) { - $metadata->{'TITLE'} = $t->{'_content'}->[0]; - } + $metadata->{'TITLE'} = $t->{'video'}->{'caption'}; unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { $self->error('Could not extract download URL and title');