X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=videosite%2FCollegeHumorGrabber.pm;h=fb032863b323fb506a9e7a8163a1cc2686ed0b97;hb=84bcf124e34f7e12c49f03c9841d863da9da0c8e;hp=3bda5dd5a8c0d7ed0eeffd76505674824d1915ba;hpb=b0f58504e0f59d7d607d4ef7c06495180f047311;p=videosite.git diff --git a/videosite/CollegeHumorGrabber.pm b/videosite/CollegeHumorGrabber.pm index 3bda5dd..fb03286 100644 --- a/videosite/CollegeHumorGrabber.pm +++ b/videosite/CollegeHumorGrabber.pm @@ -3,14 +3,14 @@ # (c) 2007 by Ralf Ertzinger # licensed under GNU GPL v2 -package CollegeHumorGrabber; +package videosite::CollegeHumorGrabber; -use GrabberBase; -@ISA = qw(GrabberBase); +use videosite::GrabberBase; +@ISA = qw(videosite::GrabberBase); +use videosite::HTMLHelper; use LWP::Simple qw(!get); use XML::Simple; -use HTML::Parser; use Data::Dumper; use strict; @@ -20,7 +20,8 @@ sub new { my $self = $class->SUPER::new(); $self->{'NAME'} = 'collegehumor'; - $self->{'PATTERNS'} = ['(http://www.collegehumor.com/video:(\d+))']; + $self->{'PATTERNS'} = ['(http://www.collegehumor.com/video:(\d+))', + '(http://www.collegehumor.com/video/(\d+))']; bless($self, $class); $self->_prepare_parameters(); @@ -49,7 +50,7 @@ sub _parse { $metadata->{'DLURL'} = undef; # Get the XML file containing the video metadata - unless(defined($content = LWP::Simple::get(sprintf('http://www.collegehumor.com/moogaloop/video:%s', $2)))) { + unless(defined($content = LWP::Simple::get(sprintf('http://www.collegehumor.com/moogaloop/video/%s', $2)))) { $self->error('Could not download XML metadata'); return undef; } @@ -60,29 +61,7 @@ sub _parse { } $metadata->{'DLURL'} = $t->{'video'}->{'file'}; - - # The XML does not contain the full title of the video, for - # reasons possibly known to some jerk at CollegeHumor. - # So we'll have to parse the actual HTML, too. - unless(defined($content = LWP::Simple::get(sprintf('http://www.collegehumor.com/video:%s', $2)))) { - $self->error('Could not download HTML'); - return undef; - } - $p = HTML::Parser->new(api_version => 3); - - $p->handler(start => \@accum, "tagname, attr"); - $p->report_tags(qw(meta)); - $p->utf8_mode(1); - $p->parse($content); - - # Look for the title in the meta tags - foreach $t (@accum) { - if ('meta' eq $t->[0]) { - if ('title' eq $t->[1]->{'name'}) { - $metadata->{'TITLE'} = $t->[1]->{'content'}; - } - } - } + $metadata->{'TITLE'} = $t->{'video'}->{'caption'}; unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { $self->error('Could not extract download URL and title');