X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=videosite%2FCollegeHumorGrabber.pm;h=7fb3cae4174198e4fb8220dc65d2820a8ba12a60;hb=d4fae22c083e110d1c788a78196f85d1e39b116e;hp=b25043cd61841f2987601aae09b478b86f74ca5e;hpb=7f8669fc619a87e496066fb6f92f3df127e985c5;p=videosite.git diff --git a/videosite/CollegeHumorGrabber.pm b/videosite/CollegeHumorGrabber.pm index b25043c..7fb3cae 100644 --- a/videosite/CollegeHumorGrabber.pm +++ b/videosite/CollegeHumorGrabber.pm @@ -3,14 +3,13 @@ # (c) 2007 by Ralf Ertzinger # licensed under GNU GPL v2 -package CollegeHumorGrabber; +package videosite::CollegeHumorGrabber; -use GrabberBase; -@ISA = qw(GrabberBase); +use videosite::GrabberBase; +@ISA = qw(videosite::GrabberBase); -use LWP::Simple qw(!get); +use videosite::HTMLHelper; use XML::Simple; -use HTML::Parser; use Data::Dumper; use strict; @@ -20,7 +19,10 @@ sub new { my $self = $class->SUPER::new(); $self->{'NAME'} = 'collegehumor'; - $self->{'PATTERNS'} = ['(http://www.collegehumor.com/video:(\d+))']; + $self->{_SELFTESTURL} = 'http://www.collegehumor.com/video/5635400/pixar-intro-parody'; + $self->{_SELFTESTTITLE} = 'Pixar Intro Parody'; + $self->{'PATTERNS'} = ['(http://www.collegehumor.com/video:(\d+))', + '(http://www.collegehumor.com/video/(\d+))']; bless($self, $class); $self->_prepare_parameters(); @@ -49,7 +51,7 @@ sub _parse { $metadata->{'DLURL'} = undef; # Get the XML file containing the video metadata - unless(defined($content = LWP::Simple::get(sprintf('http://www.collegehumor.com/moogaloop/video:%s', $2)))) { + unless(defined($content = $self->simple_get(sprintf('http://www.collegehumor.com/moogaloop/video/%s', $2)))) { $self->error('Could not download XML metadata'); return undef; } @@ -60,29 +62,7 @@ sub _parse { } $metadata->{'DLURL'} = $t->{'video'}->{'file'}; - - # The XML does not contain the full title of the video, for - # reasons possibly known to some jerk at CollegeHumor. - # So we'll have to parse the actual HTML, too. - unless(defined($content = LWP::Simple::get(sprintf('http://www.collegehumor.com/video:%s', $2)))) { - $self->error('Could not download HTML'); - return undef; - } - $p = HTML::Parser->new(api_version => 3); - - $p->handler(start => \@accum, "tagname, attr"); - $p->report_tags(qw(meta)); - $p->utf8_mode(1); - $p->parse($content); - - # Look for the title in the meta tags - foreach $t (@accum) { - if ('meta' eq $t->[0]) { - if (exists($t->[1]->{'name'}) and ('title' eq $t->[1]->{'name'})) { - $metadata->{'TITLE'} = $t->[1]->{'content'}; - } - } - } + $metadata->{'TITLE'} = $t->{'video'}->{'caption'}; unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { $self->error('Could not extract download URL and title');