X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=videosite%2FCollegeHumorGrabber.pm;h=7fb3cae4174198e4fb8220dc65d2820a8ba12a60;hb=d4fae22c083e110d1c788a78196f85d1e39b116e;hp=192ece592710591715a8ab6b8a593a009231ef97;hpb=911eeb36e674f916d08b04cd9c48bb33e96bf108;p=videosite.git diff --git a/videosite/CollegeHumorGrabber.pm b/videosite/CollegeHumorGrabber.pm index 192ece5..7fb3cae 100644 --- a/videosite/CollegeHumorGrabber.pm +++ b/videosite/CollegeHumorGrabber.pm @@ -9,7 +9,6 @@ use videosite::GrabberBase; @ISA = qw(videosite::GrabberBase); use videosite::HTMLHelper; -use LWP::Simple qw(!get); use XML::Simple; use Data::Dumper; @@ -20,7 +19,10 @@ sub new { my $self = $class->SUPER::new(); $self->{'NAME'} = 'collegehumor'; - $self->{'PATTERNS'} = ['(http://www.collegehumor.com/video:(\d+))']; + $self->{_SELFTESTURL} = 'http://www.collegehumor.com/video/5635400/pixar-intro-parody'; + $self->{_SELFTESTTITLE} = 'Pixar Intro Parody'; + $self->{'PATTERNS'} = ['(http://www.collegehumor.com/video:(\d+))', + '(http://www.collegehumor.com/video/(\d+))']; bless($self, $class); $self->_prepare_parameters(); @@ -49,7 +51,7 @@ sub _parse { $metadata->{'DLURL'} = undef; # Get the XML file containing the video metadata - unless(defined($content = LWP::Simple::get(sprintf('http://www.collegehumor.com/moogaloop/video:%s', $2)))) { + unless(defined($content = $self->simple_get(sprintf('http://www.collegehumor.com/moogaloop/video/%s', $2)))) { $self->error('Could not download XML metadata'); return undef; } @@ -60,20 +62,7 @@ sub _parse { } $metadata->{'DLURL'} = $t->{'video'}->{'file'}; - - # The XML does not contain the full title of the video, for - # reasons possibly known to some jerk at CollegeHumor. - # So we'll have to parse the actual HTML, too. - $p = HTMLHelper->new(); - unless(defined($content = $p->load(sprintf('http://www.collegehumor.com/video:%s', $2)))) { - $self->error('Could not download HTML'); - return undef; - } - - $t = $p->findnodes('meta[@name="title"]'); - if (defined($t)) { - $metadata->{'TITLE'} = $t->{'content'}; - } + $metadata->{'TITLE'} = $t->{'video'}->{'caption'}; unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { $self->error('Could not extract download URL and title');