X-Git-Url: https://git.camperquake.de/gitweb.cgi?a=blobdiff_plain;f=videosite%2FCollegeHumorGrabber.pm;h=4388a312f38903887f07dfa29439d24bab874bfe;hb=400621043a9ee2217a07e2afe5cedf23a2491d69;hp=546e669d1f05f7ea8ee749296a5194e4dc4ffa49;hpb=01b3e34609b4e32c187dd2ded169efd19a58a6bb;p=videosite.git diff --git a/videosite/CollegeHumorGrabber.pm b/videosite/CollegeHumorGrabber.pm index 546e669..4388a31 100644 --- a/videosite/CollegeHumorGrabber.pm +++ b/videosite/CollegeHumorGrabber.pm @@ -8,6 +8,7 @@ package CollegeHumorGrabber; use GrabberBase; @ISA = qw(GrabberBase); +use HTMLHelper; use LWP::Simple qw(!get); use XML::Simple; use Data::Dumper; @@ -34,6 +35,7 @@ sub _parse { my $content; my $metadata = {}; my $p = XML::Simple->new(); + my @accum; my $t; $url =~ m|$pattern|; @@ -58,7 +60,20 @@ sub _parse { } $metadata->{'DLURL'} = $t->{'video'}->{'file'}; - $metadata->{'TITLE'} = $t->{'video'}->{'caption'}; + + # The XML does not contain the full title of the video, for + # reasons possibly known to some jerk at CollegeHumor. + # So we'll have to parse the actual HTML, too. + $p = HTMLHelper->new(); + unless(defined($content = $p->load(sprintf('http://www.collegehumor.com/video:%s', $2)))) { + $self->error('Could not download HTML'); + return undef; + } + + $t = $p->findnodes('meta[@name="title"]'); + if (defined($t)) { + $metadata->{'TITLE'} = $t->{'content'}; + } unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { $self->error('Could not extract download URL and title');