# Grabber for collegehumor.com
#
# (c) 2007 by Ralf Ertzinger <ralf@camperquake.de>
# licensed under GNU GPL v2

package CollegeHumorGrabber;

use GrabberBase;
@ISA = qw(GrabberBase);

use LWP::Simple qw(!get);
use XML::Simple;
use HTML::Parser;
use Data::Dumper;

use strict;

# Constructor.
#
# Builds the object through the parent class constructor, then sets the
# grabber's name and the list of URL patterns it handles before calling
# _prepare_parameters() on the result.
#
# The single pattern has two capture groups, which _parse() relies on:
#   $1 - the complete video URL
#   $2 - the numeric video ID
#
# Returns the new object.
sub new {
    my $class = shift;
    my $self = $class->SUPER::new();

    $self->{'NAME'} = 'collegehumor';

    # The dots in the host name are escaped so that they only match a
    # literal '.', not an arbitrary character.
    $self->{'PATTERNS'} = ['(http://www\.collegehumor\.com/video:(\d+))'];

    # NOTE(review): the parent constructor presumably already blesses into
    # $class, which would make this a no-op -- kept to be safe.
    bless($self, $class);
    $self->_prepare_parameters();

    return $self;
}

# Collect the metadata for a single CollegeHumor video.
#
# Parameters:
#   $url     - text containing the video URL
#   $pattern - regular expression with two capture groups: the complete
#              video URL and the numeric video ID
#
# Two documents are downloaded: the "moogaloop" XML description, which
# supplies the download URL, and the regular HTML page, whose
# <meta name="title"> tag supplies the title.
#
# Returns a hash reference with the keys URL, ID, TYPE, SOURCE, TITLE and
# DLURL. On any failure $self->error() is called with a message and undef
# is returned.
sub _parse {
    my $self = shift;
    my $url = shift;
    my $pattern = shift;

    # Copy the captures into lexicals right away. $1/$2 are only
    # meaningful after a successful match, and keep their old values when
    # the match fails.
    my ($video_url, $video_id) = ($url =~ m|$pattern|);
    unless(defined($video_url) && defined($video_id)) {
        $self->error('Could not match URL against pattern');
        return undef;
    }

    my $metadata = {
        'URL'    => $video_url,
        'ID'     => $video_id,
        'TYPE'   => 'video',
        'SOURCE' => $self->{'NAME'},
        'TITLE'  => undef,
        'DLURL'  => undef,
    };

    # Get the XML file containing the video metadata
    my $content = LWP::Simple::get(sprintf('http://www.collegehumor.com/moogaloop/video:%s', $video_id));
    unless(defined($content)) {
        $self->error('Could not download XML metadata');
        return undef;
    }

    # XMLin() dies on malformed input instead of returning undef, so the
    # call has to be wrapped in eval to be reported as a normal error.
    my $xml = eval { XML::Simple->new()->XMLin($content) };
    unless(defined($xml)) {
        $self->error('Could not parse XML metadata');
        return undef;
    }

    # Only accept the expected shape (hash -> hash -> plain string). A
    # missing or oddly shaped value leaves DLURL undefined, which is
    # reported by the final check below.
    my $video = (ref($xml) eq 'HASH') ? $xml->{'video'} : undef;
    my $file = (ref($video) eq 'HASH') ? $video->{'file'} : undef;
    if (defined($file) && !ref($file)) {
        $metadata->{'DLURL'} = $file;
    }

    # The XML does not contain the full title of the video, so the
    # actual HTML page has to be parsed, too.
    $content = LWP::Simple::get(sprintf('http://www.collegehumor.com/video:%s', $video_id));
    unless(defined($content)) {
        $self->error('Could not download HTML');
        return undef;
    }

    # Collect every <meta> start tag as a [tagname, attributes] pair.
    my @start_tags;
    my $parser = HTML::Parser->new(api_version => 3);
    $parser->handler(start => \@start_tags, "tagname, attr");
    $parser->report_tags(qw(meta));
    $parser->utf8_mode(1);
    $parser->parse($content);
    # Signal the end of the document so the parser flushes its buffer.
    $parser->eof();

    # Look for the title in the meta tags. If several tags qualify the
    # last one wins; tags without a name or content are skipped.
    foreach my $tag (@start_tags) {
        my ($tagname, $attr) = @{$tag};
        next unless 'meta' eq $tagname;
        next unless defined($attr->{'name'}) && 'title' eq $attr->{'name'};
        next unless defined($attr->{'content'});
        $metadata->{'TITLE'} = $attr->{'content'};
    }

    unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) {
        $self->error('Could not extract download URL and title');
        return undef;
    }

    return $metadata;
}

1;