package DoubleVikingGrabber;

# Grabber plugin for videos hosted on doubleviking.com.
#
# Recognises DoubleViking video page URLs, downloads the page and extracts
# the video title and the download URL of the media file from it.

use strict;
use warnings;

use GrabberBase;
our @ISA = qw(GrabberBase);

use LWP::Simple qw(!get);
use HTML::TokeParser;
use Data::Dumper;    # not used by the module itself; kept for ad-hoc debugging

# Constructor.
#
# Builds the object through the parent class constructor, then registers the
# grabber name and the URL pattern this grabber is responsible for.
#
# The pattern has two capture groups:
#   $1 - the complete matched URL
#   $2 - the numeric video ID
#
# Returns the new grabber object.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new();

    $self->{'NAME'}     = 'doubleviking';
    $self->{'PATTERNS'} = [
        '(http://(?:[-a-zA-Z0-9_.]+\.)*doubleviking\.com/videos/(?:[-a-zA-Z0-9_ %]+/)*page0\.html/(\d+)\.html$)',
    ];

    bless($self, $class);

    # Provided by the parent class (not visible in this file); presumably
    # sets up the grabber's configurable parameters.
    $self->_prepare_parameters();

    return $self;
}

# Extract the metadata for a single video URL.
#
# Parameters:
#   $url     - the URL to inspect
#   $pattern - the regular expression the URL is expected to match; it must
#              capture the full URL as $1 and the video ID as $2
#
# Returns a hash reference with the keys URL, ID, TYPE, SOURCE, TITLE and
# DLURL on success. On failure an error is reported through $self->error()
# and undef is returned (explicitly, as before, so existing callers that
# test the scalar result keep working).
sub _parse {
    my $self    = shift;
    my $url     = shift;
    my $pattern = shift;

    # Capture into lexicals and verify the match. Relying on $1/$2 after an
    # unchecked match would silently reuse values from an earlier match.
    my ($matched_url, $id) = $url =~ m|$pattern|;
    unless (defined($matched_url) && defined($id)) {
        $self->error('Could not parse URL %s', $url);
        return undef;
    }

    my $metadata = {
        'URL'    => $matched_url,
        'ID'     => $id,
        'TYPE'   => 'video',
        'SOURCE' => 'doubleviking',
        'TITLE'  => undef,
        'DLURL'  => undef,
    };

    # Always fetch the canonical page for the ID, whatever host or path
    # variant the original URL used.
    my $page_url = sprintf('http://www.doubleviking.com/videos/page0.html/%s.html', $id);
    my $content  = LWP::Simple::get($page_url);
    unless (defined($content)) {
        $self->error('Could not download %s', $matched_url);
        return undef;
    }

    my $parser = HTML::TokeParser->new(\$content);

    # Look for the title and strip the site suffix from it.
    if ($parser->get_tag('title')) {
        $metadata->{'TITLE'} = $parser->get_text();
        $metadata->{'TITLE'} =~ s/^\s*(.*) - DoubleViking Video\s*$/$1/im;
    }

    # Look for the download URL in the flashvars of the <embed> tags. When
    # several tags carry a file parameter, the last one wins.
    while (my $tag = $parser->get_tag('embed')) {
        my $flashvars = $tag->[1]{'flashvars'};
        next unless defined($flashvars);

        # Accept "file=" at the very start or end of the parameter string
        # as well, not only when it is enclosed by '&' on both sides.
        if ($flashvars =~ m{(?:^|&)file=([^&]+)}) {
            $metadata->{'DLURL'} = $1;
        }
    }

    unless (defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) {
        $self->error('Could not determine download URL');
        return undef;
    }

    return $metadata;
}

1;