From d738e03f4f2d70a41eba8b77177826d1ff62f42b Mon Sep 17 00:00:00 2001 From: Ralf Ertzinger Date: Thu, 27 Dec 2007 00:03:29 +0100 Subject: [PATCH] - Initial checkin --- videosite.pl | 299 +++++++++++++++++++++++++++++++++++++++ videosite/Base.pm | 123 ++++++++++++++++ videosite/CollegeHumorGrabber.pm | 65 +++++++++ videosite/FileGetter.pm | 84 +++++++++++ videosite/GetterBase.pm | 16 +++ videosite/GrabberBase.pm | 40 ++++++ videosite/HTTPRPCGetter.pm | 51 +++++++ videosite/NullGetter.pm | 24 ++++ videosite/SevenloadGrabber.pm | 65 +++++++++ videosite/YouTubeGrabber.pm | 81 +++++++++++ 10 files changed, 848 insertions(+) create mode 100644 videosite.pl create mode 100644 videosite/Base.pm create mode 100644 videosite/CollegeHumorGrabber.pm create mode 100644 videosite/FileGetter.pm create mode 100644 videosite/GetterBase.pm create mode 100644 videosite/GrabberBase.pm create mode 100644 videosite/HTTPRPCGetter.pm create mode 100644 videosite/NullGetter.pm create mode 100644 videosite/SevenloadGrabber.pm create mode 100644 videosite/YouTubeGrabber.pm diff --git a/videosite.pl b/videosite.pl new file mode 100644 index 0000000..0d4b3e3 --- /dev/null +++ b/videosite.pl @@ -0,0 +1,299 @@ +# autodownload flash videos +# +# (c) 2007 by Ralf Ertzinger +# licensed under GNU GPL v2 +# +# based on trigger.pl by Wouter Coekaerts +# download strategy revised using +# http://www.kde-apps.org/content/show.php?content=41456 +# +# Based on youtube.pl by Christian Garbs + +use strict; +use Irssi 20020324 qw (command_bind command_runsub signal_add_first signal_add_last); +use vars qw($VERSION %IRSSI); +use XML::Simple; +use Data::Dumper; +use File::Spec; + +my @grabbers; +my @getters; +my $getter; +my $conf; +my $conffile = File::Spec->catfile(Irssi::get_irssi_dir(), 'videosite.xml'); + +# activate debug here +my $debug = 0; + +# "message public", SERVER_REC, char *msg, char *nick, char *address, char *target +signal_add_last("message public" => sub {check_for_link(\@_,1,4,2,0);}); +# "message own_public", SERVER_REC, char *msg, char *target +signal_add_last("message own_public" => sub {check_for_link(\@_,1,2,-1,0);}); + +# "message private", SERVER_REC, char *msg, char *nick, char *address +signal_add_last("message private" => sub {check_for_link(\@_,1,-1,2,0);}); +# "message own_private", SERVER_REC, char *msg, char *target, char *orig_target +signal_add_last("message own_private" => sub {check_for_link(\@_,1,2,-1,0);}); + +# "message irc action", SERVER_REC, char *msg, char *nick, char *address, char *target +signal_add_last("message irc action" => sub {check_for_link(\@_,1,4,2,0);}); +# "message irc own_action", SERVER_REC, char *msg, char *target +signal_add_last("message irc own_action" => sub {check_for_link(\@_,1,2,-1,0);}); + +sub write_irssi { + my $witem = shift; + my @text = @_; + + if (defined $witem) { + $witem->print(sprintf(shift(@text), @text), MSGLEVEL_CLIENTCRAP); + } else { + Irssi::print(sprintf(shift(@text), @text)); + } + +} + +sub write_verbose { + if (Irssi::settings_get_bool('youtube_verbose')) { + write_irssi(shift, @_); + } +} + +sub write_debug { + if ($debug) { + write_irssi(shift, @_); + } +} + +sub check_for_link { + my ($signal,$parammessage,$paramchannel,$paramnick,$paramserver) = @_; + my $server = $signal->[$paramserver]; + my $target = $signal->[$paramchannel]; + my $message = ($parammessage == -1) ? '' : $signal->[$parammessage]; + my $g; + my $m; + + + my $witem; + if (defined $server) { + $witem = $server->window_item_find($target); + } else { + $witem = Irssi::window_item_find($target); + } + + # Offer the message to all Grabbers in turn + foreach $g (@grabbers) { + if (defined($m = $g->get($message))) { + write_irssi($witem, '%%R>>> %%NSaving %%Y%s%%N %%G%s', $m->{'TYPE'}, $m->{'TITLE'}); + unless($getter->get($m)) { + write_irssi($witem, '%%R>>> FAILED'); + } + } + } +} + +sub cmd_save { + + open(CONF, '>'.$conffile); + print CONF XML::Simple::XMLout($conf, KeepRoot => 1, KeyAttr => ['module', 'key']); + close(CONF); +} + +sub cmd_set { + my $target = shift; + my $key = shift; + my $val = shift; + my $p; + + foreach $p (@getters, @grabbers) { + if ($p->{'NAME'} eq $target) { + $p->setval($key, $val); + return; + } + } + write_irssi(undef, 'No such module'); +} + +sub cmd_show { + my $target = shift; + my $p; + + if (defined($target)) { + foreach $p (@getters, @grabbers) { + if ($p->{'NAME'} eq $target) { + write_irssi(undef, $p->getconfstr()); + return; + } + } + write_irssi(undef, 'No such module'); + } else { + write_irssi(undef, 'Loaded grabbers:'); + foreach $p (@grabbers) { + write_irssi(undef, ' ' . $p->{'NAME'}); + }; + + write_irssi(undef, 'Loaded getters:'); + foreach $p (@getters) { + write_irssi(undef, ' ' . $p->{'NAME'}); + }; + } +} + +sub cmd_help { + my $target = shift; + my $p; + + if (defined($target)) { + foreach $p (@getters, @grabbers) { + if ($p->{'NAME'} eq $target) { + write_irssi(undef, $p->gethelpstr()); + return; + } + } + write_irssi(undef, 'No such module'); + } else { + write_irssi(undef, "Supported commands:\n save: Save the current configuration\n help [modulename]: Display this help, or module specific help\n show [modulename]: Show loaded modules, or the current parameters of a module\n set modulename parameter value: set a module parameter to a new value\n getter [modulename]: display or set the getter to use"); + } +} + +sub cmd_getter { + my $target = shift; + my $p; + + if (defined($target)) { + foreach $p (@getters, @grabbers) { + if (($p->{'NAME'} eq $target) && ($p->{'TYPE'} eq 'getter')) { + $getter = $p; + $conf->{'videosite'}->{'getter'} = $target; + return; + } + } + write_irssi(undef, 'No such getter'); + } else { + write_irssi(undef, 'Current getter: %s', $conf->{'videosite'}->{'getter'}); + } +} + + +# save on unload +sub sig_command_script_unload { + my $script = shift; + if ($script =~ /(.*\/)?videosite(\.pl)?$/) { + cmd_save(); + } +} + +sub ploader { + + my $dir = shift; + my $pattern = shift; + my $type = shift; + my @list; + my $p; + my $g; + my @g = (); + + opendir(D, $dir) || return (); + @list = grep {/$pattern/ && -f File::Spec->catfile($dir, $_) } readdir(D); + closedir(D); + + foreach $p (@list) { + write_debug(undef, "Trying to load $p:"); + $p =~ s/\.pm$//; + eval qq{ require $p; }; + if ($@) { + write_debug(undef, "Failed to load plugin: $@"); + next; + } + + $g = eval $p.q{->new();}; + if ($@) { + write_debug(undef, "Failed to instanciate: $@"); + next; + } + + write_debug(undef, "found $g->{'TYPE'} $g->{'NAME'}"); + if ($type eq $g->{'TYPE'}) { + push(@g, $g); + } else { + write_irssi(undef, '%s has wrong type (got %s, expected %s)', $p, $g->{'TYPE'}, $type); + } + } + + write_debug(undef, "Loaded %d plugins", $#g+1); + + return @g; +} + +sub init_videosite { + + my $p; + + unless(-r $conffile && defined($conf = XML::Simple::XMLin($conffile, ForceArray => ['config', 'option'], KeepRoot => 1, KeyAttr => ['module', 'key']))) { + # No config, start with an empty one + write_debug(undef, 'No config found, using defaults'); + $conf = { 'videosite' => { 'getter' => '' }}; + } + + unshift(@INC, File::Spec->catfile(Irssi::get_irssi_dir(), 'scripts', 'videosite')); + @grabbers = ploader(File::Spec->catfile(Irssi::get_irssi_dir(), 'scripts', 'videosite'), '.*Grabber\.pm$', 'grabber'); + @getters = ploader(File::Spec->catfile(Irssi::get_irssi_dir(), 'scripts', 'videosite'), '.*Getter\.pm$', 'getter'); + + unless (defined(@grabbers) && defined(@getters)) { + write_irssi(undef, 'No grabbers or no getters found, can not proceed.'); + return; + } + + $getter = $getters[0]; + foreach $p (@getters) { + if ($conf->{'videosite'}->{'getter'} eq $p->{'NAME'}) { + $getter = $p; + } + } + write_debug(undef, 'Selected %s as getter', $getter->{'NAME'}); + $conf->{'videosite'}->{'getter'} = $getter->{'NAME'}; + + # Loop through all plugins and load the config + foreach $p (@grabbers, @getters) { + $conf->{'videosite'}->{'config'}->{$p->{'NAME'}} = $p->mergeconfig($conf->{'videosite'}->{'config'}->{$p->{'NAME'}}); + } + + Irssi::signal_add_first('command script load', 'sig_command_script_unload'); + Irssi::signal_add_first('command script unload', 'sig_command_script_unload'); + + Irssi::command_bind('videosite' => \&cmdhandler); +} + +sub cmdhandler { + my ($data, $server, $item) = @_; + my @params = split(/\s+/, $data); + + if ($params[0] eq 'save') { + cmd_save(); + } elsif ($params[0] eq 'set') { + shift(@params); + cmd_set(@params); + } elsif ($params[0] eq 'show') { + shift(@params); + cmd_show(@params); + } elsif ($params[0] eq 'help') { + shift(@params); + cmd_help(@params); + } elsif ($params[0] eq 'getter') { + shift(@params); + cmd_getter(@params); + } elsif ($params[0] eq 'debug') { + $debug = 1; + foreach (@grabbers, @getters) { + $_->setdebug(1); + } + write_irssi(undef, 'Enabled debugging'); + } elsif ($params[0] eq 'nodebug') { + $debug = 0; + foreach (@grabbers, @getters) { + $_->setdebug(0); + } + write_irssi(undef, 'Disabled debugging'); + } +} + +init_videosite(); diff --git a/videosite/Base.pm b/videosite/Base.pm new file mode 100644 index 0000000..78f0741 --- /dev/null +++ b/videosite/Base.pm @@ -0,0 +1,123 @@ +package Base; + +use strict; +use Irssi; + +sub new { + my $class = shift; + my $self = {'_DEBUG' => 0}; + + bless($self, $class); + + $self->_prepare_parameters(); + + return $self; +} + +sub error { + my $self = shift; + my @data = @_; + + Irssi::print(sprintf(shift(@_), @_)); +} + +sub debug { + my $self = shift; + my @data = @_; + + $data[0] = "DEBUG: " . $data[0]; + if ($self->{'_DEBUG'} != 0) {$self->error(@data)}; +} + +sub mergeconfig { + my $self = shift; + my $c = shift; + my $o; + + foreach $o (keys(%{$c->{'option'}})) { + if (exists($self->{'_CONFIG'}->{'option'}->{$o})) { + $self->{'_CONFIG'}->{'option'}->{$o}->{'content'} = $c->{'option'}->{$o}->{'content'}; + } + } + + return $self->{'_CONFIG'}; +} + +sub _prepare_parameters { + my $self = shift; + my $p; + + $self->{'_CONFIG'} = { 'module' => $self->{'NAME'}, + 'option' => {}}; + + foreach $p (keys(%{$self->{'_PARAMS'}})) { + $self->{'_CONFIG'}->{'option'}->{$p}->{'content'} = $self->{'_PARAMS'}->{$p}->[0]; + } +} + +sub _getval { + my $self = shift; + my $key = shift; + + return $self->{'_CONFIG'}->{'option'}->{$key}->{'content'} +} + +sub setval { + my $self = shift; + my $key = shift; + my $val = shift; + + if (exists($self->{'_CONFIG'}->{'option'}->{$key})) { + $self->{'_CONFIG'}->{'option'}->{$key}->{'content'} = $val; + } else { + $self->error('Module %s does not have a parameter named %s', $self->{'NAME'}, $key); + } +} + +sub getconfstr { + my $self = shift; + my $s = 'Options for ' . $self->{'NAME'} . ":\n"; + my $k; + my $p; + + foreach $k (keys(%{$self->{'_CONFIG'}->{'option'}})) { + $p = $self->{'_CONFIG'}->{'option'}->{$k}->{'content'}; + $p =~ s/%/%%/g; + $s .= sprintf(" %s: %s", $k, $p); + if ($self->{'_CONFIG'}->{'option'}->{$k}->{'content'} == $self->{'_PARAMS'}->{$k}->[0]) { + $s .= " (default)\n"; + } else { + $s .= "\n"; + } + } + + return $s; +} + +sub gethelpstr { + my $self = shift; + my $s = 'Help for ' . $self->{'NAME'} . ":\n"; + my $k; + my $p; + + if (exists($self->{'DESC'})) { + $s .= "Description:\n " . $self->{'DESC'}; + } + + $s .= " Options:\n"; + foreach $k (keys(%{$self->{'_CONFIG'}->{'option'}})) { + $p = $self->{'_PARAMS'}->{$k}->[0]; + $p =~ s/%/%%/g; + $s .= sprintf(" %s: %s (default: %s)\n", $k, $self->{'_PARAMS'}->{$k}->[1], $p); + } + + return $s; +} + +sub setdebug { + my $self = shift; + + $self->{'_DEBUG'} = shift; +} + +1; diff --git a/videosite/CollegeHumorGrabber.pm b/videosite/CollegeHumorGrabber.pm new file mode 100644 index 0000000..9fdb87e --- /dev/null +++ b/videosite/CollegeHumorGrabber.pm @@ -0,0 +1,65 @@ +package CollegeHumorGrabber; + +use GrabberBase; +@ISA = qw(GrabberBase); + +use LWP::Simple qw(!get); +use XML::Simple; +use Data::Dumper; + +use strict; + +sub new { + my $class = shift; + my $self = $class->SUPER::new(); + + $self->{'NAME'} = 'collegehumor'; + $self->{'PATTERNS'} = ['(http://www.collegehumor.com/video:(\d+))']; + + bless($self, $class); + $self->_prepare_parameters(); + + return $self; +} + +sub _parse { + my $self = shift; + my $url = shift; + my $pattern = shift; + my $content; + my $metadata = {}; + my $p = XML::Simple->new(); + my $t; + + $url =~ m|$pattern|; + $url = $1; + + $metadata->{'URL'} = $url; + $metadata->{'ID'} = $2; + $metadata->{'TYPE'} = 'collegehumor'; + $metadata->{'TITLE'} = undef; + $metadata->{'DLURL'} = undef; + + # Get the XML file containing the video metadata + unless(defined($content = LWP::Simple::get(sprintf('http://www.collegehumor.com/moogaloop/video:%s', $2)))) { + $self->error('Could not download XML metadata'); + return undef; + } + + unless(defined($t = $p->XMLin($content))) { + $self->error('Could not parse XML metadata'); + return undef; + } + + $metadata->{'DLURL'} = $t->{'video'}->{'file'}; + $metadata->{'TITLE'} = $t->{'video'}->{'caption'}; + + unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { + $self->error('Could not extract download URL and title'); + return undef; + } + + return $metadata; +} + +1; diff --git a/videosite/FileGetter.pm b/videosite/FileGetter.pm new file mode 100644 index 0000000..a01c285 --- /dev/null +++ b/videosite/FileGetter.pm @@ -0,0 +1,84 @@ +package FileGetter; + +use GetterBase; +@ISA = qw(GetterBase); + +use strict; +use LWP::Simple qw(!get); +use File::Basename; + +sub new { + my $class = shift; + my $self = $class->SUPER::new(); + + $self->{'NAME'} = 'filegetter'; + $self->{'_PARAMS'} = {'MINFREE' => ['500000', 'The amount of space that needs to be available on the filesystem before the video is downloaded (in kilobytes)'], 'FILEPATTERN', => ['/tmp/%s - %s - %s.flv', "The file name to save the file under. This is a string which is passed to a sprintf call later on. The parameters passed to that sprintf call, in order, are:\n- The site the video is from\n- The ID of the video\n- The title of the video\n- The URL of the video file itself\n- The URL of the site the video was taken from\nAll parameters are encoded (space and / replaced by _)"]}; + + bless($self, $class); + $self->_prepare_parameters(); + + return $self; +} + +sub get { + my $self = shift; + my $video = shift; + my $dlfile; + my $dirname; + + $dlfile = sprintf($self->_getval('FILEPATTERN'), + $self->_encode($video->{'TYPE'}), + $self->_encode($video->{'ID'}), + $self->_encode($video->{'TITLE'}), + $self->_encode($video->{'DLURL'}), + $self->_encode($video)->{'URL'}); + + $dirname = dirname($dlfile); + if ($self->_diskfree($dirname) < $self->_getval('MINFREE')) { + $self->error("Not enough free space to download"); + return 0; + } + + $self->debug('Going to download %s to %s', $video->{'DLURL'}, $dlfile); + + if (200 != LWP::Simple::mirror($video->{'DLURL'}, $dlfile)) { + $self->error('Could not download %s to %s', $video->{'DLURL'}, $dlfile); + return 0; + } + + return 1; +} + + +sub _encode { + my $self = shift; + my $s = shift; + + $s =~ s|[/ ]|_|g; + + return $s; +} + +sub _diskfree { + + # poor man's df + # if you want it portable, use Filesys::Statvfs + + my $self = shift; + my $dir = shift; + my $size; + + open DF, "df -P $dir|" or return 0; + my $line = ; # skip header + + if ( $line = ) { + if ($line =~ /\s(\d+)\s+\d{1,3}% (\/.*)$/) { + $size = $1; + } + } else { + $size = -1; #some error occurred + } + + close DF; + return $size; +} diff --git a/videosite/GetterBase.pm b/videosite/GetterBase.pm new file mode 100644 index 0000000..c1eb5b5 --- /dev/null +++ b/videosite/GetterBase.pm @@ -0,0 +1,16 @@ +package GetterBase; + +use Base; +@ISA = qw(Base); + +use strict; + +sub new { + my $class = shift; + my $self = { + NAME => 'FlashGetter', + TYPE => 'getter' + }; + + return bless($self, $class); +} diff --git a/videosite/GrabberBase.pm b/videosite/GrabberBase.pm new file mode 100644 index 0000000..5dbb36b --- /dev/null +++ b/videosite/GrabberBase.pm @@ -0,0 +1,40 @@ +package GrabberBase; +use Base; +@ISA = qw(Base); + +use strict; + +sub new { + my $class = shift; + my $self = { + NAME => 'FlashGrab', + TYPE => 'grabber', + PATTERNS => [], + }; + return bless($self, $class); +} + +sub get($$) { + my $self = shift; + my $url = shift; + my $pattern; + + foreach $pattern (@{$self->{'PATTERNS'}}) { + $self->debug("Matching %s against %s", $pattern, $url); + if ($url =~ m|$pattern|) { + $self->debug("Match"); + return $self->_parse($url, $pattern); + } + } + + return undef; +} + +sub _parse { + my $self = shift; + my $url = shift; + + return undef; +} + +1; diff --git a/videosite/HTTPRPCGetter.pm b/videosite/HTTPRPCGetter.pm new file mode 100644 index 0000000..007fd6c --- /dev/null +++ b/videosite/HTTPRPCGetter.pm @@ -0,0 +1,51 @@ +package HTTPRPCGetter; + +use GetterBase; +@ISA = qw(GetterBase); + +use strict; +use LWP::Simple qw(!get); + +sub new { + my $class = shift; + my $self = $class->SUPER::new(); + + $self->{'NAME'} = 'HTTPRPCGetter'; + $self->{'_PARAMS'} = {'URL' => ['http://www.example.com/get.pl?type=%s&vid=%s&title=%s&url=%s', "The URL to call in order to trigger a download. This is a string which is passed to a sprintf call later on. The parameters passed to that sprintf call, in order, are:\n- The site the video is from\n- The ID of the video\n- The title of the video\n- The URL of the video file itself\n- The URL of the site the video was taken from\nAll parameters are hexencoded"]}; + + bless($self, $class); + $self->_prepare_parameters(); + + return $self; +} + +sub get { + my $self = shift; + my $video = shift; + my $callurl; + + $callurl = sprintf($self->_getval('URL'), + $self->_encode($video->{'TYPE'}), + $self->_encode($video->{'ID'}), + $self->_encode($video->{'TITLE'}), + $self->_encode($video->{'DLURL'}), + $self->_encode($video->{'URL'})); + + $self->debug('Going to call %s', $callurl); + + unless(defined(LWP::Simple::get($callurl))) { + $self->error("Error calling RPC"); + return 0; + } + + return 1; +} + +sub _encode { + my $self = shift; + my $s = shift; + + $s =~ s/(.)/sprintf("%%%02x", ord($1))/ge; + + return $s; +} diff --git a/videosite/NullGetter.pm b/videosite/NullGetter.pm new file mode 100644 index 0000000..ad97294 --- /dev/null +++ b/videosite/NullGetter.pm @@ -0,0 +1,24 @@ +package NullGetter; + +use GetterBase; +@ISA = qw(GetterBase); + +use strict; +use LWP::Simple qw(!get); + +sub new { + my $class = shift; + my $self = $class->SUPER::new(); + + $self->{'NAME'} = 'nullgetter'; + + bless($self, $class); + $self->_prepare_parameters(); + + return $self; +} + +sub get { + + return 1; +} diff --git a/videosite/SevenloadGrabber.pm b/videosite/SevenloadGrabber.pm new file mode 100644 index 0000000..83ba8f6 --- /dev/null +++ b/videosite/SevenloadGrabber.pm @@ -0,0 +1,65 @@ +package SevenloadGrabber; + +use GrabberBase; +@ISA = qw(GrabberBase); + +use LWP::Simple qw(!get); +use XML::Simple; +use Data::Dumper; + +use strict; + +sub new { + my $class = shift; + my $self = $class->SUPER::new(); + + $self->{'NAME'} = 'sevenload'; + $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*sevenload.com/videos/([^/]+)(?:/.*)*)']; + + bless($self, $class); + $self->_prepare_parameters(); + + return $self; +} + +sub _parse { + my $self = shift; + my $url = shift; + my $pattern = shift; + my $content; + my $metadata = {}; + my $p = XML::Simple->new(); + my $t; + + $url =~ m|$pattern|; + $url = $1; + + $metadata->{'URL'} = $url; + $metadata->{'ID'} = $2; + $metadata->{'TYPE'} = 'sevenload'; + $metadata->{'TITLE'} = undef; + $metadata->{'DLURL'} = undef; + + # Get the XML file containing the video metadata + unless(defined($content = LWP::Simple::get(sprintf('http://api.sevenload.com/api/player/id/%s', $2)))) { + $self->error('Could not download XML metadata'); + return undef; + } + + unless(defined($t = $p->XMLin($content))) { + $self->error('Could not parse XML metadata'); + return undef; + } + + $metadata->{'DLURL'} = $t->{'item'}->{'video'}->{'url'}; + $metadata->{'TITLE'} = $t->{'item'}->{'title'}; + + unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { + $self->error('Could not extract download URL and title'); + return undef; + } + + return $metadata; +} + +1; diff --git a/videosite/YouTubeGrabber.pm b/videosite/YouTubeGrabber.pm new file mode 100644 index 0000000..bf7640c --- /dev/null +++ b/videosite/YouTubeGrabber.pm @@ -0,0 +1,81 @@ +package YouTubeGrabber; + +use GrabberBase; +@ISA = qw(GrabberBase); + +use LWP::Simple qw(!get); +use HTML::Parser; +use Data::Dumper; + +use strict; + +sub new { + my $class = shift; + my $self = $class->SUPER::new(); + + $self->{'NAME'} = 'youtube'; + $self->{'PATTERNS'} = ['(http://(?:[-a-zA-Z0-9_.]+\.)*youtube.(?:com|de|co.uk)/watch\?(?:.+=.+&)*v=([-a-zA-Z0-9_]+))', + '(http://(?:[-a-zA-Z0-9_.]+\.)*youtube.(?:com|de|co.uk)/v/([-a-zA-Z0-9_]+))']; + + bless($self, $class); + $self->_prepare_parameters(); + + return $self; +} + +sub _parse { + my $self = shift; + my $url = shift; + my $pattern = shift; + my $content; + my $metadata = {}; + my $p = HTML::Parser->new(api_version => 3); + my @accum; + my @text; + my $e; + + $url =~ m|$pattern|; + $url = $1; + + $metadata->{'URL'} = $url; + $metadata->{'ID'} = $2; + $metadata->{'TYPE'} = 'youtube'; + $metadata->{'TITLE'} = undef; + $metadata->{'DLURL'} = undef; + + unless(defined($content = LWP::Simple::get(sprintf('http://youtube.com/watch?v=%s', $2)))) { + $self->error('Could not download %s', $url); + return undef; + } + + $p->handler(start => \@accum, "tagname, attr"); + $p->handler(text => \@text, "text"); + $p->report_tags(qw(meta script)); + $p->utf8_mode(1); + $p->parse($content); + + # Look for the title in the meta tags + foreach $e (@accum) { + if ('meta' eq $e->[0]) { + if ('title' eq $e->[1]->{'name'}) { + $metadata->{'TITLE'} = $e->[1]->{'content'}; + } + } + } + + # Look for the download URL + foreach $e (@text) { + if ($e->[0] =~ m|/watch_fullscreen\?(.*)\&fs|) { + $metadata->{'DLURL'} = 'http://www.youtube.com/get_video.php?' . $1; + } + } + + unless(defined($metadata->{'DLURL'}) && defined($metadata->{'TITLE'})) { + $self->error('Could not determine download URL'); + return undef; + } + + return $metadata; +} + +1; -- 1.8.3.1