1 # library to autodownload flash videos
3 # (c) 2007-2008 by Ralf Ertzinger <ralf@camperquake.de>
4 # licensed under GNU GPL v2
6 # Based on youtube.pl by Christian Garbs <mitch@cgarbs.de>
8 # based on trigger.pl by Wouter Coekaerts <wouter@coekaerts.be>
13 use vars qw(@ISA @EXPORT_OK);
20 use JSON -support_by_pp;
21 use File::Temp qw(tempfile);
25 @EXPORT_OK = qw(init register_api check_for_link);
30 my %debugwindows = ();
34 my %builtin_config = ();
35 my $builtin_config_path;
36 my $builtin_config_default;
38 my %config_cache = ();
42 # The default config. These values will be set in the config
43 # if they do not exist already.
46 'getter' => 'filegetter',
48 'active-connectors' => 'direct',
49 'defined-connectors' => 'direct',
57 'name' => 'environment',
62 'config-version' => '2',
66 # This is a list of default values for the remote API. These
67 # are used if the values are not registered by the library user.
70 io => sub { print @_, "\n" },
71 config_init => \&_builtin_config_init,
72 config_get => \&_builtin_config_get,
73 config_set => \&_builtin_config_set,
74 config_has => \&_builtin_config_has,
75 config_save => \&_builtin_config_save,
76 config_del => \&_builtin_config_del,
77 color => sub { return '' },
78 module_path => sub { return dirname(realpath($0)) },
79 quote => sub { return $_ },
81 wait_for_child => sub {},
85 # List of known commands and handlers
87 my $videosite_commands = {
117 $remote_api->{reload}->();
142 # Output a string on the client.
143 # Works like (s)printf in that it takes a format string and a list of
144 # values to be replaced. Undefined values will be printed as '(undef)'
146 # All parameters (except for the format string itself) will be quoted
147 # using the client specific quote function
153 @text = ('') unless(@text);
155 # This will define the outputprefix once, so we don't have
156 # to do this every time.
157 $outputprefix = sprintf("%svideosite: %s",
158 _colorpair('magenta'),
159 _colorpair()) unless(defined($outputprefix));
160 $format = $outputprefix . shift(@text);
163 # The format string is assumed to be appropriately quoted.
164 # Quote the rest of the text, replacing undefined strings by (undef)
166 @text = map { defined($_)?$remote_api->{quote}->($_):'(undef)' } @text;
168 $outputstack[0]->{io}->(sprintf($format, @text));
172 # Recursively walk through a hash-of-hashes, calling the given function
173 # for each found leaf with the path to the leaf
175 sub _recursive_hash_walk {
177 my $callback = shift;
180 foreach (keys(%{$hash})) {
181 if (ref($hash->{$_}) eq 'HASH') {
182 _recursive_hash_walk($hash->{$_}, $callback, @path, $_);
184 $callback->([@path, $_], $hash->{$_});
190 # Return the color code for the given foreground/background color
191 # pair. Both can be undef, which means "default"
196 $fg = defined($fg)?$fg:'default';
197 $bg = defined($bg)?$bg:'default';
199 return $remote_api->{color}->($fg, $bg);
203 # Sets the given config item if it is not set already
205 sub _init_config_item {
209 unless(_config_has($path)) {
210 _config_set($path, $value);
215 # Print a message if debug is enabled
220 $data[0] = "DEBUG: " . $data[0];
222 # Check for global debug
226 # Check if current window is in the per-window-debug list
227 if (exists($debugwindows{$outputstack[0]->{window}})) {
234 # Load a list of modules matching a pattern from a given directory.
246 opendir(D, $dir) || return ();
247 @list = grep {/$pattern/ && -f File::Spec->catfile($dir, $_) } readdir(D);
251 _debug("Trying to load $p:");
254 load "videosite::$p";
257 _io("Failed to load plugin: $@");
262 $g = "videosite::$p"->new();
265 _io("Failed to instanciate: $@");
270 _debug("found $g->{'TYPE'} $g->{'NAME'}");
271 if ($type eq $g->{'TYPE'}) {
275 io_debug => \&_debug,
276 connectors => sub { return _connectorlist('active-connectors') },
277 config_get => \&_config_get,
278 config_set => \&_config_set,
279 config_has => \&_config_has,
280 wait_for_child => $remote_api->{wait_for_child},
283 _io('%s has wrong type (got %s, expected %s)', $p, $g->{'TYPE'}, $type);
288 _debug("Loaded %d plugins", $#g+1);
294 # Populate the @grabbers and @getters lists from the given
297 sub _load_modules($) {
301 foreach (keys(%INC)) {
302 if ($INC{$_} =~ m|^$path|) {
303 _debug("Removing %s from \$INC", $_);
307 @grabbers = _ploader($path, '.*Grabber\.pm$', 'grabber');
308 @getters = _ploader($path, '.*Getter\.pm$', 'getter');
312 # Wrapper functions for config management to put in
317 my $dotpath = join('.', @{$path});
320 if ($config_cache && exists($config_cache{$dotpath}) && exists($config_cache{$dotpath}->{value})) {
321 $value = $config_cache{$dotpath}->{value};
323 $value = $remote_api->{config_get}->($path);
324 $config_cache{$dotpath} = {value => $value, has => 1};
328 _debug("config: getting %s=%s", $dotpath, $value);
334 my $dotpath = join('.', @{$path});
337 _debug("config: setting %s=%s", $dotpath, $value);
338 $config_cache{$dotpath} = {value => $value, has => 1};
339 return $remote_api->{config_set}->($path, $value);
344 my $dotpath = join('.', @{$path});
347 if ($config_cache && exists($config_cache{$dotpath}) && exists($config_cache{$dotpath}->{has})) {
348 $b = $config_cache{$dotpath}->{has};
350 $b = $remote_api->{config_has}->($path);
351 $config_cache{$dotpath}->{has} = $b;
354 _debug("config: testing %s (%s)", $dotpath, $b?'true':'false');
360 my $dotpath = join('.', @{$path});
362 _debug("config: removing %s", $dotpath);
363 delete($config_cache{$dotpath});
364 $remote_api->{config_del}->($path);
368 # The _config_list_* are helper functions taking a path to a comma separated
369 # string. The string is interpreted as a list and the action performed
370 # on it, storing back the modified version
374 # Add an item to the list, checking for duplicates
376 sub _config_list_add {
381 if (_config_has($path)) {
382 @c = split(/\s*,\s*/, _config_get($path));
387 _debug("Adding %s to list %s", $item, join(".", @{$path}));
388 unless(grep { $_ eq $item } @c) {
392 _config_set($path, join(',', @c));
396 # Remove an item from the list
398 sub _config_list_del {
403 unless(_config_has($path)) {
407 _debug("Removing %s from list %s", $item, join('.', @{$path}));
408 @c = grep { $item ne $_ } split(/\s*,\s*/, _config_get($path));
410 _config_set($path, join(',', @c));
414 # Return true if the list contains the given item, false otherwise
416 sub _config_list_has {
420 unless(_config_has($path)) {
424 _debug("Checking for %s in list %s", $item, join('.', @{$path}));
426 return grep { $item eq $_ } split(/\s*,\s*/, _config_get($path));
430 # Replace a list with the given items
432 sub _config_list_set {
435 _debug("Replacing %s with (%s)", join('.', @{$path}), join(",", @_));
437 _config_set($path, join(',', @_));
441 # Return the list of currently active connectors, in the configured
448 foreach(split(/,/, _config_get([$key]))) {
449 push(@c, _unserialize_connector_hash($_));
456 # Convert a connector hash from its config structure back to a perl
459 sub _unserialize_connector_hash {
463 if (_config_has(['connectors', $name, 'name'])) {
464 $connector->{name} = _config_get(['connectors', $name, 'name']);
465 $connector->{schemas} = {};
466 foreach ('http', 'https') {
467 if (_config_has(['connectors', $name, 'schemas', $_])) {
468 $connector->{schemas}->{$_} = _config_get(['connectors', $name, 'schemas', $_]);
473 _debug("Returning connector %s: %s", $name, Dumper($connector));
479 # Push a new output function on the IO stack.
482 unshift(@outputstack, shift);
486 # Pop the topmost output function from the stack, leaving
487 # at least one function on it.
490 if (scalar(@outputstack) > 0) {
496 # Takes a string and replaces commonly used URL shorteners recursively,
497 # up to 10 levels deep
499 sub _expand_url_shortener {
503 'is\.gd/[[:alnum:]]+',
504 'otf\.me/[[:alnum:]]+',
505 'hel\.me/[[:alnum:]]+',
506 '7ax\.de/[[:alnum:]]+',
507 'ow\.ly/[[:alnum:]]+',
508 'j\.mp/[[:alnum:]]+',
509 'bit\.ly/[[:alnum:]]+',
510 'tinyurl\.com/[[:alnum:]]+',
511 'pop\.is/[[:alnum:]]+',
512 'post\.ly/[[:alnum:]]+',
513 '1\.ly/[[:alnum:]]+',
514 '2\.ly/[[:alnum:]]+',
515 't\.co/[[:alnum:]]+',
516 'shar\.es/[[:alnum:]]+',
517 'goo\.gl/[[:alnum:]]+',
519 my $ua = LWP::UserAgent->new(agent => 'Mozilla', max_redirect => 0, timeout => 5);
522 OUTER: while (($os ne $s) and ($i > 0)) {
527 foreach my $pattern (@urlshortener) {
528 my $p = "https?:\/\/" . $pattern;
530 _debug("Matching %s against %s", $p, $s);
535 _debug("Found %s", $matched);
536 $res = $ua->head($matched);
537 if ($res->is_redirect()) {
538 my $new = $res->headers()->header("Location");
540 _debug("Replacing %s with %s", $matched, $new);
541 $s =~ s/$matched/$new/;
544 _debug("Error resolving %s", $matched);
551 _debug("Loop terminated by counter");
554 _debug("Final string: %s", $s);
560 # Save the config to durable storage
565 if ($remote_api->{config_save}->()) {
568 _io(sprintf("%sConfig save failed%s", _colorpair("*red"), _colorpair()));
573 # Set a configuration element
582 foreach $p (@getters, @grabbers) {
583 if ($p->{'NAME'} eq $target) {
584 $p->setval($key, $val);
588 _io('No such module');
593 # Enable a given module
600 foreach $p (@grabbers) {
601 if ($p->{'NAME'} eq $target) {
606 _io('No such module');
610 # Disable given module
617 foreach $p (@grabbers) {
618 if ($p->{'NAME'} eq $target) {
623 _io('No such module');
627 # Show settings for modules
635 if (defined($target)) {
636 foreach $p (@getters, @grabbers) {
637 if ($p->{'NAME'} eq $target) {
638 _io($p->getconfstr());
642 _io('No such module');
644 _io('Loaded grabbers (* denotes enabled modules):');
645 foreach $p (@grabbers) {
646 $e = $p->_getval('enabled');
647 _io(' %s%s', $p->{'NAME'}, $e?'*':'');
650 _io('Loaded getters:');
651 foreach $p (@getters) {
652 _io(' %s', $p->{'NAME'});
658 # Show help for the commands
665 if (defined($target)) {
666 foreach $p (@getters, @grabbers) {
667 if ($p->{'NAME'} eq $target) {
668 _io($p->gethelpstr());
672 _io('No such module');
676 save: save the current configuration
677 help [modulename]: display this help, or module specific help
678 show [modulename]: show loaded modules, or the current parameters of a module
679 set modulename parameter value: set a module parameter to a new value
680 getter [modulename]: display or set the getter to use
681 enable [modulename]: enable the usage of this module (grabbers only)
682 disable [modulename]: disable the usage of this module (grabbers only)
683 reload: reload all modules (this is somewhat experimental)
684 mode [modename]: display or set the operation mode (download/display)
685 connector [subcommand]: manage connectors (proxies)
686 debug: enable debugging messages
687 nodebug: disable debugging messages
693 # Set the getter to use
700 if (defined($target)) {
701 foreach $p (@getters) {
702 if ($p->{'NAME'} eq $target) {
704 _config_set(['getter'], $target);
705 _io("Getter changed to %s", $target);
709 _io('No such getter');
711 _io('Current getter: %s', _config_get(['getter']));
716 # Show/set the working mode
722 if (defined($mode)) {
724 if (('download' eq $mode) or ('display' eq $mode)) {
725 _config_set(['mode'], $mode);
726 _io('Now using %s mode', $mode);
728 _io('Invalid mode: %s', $mode);
731 _io('Current mode: %s', _config_get(['mode']));
737 # Manage the connectors
744 unless(defined($subcmd)) {
748 $subcmd = lc($subcmd);
750 if ($subcmd eq 'list') {
751 _io("Defined connectors");
752 foreach $c (_connectorlist('defined-connectors')) {
754 my $schemas = $c->{schemas};
755 if (scalar(keys(%{$schemas})) == 0) {
756 _io(" No schemas defined");
758 foreach (keys(%{$schemas})) {
759 _io(' %s: %s', $_, $schemas->{$_});
765 _io("Selected connectors: %s", _config_get(['active-connectors']));
766 } elsif ($subcmd eq 'add') {
769 unless(defined($name)) {
770 _io("No name given");
776 unless($name =~ m|^[a-z]+$|) {
777 _io("%s is not a valid connector name (only letters are allowed)", $name);
781 if (_config_list_has(['defined-connectors'], $name)) {
782 _io("Connector already exists");
786 _config_set(['connectors', $name, 'name'], $name);
787 _config_list_add(['defined-connectors'], $name);
788 } elsif ($subcmd eq 'del') {
792 unless(defined($name)) {
793 _io("No name given");
797 unless (_config_list_has(['defined-connectors'], $name)) {
798 _io("Connector does not exist");
802 if (_config_has(['connectors', $name, '_immutable'])) {
803 _io("Connector cannot be removed");
807 # Remove from list of active connectors
808 _config_list_del(['defined-connectors'], $name);
809 _config_list_del(['active-connectors'], $name);
811 _config_del(['connectors', $name, 'name']);
812 _config_del(['connectors', $name, '_immutable']);
813 _config_del(['connectors', $name, 'schemas', 'http']);
814 _config_del(['connectors', $name, 'schemas', 'https']);
816 @dcon = split(/,/, _config_get(['active-connectors']));
818 if (scalar(@dcon) == 0) {
819 _io("List of selected connectors is empty, resetting to direct");
820 _config_list_add(['active-connectors', 'direct']);
822 } elsif ($subcmd eq 'addschema') {
823 my ($conn, $schema, $proxy) = @_;
825 unless(defined($conn)) {
826 _io("No connector name given");
830 unless(defined($schema)) {
831 _io("No schema given");
835 unless(defined($proxy)) {
836 _io("No proxy given");
841 unless(_config_list_has(['defined-connectors'], $conn)) {
842 _io("Connector does not exist");
846 if (_config_has(['connectors', $conn, '_immutable'])) {
847 _io("Connector cannot be modified");
851 $schema = lc($schema);
852 _config_set(['connectors', $conn, 'schemas', $schema], $proxy);
853 } elsif ($subcmd eq 'delschema') {
854 my ($conn, $schema) = @_;
856 unless(defined($conn)) {
857 _io("No connector name given");
861 unless(defined($schema)) {
862 _io("No schema given");
867 unless(_config_list_has(['defined-connectors'], $conn)) {
868 _io("Connector does not exist");
872 $schema = lc($schema);
873 _config_del(['connectors', $conn, 'schemas', $schema]);
874 } elsif ($subcmd eq 'select') {
875 my @connlist = map { lc } @_;
877 if (scalar(@connlist) == 0) {
878 _io("No connectors given");
882 foreach (@connlist) {
883 unless(_config_list_has(['defined-connectors'], $_)) {
884 _io("Connector %s does not exist", $_);
889 _config_list_set(['active-connectors'], @connlist);
891 _io("connector [list|add|del|addschema|delschema|help] <options>");
892 _io(" help: Show this help");
893 _io(" list: List the defined connectors");
894 _io(" add <name>: Add a connector with name <name>");
895 _io(" del <name>: Delete the connector with name <name>");
896 _io(" addschema <name> <schema> <proxy>: Add proxy to connector for the given schema");
897 _io(" delschema <name> <schema>: Remove the schema from the connector");
898 _io(" select <name> [<name>...]: Select the connectors to use");
904 # Global debug if the keyword "all" is given, or just for the
905 # current window otherwise
911 if (defined($scope) and (lc($scope) eq 'all')) {
912 _io("Global debug enabled");
915 _io("Debug for this window enabled");
916 $debugwindows{$event->{window}} = 1;
922 # Disable global debug if the keyword "all" is given (this will
923 # also disable all per-window debugs) or just for the current
930 if (defined($scope) and (lc($scope) eq 'all')) {
933 _io("Global debug disabled");
935 delete($debugwindows{$event->{window}});
936 _io("Debug for this window disabled");
941 # Handle generic service commands
945 my $subcmd = shift || '';
947 $subcmd = lc($subcmd);
949 if ($subcmd eq 'cache') {
950 _cmd_service_cache($event, @_);
956 # Display or clear the content of the config cache
958 sub _cmd_service_cache {
962 $subcmd = 'list' unless defined($subcmd);
963 $subcmd = lc($subcmd);
965 if ($subcmd eq 'list') {
966 _io("Content of config cache:");
967 foreach (sort(keys(%config_cache))) {
968 if (exists($config_cache{$_}->{value})) {
969 _io(" %s => %s", $_, $config_cache{$_}->{value});
971 _io(" %s present", $_);
974 } elsif ($subcmd eq 'clear') {
976 _io("Cache cleared");
982 # Return the list of loaded grabbers.
983 # This is used by the test programs, and is not meant to be
991 # ==============================================
992 # Builtin config handling functions
993 # These are used if the library user does not
994 # register its own config_* handlers
995 # ==============================================
997 sub _builtin_config_init {
999 if (defined($builtin_config_path)) {
1000 my $filename = File::Spec->catfile($builtin_config_path, 'videosite.json');
1002 _debug("Trying to load configuration from %s", $filename);
1007 open(CONF, '<', $filename);
1008 %builtin_config = %{JSON->new->utf8->decode(<CONF>)};
1011 _io("Error loading configuration: %s", $@);
1014 } elsif (defined($builtin_config_default)) {
1015 _debug("Initializing builtin config from external default");
1016 foreach (keys(%{$builtin_config_default})) {
1017 _debug("Setting %s=%s", $_, $builtin_config_default->{$_});
1018 $builtin_config{$_} = $builtin_config_default->{$_};
1023 sub _builtin_config_get {
1024 return $builtin_config{join(".", @{$_[0]})};
1027 sub _builtin_config_set {
1028 $builtin_config{join(".", @{$_[0]})} = $_[1];
1031 sub _builtin_config_has {
1032 return exists($builtin_config{join(".", @{$_[0]})});
1035 sub _builtin_config_save {
1037 if (defined($builtin_config_path)) {
1038 my $filename = File::Spec->catfile($builtin_config_path, 'videosite.json');
1040 _debug("Attempting to save config to %s", $filename);
1043 my ($tempfile, $tempfn) = tempfile("videosite.json.XXXXXX", dir => $builtin_config_path);
1044 print $tempfile JSON->new->pretty->utf8->encode(\%builtin_config);
1046 rename($tempfn, $filename);
1055 sub _builtin_config_del {
1056 delete($builtin_config{join(".", @{$_[0]})});
1060 # ==============================================
1061 # From this point on publicly callable functions
1062 # ==============================================
1067 # Initialization function for the library
1068 # Actually not the first thing to be called, it expects an API
1069 # hash (register_api) to be registered first
1072 unless($remote_api) {
1073 $error = "No API set";
1077 # Initialize configuration data
1078 $remote_api->{config_init}->();
1080 # Check/create default values, if they do not exist
1081 _recursive_hash_walk($defaultconfig, \&_init_config_item);
1084 _load_modules(File::Spec->catfile($remote_api->{module_path}->(), 'videosite'));
1086 unless (@grabbers && @getters) {
1087 _io('No grabbers or no getters found, can not proceed.');
1092 $getter = $getters[0];
1093 foreach my $p (@getters) {
1094 if (_config_get(['getter']) eq $p->{'NAME'}) {
1098 _debug('Selected %s as getter', $getter->{'NAME'});
1099 _config_set(['getter'], $getter->{'NAME'});
1102 _io('initialized successfully');
1107 # Register a remote API. This API contains a basic output function (used
1108 # when no window specific function is available), some config functions
1109 # and a color code function.
1113 my @config_functions = qw(config_init config_set config_get config_has config_save config_del);
1117 unless(defined($a)) {
1118 die("No API given");
1122 # The config_* handlers are special in that they must either all be
1123 # provided by the user, or none. In the latter case builtin
1124 # handlers will be used, but the config will not persist.
1127 foreach (@config_functions) {
1128 if (exists($a->{$_})) {
1135 unless (($c == 0) or ($c == scalar(@config_functions))) {
1136 $error = sprintf("Missing config function: %s", $missing[0]);
1140 foreach (keys(%{$a})) {
1141 if (ref($a->{$_}) ne 'CODE') {
1142 $error = sprintf("API handler %s is not a subroutine reference", $_);
1144 $remote_api->{$_} = $a->{$_};
1147 if (exists($a->{_debug})) {
1148 $debug = $a->{_debug}->();
1151 if (exists($a->{_config_path})) {
1152 $builtin_config_path = $a->{_config_path}->();
1155 if (exists($a->{_config_default})) {
1156 $builtin_config_default = $a->{_config_default}->();
1159 if (exists($a->{_config_cache})) {
1160 $config_cache = $a->{_config_cache}->();
1163 @outputstack = ({io => $remote_api->{'io'}, window => ""});
1169 # Check a message for useable links
1171 sub check_for_link {
1173 my $message = $event->{message};
1178 my $mode = _config_get(['mode']);
1182 # If /nosave is present in the message switch to display mode, regardless
1185 if ($message =~ m,(?:\s|^)/nosave(?:\s|$),) {
1189 _push_output($event);
1190 $message = _expand_url_shortener($message);
1194 # Offer the message to all Grabbers in turn
1195 GRABBER: foreach $g (@grabbers) {
1196 ($m, $p) = $g->get($message);
1197 while (defined($m)) {
1198 _debug('Metadata: %s', Dumper($m));
1200 if (exists($remote_api->{link_callback})) {
1201 $skip = $remote_api->{link_callback}->($m);
1204 if ('download' eq $mode) {
1206 sprintf('%s>>> %sSaving %s%%s%s %s%%s',
1209 _colorpair('*yellow'),
1211 _colorpair('*green'),
1216 unless($getter->get($m)) {
1217 _io(sprintf('%s>>> FAILED', _colorpair('*red')));
1219 } elsif ('display' eq $mode) {
1221 sprintf('%s>>> %sSaw %s%%s%s %s%%s',
1222 _colorpair('*magenta'),
1224 _colorpair('*yellow'),
1226 _colorpair('*green')
1232 _io(sprintf('%s>>> Invalid operation mode', _colorpair('*red')));
1236 # Remove the matched part from the message and try again (there may be
1240 last GRABBER if ($message =~ /^\s*$/);
1242 ($m, $p) = $g->get($message);
1250 # Handle a videosite command (/videosite ...) entered in the client
1252 sub handle_command {
1254 my ($cmd, @params) = split(/\s+/, $event->{message});
1256 _push_output($event);
1258 if (exists($videosite_commands->{$cmd})) {
1259 $videosite_commands->{$cmd}->($event, @params);