Fix Dailymotion grabber
[videosite.git] / videosite / DailyMotionGrabber.pm
1 # Grabber for dailymotion.com
2 #
3 # (c) 2007 by Ralf Ertzinger <ralf@camperquake.de>
4 # licensed under GNU GPL v2
5
6 package videosite::DailyMotionGrabber;
7
8 use videosite::GrabberBase;
9 @ISA = qw(videosite::GrabberBase);
10
11 use HTML::TokeParser;
12 use videosite::JSArrayParser;
13 use Data::Dumper;
14
15 use strict;
16
# Construct a grabber for dailymotion.com video pages.
#
# Starts from the object returned by the parent class constructor, fills in
# the plugin name, the self-test URL and title and the list of URL patterns,
# re-blesses the object into the requested class and finally calls
# _prepare_parameters() on it.
#
# Returns the new grabber object.
sub new {
    my $class = shift;
    my $self = $class->SUPER::new();

    $self->{'NAME'} = 'dailymotion';
    $self->{_SELFTESTURL} = 'http://www.dailymotion.com/video/xylv6u_moon-duo-sleepwalker_music';
    $self->{_SELFTESTTITLE} = 'Moon Duo - Sleepwalker';

    # Capture group 1 is the complete video URL, capture group 2 the video
    # ID; _parse() relies on exactly this layout.
    # The dot in "dailymotion.com" is escaped so that look-alike host names
    # (e.g. "dailymotionxcom") no longer match, and both http and https
    # links are accepted.
    $self->{'PATTERNS'} = ['(https?://(?:[-a-zA-Z0-9_.]+\.)*dailymotion\.com/(?:[^/]+/)*video/([-a-zA-Z0-9_]+))'];

    bless($self, $class);

    $self->_prepare_parameters();

    return $self;
}
32
# Collect the metadata for a single dailymotion video.
#
# Parameters:
#   $url     - text containing the video URL
#   $pattern - regular expression to apply to $url; capture group 1 must be
#              the video URL and capture group 2 the video ID
#
# The video page is downloaded through $self->simple_get(). The title is
# taken from the "og:title" meta tag, the download URL from the "flashvars"
# assignment found in one of the page's script blocks.
#
# Returns a hash reference with the keys URL, ID, TYPE, SOURCE, TITLE and
# DLURL. Returns undef, after reporting the reason through $self->error(),
# if the URL does not match, the page cannot be downloaded, the embedded
# data cannot be parsed, or no title/download URL is found.
sub _parse {
    my $self = shift;
    my $url = shift;
    my $pattern = shift;
    my $content;
    my $metadata = {};
    my $p;
    my $e;
    my $id;

    # Only trust $1/$2 if this very match succeeded; otherwise they would
    # be left over from some earlier, unrelated match.
    unless (defined($url) && ($url =~ m|$pattern|) && defined($2)) {
        $self->error('Could not parse URL %s', defined($url) ? $url : '');
        return undef;
    }
    ($url, $id) = ($1, $2);

    $metadata->{'URL'} = $url;
    $metadata->{'ID'} = $id;
    $metadata->{'TYPE'} = 'video';
    $metadata->{'SOURCE'} = $self->{'NAME'};
    $metadata->{'TITLE'} = undef;
    $metadata->{'DLURL'} = undef;

    unless(defined($content = $self->simple_get(sprintf('http://www.dailymotion.com/video/%s', $id)))) {
        $self->error('Could not download %s', $url);
        return undef;
    }

    $p = HTML::TokeParser->new(\$content);

    # Look for the title in the meta tags and for the video data in the
    # script blocks
    while ($e = $p->get_tag('meta', 'script')) {
        if ('meta' eq $e->[0]) {
            if (exists($e->[1]->{'property'}) && ('og:title' eq $e->[1]->{'property'})) {
                $metadata->{'TITLE'} = $e->[1]->{'content'};
            }
        } elsif ('script' eq $e->[0]) {
            my $c = $p->get_text();

            $self->debug("Found script: %s", $c);

            if ($c =~ m|flashvars = (.+);$|m) {
                my $flashvars = $1;
                my $jsp = videosite::JSArrayParser->new();
                my $layer;
                my $sequence;
                my $dlurl;

                $self->debug("Found flashvars: %s", $flashvars);

                $self->debug("Using %s to parse", ref($jsp));
                $flashvars = $jsp->parse($flashvars);
                $self->debug("Parsed flashvars: %s", Dumper($flashvars));

                # The page content is not under our control: make sure the
                # parser really produced a hash with a sequence entry
                # before dereferencing it.
                unless ((ref($flashvars) eq 'HASH') && defined($flashvars->{'sequence'})) {
                    $self->error("Found flashvars, but no sequence");
                    return undef;
                }

                # The sequence is percent-encoded. Decode well-formed %XX
                # escapes only, a stray "%" is left untouched.
                $sequence = $flashvars->{'sequence'};
                $sequence =~ s/%([0-9A-Fa-f]{2})/chr(hex($1))/ge;

                $self->debug("Decoded sequence: %s", $sequence);
                $sequence = $jsp->parse($sequence);

                unless(ref($sequence) eq 'HASH') {
                    $self->error("Found sequence, but could not parse");
                    return undef;
                }

                $self->debug("Parsed sequence: %s", Dumper($sequence));

                $layer = $self->_fetch_layer($sequence->{'sequence'}, "root/layerList", "background/sequenceList", "main/layerList", "video/param");
                unless(ref($layer) eq 'HASH') {
                    $self->error("Could not find video layer");
                    return undef;
                }

                # Found video section. Walk the qualities from best to
                # worst and take the first one that actually carries a
                # URL; a key that is present but empty must not hide the
                # lower qualities.
                foreach my $quality (qw(hd1080URL hd720URL hqURL sdURL ldURL)) {
                    if (defined($layer->{$quality}) && length($layer->{$quality})) {
                        $dlurl = $layer->{$quality};
                        last;
                    }
                }

                unless(defined($dlurl)) {
                    $self->error("Video section found, but no URLs");
                    return undef;
                }

                $metadata->{'DLURL'} = $dlurl;
            }
        }
    }

    unless(defined($metadata->{'DLURL'})) {
        $self->error('Could not determine download URL');
        return undef;
    }

    unless(defined($metadata->{'TITLE'})) {
        $self->error('Could not determine video title');
        return undef;
    }

    return $metadata;
}
130
# Walk a nested structure along a path of "NAME/KEY" steps.
#
# Parameters:
#   $sequence - array reference whose elements are hash references; an
#               element is selected by the value of its 'name' key
#   $point    - the current step in the form "NAME/KEY": select the first
#               element whose 'name' equals NAME and descend into the
#               value stored under KEY
#   @_        - the remaining steps, consumed one per recursion level
#
# Returns $sequence itself once no step is left. Returns undef if the
# selected element has no KEY entry, if no element is named NAME, if the
# step is not of the form "NAME/KEY", or if the data does not have the
# expected shape.
sub _fetch_layer {
    my $self = shift;
    my $sequence = shift;
    my $point = shift;
    my $next;
    my @points = @_;

    # $point is undef on the final call, do not hand undef to the formatter
    $self->debug("Looking for %s in %s", defined($point) ? $point : '', Dumper($sequence));

    unless(defined($point)) {
        $self->debug("Reached last point");
        return $sequence;
    }

    ($point, $next) = split(/\//, $point, 2);
    unless(defined($point) && defined($next)) {
        $self->debug("Malformed path step, expected NAME/KEY");
        return undef;
    }

    # The structure is derived from remote data, so check its shape
    # instead of dying on an unexpected reference type.
    unless(ref($sequence) eq 'ARRAY') {
        $self->debug("Cannot look for %s, data is not a list", $point);
        return undef;
    }

    foreach my $entry (@{$sequence}) {
        next unless(ref($entry) eq 'HASH');
        next unless(defined($entry->{'name'}) and ($entry->{'name'} eq $point));

        # Only the first entry with a matching name is considered
        if (exists($entry->{$next})) {
            $self->debug("Using %s in %s", $next, $point);
            return $self->_fetch_layer($entry->{$next}, @points);
        } else {
            $self->debug("%s found, but no %s", $point, $next);
            return undef;
        }
    }

    $self->debug("Could not find entry named %s", $point);
    return undef;
}
163
164 1;