Fix DailyMotionGrabber, it now needs a JSON parser
[videosite.git] / videosite / DailyMotionGrabber.pm
1 # Grabber for dailymotion.com
2 #
3 # (c) 2007 by Ralf Ertzinger <ralf@camperquake.de>
4 # licensed under GNU GPL v2
5
6 package videosite::DailyMotionGrabber;
7
8 use videosite::GrabberBase;
9 @ISA = qw(videosite::GrabberBase);
10
11 use LWP::Simple qw(!get);
12 use HTML::Parser;
13 use videosite::JSArrayParser;
14 use Data::Dumper;
15
16 use strict;
17
# Constructor.
#
# Builds the object through the parent class constructor, fills in the
# grabber name and the URL pattern this grabber reacts to, re-blesses the
# object into the requested class and finally calls _prepare_parameters().
#
# The pattern has two capture groups: the complete video URL and the
# video ID that follows "/video/".
sub new {
    my ($class) = @_;

    my $grabber = $class->SUPER::new();

    $grabber->{'NAME'}     = 'dailymotion';
    $grabber->{'PATTERNS'} = [
        '(http://(?:[-a-zA-Z0-9_.]+\.)*dailymotion.com/(?:[^/]+/)*video/([-a-zA-Z0-9_]+))',
    ];

    bless($grabber, $class);
    $grabber->_prepare_parameters();

    return $grabber;
}
31
# Extract the metadata of a dailymotion video.
#
# Parameters:
#   $url     - text containing the video URL
#   $pattern - regular expression with two capture groups: the video URL
#              ($1) and the video ID ($2)
#
# Returns a hash reference with the keys URL, ID, TYPE, SOURCE, TITLE and
# DLURL on success. On any failure the problem is reported through
# $self->error() and undef is returned.
#
# The video page is downloaded and scanned twice: the <meta name="title">
# tag provides the title, and the text passed to
# .addVariable("sequence", "...") is URL-decoded, parsed with
# videosite::JSArrayParser and searched for the "video" layer of the
# "main" section, whose parameters carry the download URLs.
sub _parse {
    my $self = shift;
    my $url = shift;
    my $pattern = shift;
    my $content;
    my $metadata = {};
    my $p = HTML::Parser->new(api_version => 3);
    my @accum;
    my @text;
    my $id;

    # Without a successful match $1/$2 would be stale or undefined.
    unless ($url =~ m|$pattern|) {
        $self->error('Could not parse URL %s', $url);
        return undef;
    }

    # Save the captures right away, later matches would overwrite them.
    ($url, $id) = ($1, $2);

    $metadata->{'URL'} = $url;
    $metadata->{'ID'} = $id;
    $metadata->{'TYPE'} = 'video';
    $metadata->{'SOURCE'} = $self->{'NAME'};
    $metadata->{'TITLE'} = undef;
    $metadata->{'DLURL'} = undef;

    unless(defined($content = LWP::Simple::get(sprintf('http://www.dailymotion.com/video/%s', $id)))) {
        $self->error('Could not download %s', $url);
        return undef;
    }

    # Collect all <meta>/<script> start tags and all text chunks
    $p->handler(start => \@accum, "tagname, attr");
    $p->handler(text => \@text, "text");
    $p->report_tags(qw(meta script));
    $p->utf8_mode(1);
    $p->parse($content);

    # Look for the title in the meta tags
    foreach my $tag (@accum) {
        my ($tagname, $attr) = @{$tag};

        next unless ('meta' eq $tagname);
        # Many meta tags carry no name (or no content) attribute at all
        next unless (defined($attr->{'name'}) && ('title' eq $attr->{'name'}));
        next unless (defined($attr->{'content'}));

        $metadata->{'TITLE'} = $attr->{'content'};
        $metadata->{'TITLE'} =~ s/^Dailymotion\s+-\s+//;
        $metadata->{'TITLE'} =~ s/(?:\s+-\s+.*)?$//;
    }

    # Look for the download URL
    foreach my $chunk (@text) {
        next unless ($chunk->[0] =~ m|\.addVariable\("sequence",\s*"([^\"]+)"|);

        my $sequence = $1;
        my $jsp = videosite::JSArrayParser->new();
        my $main;

        # URL-decode; only real %XX hex escapes are translated
        $sequence =~ s/%([0-9a-fA-F]{2})/chr(hex($1))/ge;
        $self->debug("Found sequence: %s", $sequence);

        $self->debug("Using %s to parse", ref($jsp));
        $sequence = $jsp->parse($sequence);

        unless(defined($sequence)) {
            $self->error("Found sequence, but could not parse");
            return undef;
        }

        $self->debug("Parsed sequence: %s", Dumper($sequence));

        foreach my $section (@{$sequence}) {
            if (exists($section->{'name'}) && ($section->{'name'} eq 'main')) {
                # Found main section
                $main = $section->{'layerList'};
            }
        }
        unless(defined($main)) {
            $self->error("Could not find layerList[main]");
            return undef;
        }

        foreach my $layer (@{$main}) {
            next unless (exists($layer->{'name'}) && ($layer->{'name'} eq 'video'));

            # Found video section, pick the best quality that is offered:
            # HD first, then HQ, then SD.
            my $param = $layer->{'param'};
            my $best;

            foreach my $key ('hdURL', 'hqURL', 'sdURL') {
                if (exists($param->{$key})) {
                    $best = $key;
                    last;
                }
            }

            unless(defined($best)) {
                $self->error("Video section found, but no URLs");
                return undef;
            }

            $metadata->{'DLURL'} = $param->{$best};
        }
    }

    unless(defined($metadata->{'DLURL'})) {
        $self->error('Could not determine download URL');
        return undef;
    }

    unless(defined($metadata->{'TITLE'})) {
        $self->error('Could not determine title');
        return undef;
    }

    return $metadata;
}

1;
133
134 1;