can do all jsarray could, and much more.
#
-# A helper class for parsing textual JS hashes into perl
-# hashes
+# A helper class for parsing textual JSON structures into Perl
+# data structures
#
-# The parser is in jsarray.yp, to regenerate you'll need the Parse::YAPP
-# package. Use 'yapp -m videosite::jsarray -s jsarray.yp' to regenerate
+# The parser is in JSONNospace.yp; to regenerate it you'll need the Parse::Yapp
+# package. Use 'yapp -m videosite::JSONNospace -s JSONNospace.yp' to regenerate.
#
package videosite::JSLexArrayParser;
@ISA = qw(videosite::JSArrayParser);
use Parse::Lex;
-use videosite::jsarray;
+use videosite::JSONNospace;
+use Data::Dumper;
use strict;
my @tokens = (
- COLON => '[:]',
- RIGHTC => '[\}]',
- LEFTC => '[\{]',
- QUOTE => '[\"]',
- COMMA => '[,]',
- ID => '[\w_%\.\+-]+'
+ COLON => ':',
+ QUOTE => '\"',
+ SINGLEQUOTE => '\\\'',
+ TRUE => 'true',
+ FALSE => 'false',
+ NULL => 'null',
+ QUADHEX => 'u[0-9a-fA-F]{4}',
+ INTEGER => '[0-9]+',
+ QUOTEDNORMAL => '[nr]',
+ SIMPLECHAR => '[-\w\._\?\+=\&\!%<>;]+',
+ BACKSLASH => '\\\\',
+ SLASH => '/',
+ COMMA => ',',
+ CURLYOPEN => '{',
+ CURLYCLOSE => '}',
+ SQUAREOPEN => '\[',
+ SQUARECLOSE => '\]',
);
sub new {
my $class = shift;
my $self = $class->SUPER::new();
- $self->{'_PARSER'} = videosite::jsarray->new();
+ $self->{'_PARSER'} = videosite::JSONNospace->new();
$self->{'_LEXER'} = Parse::Lex->new(@tokens);
return bless($self, $class);
sub parse {
my $self = shift;
my $s = shift;
- my @result;
+ my $result;
my $l = $self->{'_LEXER'};
$l->from($s);
- @result = $self->{'_PARSER'}->YYParse(
+ $result = $self->{'_PARSER'}->YYParse(
yylex => sub {
my $tok = $l->next();
return ('', undef) unless $tok;
$_[0]->YYAbort();
},
yydebug => 0x0);
- return $result[0]?{@{$result[0]}}:undef;
+ return ref($result)?$result->[0]:$result;
}
1;
# ANY CHANGE MADE HERE WILL BE LOST !
#
####################################################################
-package videosite::jsarray;
+package videosite::JSONNospace;
use vars qw ( @ISA );
use strict;
#End of include--------------------------------------------------
+#line 31 "JSONNospace.yp"
+
+use Encode;
sub new {
[
{#State 0
ACTIONS => {
- 'LEFTC' => 2
+ 'TRUE' => 1,
+ 'FALSE' => 10,
+ 'CURLYOPEN' => 2,
+ 'SQUAREOPEN' => 11,
+ 'QUOTE' => 3
},
GOTOS => {
- 'array' => 1
+ 'boolean' => 8,
+ 'array' => 7,
+ 'hash' => 6,
+ 'basicstream' => 5,
+ 'quotestring' => 9,
+ 'basictype' => 4
}
},
{#State 1
- ACTIONS => {
- '' => 3
- }
+ DEFAULT => -7
},
{#State 2
ACTIONS => {
- 'QUOTE' => 7
+ 'CURLYCLOSE' => 14,
+ 'QUOTE' => 3
},
GOTOS => {
- 'quotestring' => 4,
- 'kvpair' => 5,
- 'kvlist' => 6
+ 'quotestring' => 15,
+ 'kvstream' => 12,
+ 'kvpair' => 13
}
},
{#State 3
- DEFAULT => 0
+ ACTIONS => {
+ 'SIMPLECHAR' => 17,
+ 'INTEGER' => 16,
+ 'TRUE' => 19,
+ 'NULL' => 20,
+ 'COLON' => 21,
+ 'BACKSLASH' => 23,
+ 'COMMA' => 22,
+ 'QUOTE' => 25,
+ 'SINGLEQUOTE' => 26,
+ 'FALSE' => 27,
+ 'QUOTEDNORMAL' => 31,
+ 'U' => 30,
+ 'QUADHEX' => 29
+ },
+ GOTOS => {
+ 'charstream' => 18,
+ 'char' => 28,
+ 'escapedchar' => 24
+ }
},
{#State 4
+ DEFAULT => -1
+ },
+ {#State 5
ACTIONS => {
- 'COLON' => 8
+ '' => 32,
+ 'COMMA' => 33
}
},
- {#State 5
+ {#State 6
DEFAULT => -3
},
- {#State 6
+ {#State 7
+ DEFAULT => -4
+ },
+ {#State 8
+ DEFAULT => -6
+ },
+ {#State 9
+ DEFAULT => -5
+ },
+ {#State 10
+ DEFAULT => -8
+ },
+ {#State 11
ACTIONS => {
- 'COMMA' => 9,
- 'RIGHTC' => 10
+ 'TRUE' => 1,
+ 'FALSE' => 10,
+ 'SQUARECLOSE' => 35,
+ 'CURLYOPEN' => 2,
+ 'SQUAREOPEN' => 11,
+ 'QUOTE' => 3
+ },
+ GOTOS => {
+ 'basicstream' => 34,
+ 'hash' => 6,
+ 'array' => 7,
+ 'boolean' => 8,
+ 'quotestring' => 9,
+ 'basictype' => 4
}
},
- {#State 7
+ {#State 12
ACTIONS => {
- 'ID' => 11,
- 'QUOTE' => 12
+ 'CURLYCLOSE' => 37,
+ 'COMMA' => 36
}
},
- {#State 8
+ {#State 13
+ DEFAULT => -13
+ },
+ {#State 14
+ DEFAULT => -11
+ },
+ {#State 15
ACTIONS => {
- 'QUOTE' => 7
+ 'COLON' => 38
+ }
+ },
+ {#State 16
+ DEFAULT => -39
+ },
+ {#State 17
+ DEFAULT => -38
+ },
+ {#State 18
+ ACTIONS => {
+ 'SIMPLECHAR' => 17,
+ 'INTEGER' => 16,
+ 'TRUE' => 19,
+ 'NULL' => 20,
+ 'COLON' => 21,
+ 'BACKSLASH' => 23,
+ 'COMMA' => 22,
+ 'QUOTE' => 39,
+ 'SINGLEQUOTE' => 26,
+ 'FALSE' => 27,
+ 'QUOTEDNORMAL' => 31,
+ 'U' => 30,
+ 'QUADHEX' => 29
},
GOTOS => {
- 'quotestring' => 13
+ 'char' => 40,
+ 'escapedchar' => 24
}
},
- {#State 9
+ {#State 19
+ DEFAULT => -43
+ },
+ {#State 20
+ DEFAULT => -45
+ },
+ {#State 21
+ DEFAULT => -40
+ },
+ {#State 22
+ DEFAULT => -41
+ },
+ {#State 23
+ ACTIONS => {
+ 'BACKSLASH' => 41,
+ 'CURLYOPEN' => 43,
+ 'SLASH' => 42,
+ 'QUOTE' => 44,
+ 'SINGLEQUOTE' => 45,
+ 'CURLYCLOSE' => 46,
+ 'SQUARECLOSE' => 47,
+ 'SQUAREOPEN' => 50,
+ 'QUADHEX' => 49,
+ 'QUOTEDNORMAL' => 48
+ }
+ },
+ {#State 24
+ DEFAULT => -46
+ },
+ {#State 25
+ DEFAULT => -21
+ },
+ {#State 26
+ DEFAULT => -42
+ },
+ {#State 27
+ DEFAULT => -44
+ },
+ {#State 28
+ DEFAULT => -23
+ },
+ {#State 29
+ DEFAULT => -37
+ },
+ {#State 30
+ DEFAULT => -36
+ },
+ {#State 31
+ DEFAULT => -35
+ },
+ {#State 32
+ DEFAULT => 0
+ },
+ {#State 33
ACTIONS => {
- 'QUOTE' => 7
+ 'TRUE' => 1,
+ 'FALSE' => 10,
+ 'CURLYOPEN' => 2,
+ 'SQUAREOPEN' => 11,
+ 'QUOTE' => 3
},
GOTOS => {
- 'quotestring' => 4,
- 'kvpair' => 14
+ 'hash' => 6,
+ 'array' => 7,
+ 'boolean' => 8,
+ 'quotestring' => 9,
+ 'basictype' => 51
}
},
- {#State 10
- DEFAULT => -1
+ {#State 34
+ ACTIONS => {
+ 'SQUARECLOSE' => 52,
+ 'COMMA' => 33
+ }
},
- {#State 11
+ {#State 35
+ DEFAULT => -9
+ },
+ {#State 36
ACTIONS => {
- 'QUOTE' => 15
+ 'QUOTE' => 3
+ },
+ GOTOS => {
+ 'quotestring' => 15,
+ 'kvpair' => 53
}
},
- {#State 12
- DEFAULT => -5
+ {#State 37
+ DEFAULT => -12
},
- {#State 13
- DEFAULT => -4
+ {#State 38
+ ACTIONS => {
+ 'INTEGER' => 54,
+ 'TRUE' => 1,
+ 'NULL' => 55,
+ 'FALSE' => 10,
+ 'CURLYOPEN' => 2,
+ 'SQUAREOPEN' => 11,
+ 'QUOTE' => 3
+ },
+ GOTOS => {
+ 'hash' => 58,
+ 'array' => 57,
+ 'boolean' => 56,
+ 'quotestring' => 59
+ }
},
- {#State 14
+ {#State 39
+ DEFAULT => -22
+ },
+ {#State 40
+ DEFAULT => -24
+ },
+ {#State 41
+ DEFAULT => -32
+ },
+ {#State 42
+ DEFAULT => -31
+ },
+ {#State 43
+ DEFAULT => -25
+ },
+ {#State 44
+ DEFAULT => -29
+ },
+ {#State 45
+ DEFAULT => -30
+ },
+ {#State 46
+ DEFAULT => -26
+ },
+ {#State 47
+ DEFAULT => -28
+ },
+ {#State 48
+ DEFAULT => -34
+ },
+ {#State 49
+ DEFAULT => -33
+ },
+ {#State 50
+ DEFAULT => -27
+ },
+ {#State 51
DEFAULT => -2
},
- {#State 15
- DEFAULT => -6
+ {#State 52
+ DEFAULT => -10
+ },
+ {#State 53
+ DEFAULT => -14
+ },
+ {#State 54
+ DEFAULT => -20
+ },
+ {#State 55
+ DEFAULT => -19
+ },
+ {#State 56
+ DEFAULT => -18
+ },
+ {#State 57
+ DEFAULT => -17
+ },
+ {#State 58
+ DEFAULT => -16
+ },
+ {#State 59
+ DEFAULT => -15
}
],
yyrules =>
'$start', 2, undef
],
[#Rule 1
- 'array', 3,
+ 'basicstream', 1,
sub
-#line 3 "jsarray.yp"
-{ return $_[2] }
+#line 37 "JSONNospace.yp"
+{ return [ $_[1] ] }
],
[#Rule 2
- 'kvlist', 3,
+ 'basicstream', 3,
sub
-#line 6 "jsarray.yp"
-{ return [ @{$_[1]}, @{$_[3]} ] }
+#line 38 "JSONNospace.yp"
+{ return [ @{$_[1]}, $_[3] ] }
],
[#Rule 3
- 'kvlist', 1, undef
+ 'basictype', 1, undef
],
[#Rule 4
+ 'basictype', 1, undef
+ ],
+ [#Rule 5
+ 'basictype', 1, undef
+ ],
+ [#Rule 6
+ 'basictype', 1, undef
+ ],
+ [#Rule 7
+ 'boolean', 1,
+sub
+#line 47 "JSONNospace.yp"
+{ return 1 }
+ ],
+ [#Rule 8
+ 'boolean', 1,
+sub
+#line 48 "JSONNospace.yp"
+{ return 0 }
+ ],
+ [#Rule 9
+ 'array', 2,
+sub
+#line 51 "JSONNospace.yp"
+{ return [] }
+ ],
+ [#Rule 10
+ 'array', 3,
+sub
+#line 52 "JSONNospace.yp"
+{ return [ @{$_[2]} ] }
+ ],
+ [#Rule 11
+ 'hash', 2,
+sub
+#line 55 "JSONNospace.yp"
+{ return { } }
+ ],
+ [#Rule 12
+ 'hash', 3,
+sub
+#line 56 "JSONNospace.yp"
+{ return { @{$_[2]} } }
+ ],
+ [#Rule 13
+ 'kvstream', 1, undef
+ ],
+ [#Rule 14
+ 'kvstream', 3,
+sub
+#line 60 "JSONNospace.yp"
+{ return [ @{$_[1]}, @{$_[3]} ] }
+ ],
+ [#Rule 15
'kvpair', 3,
sub
-#line 10 "jsarray.yp"
+#line 63 "JSONNospace.yp"
{ return [ $_[1], $_[3] ] }
],
- [#Rule 5
+ [#Rule 16
+ 'kvpair', 3,
+sub
+#line 64 "JSONNospace.yp"
+{ return [ $_[1], $_[3] ] }
+ ],
+ [#Rule 17
+ 'kvpair', 3,
+sub
+#line 65 "JSONNospace.yp"
+{ return [ $_[1], $_[3] ] }
+ ],
+ [#Rule 18
+ 'kvpair', 3,
+sub
+#line 66 "JSONNospace.yp"
+{ return [ $_[1], $_[3] ] }
+ ],
+ [#Rule 19
+ 'kvpair', 3,
+sub
+#line 67 "JSONNospace.yp"
+{ return [ $_[1], undef ] }
+ ],
+ [#Rule 20
+ 'kvpair', 3,
+sub
+#line 68 "JSONNospace.yp"
+{ return [ $_[1], $_[3] ] }
+ ],
+ [#Rule 21
'quotestring', 2,
sub
-#line 13 "jsarray.yp"
+#line 71 "JSONNospace.yp"
{ return "" }
],
- [#Rule 6
+ [#Rule 22
'quotestring', 3,
sub
-#line 14 "jsarray.yp"
+#line 72 "JSONNospace.yp"
+{ return $_[2] }
+ ],
+ [#Rule 23
+ 'charstream', 1, undef
+ ],
+ [#Rule 24
+ 'charstream', 2,
+sub
+#line 76 "JSONNospace.yp"
+{ return $_[1] . $_[2] }
+ ],
+ [#Rule 25
+ 'escapedchar', 2,
+sub
+#line 79 "JSONNospace.yp"
+{ return $_[2] }
+ ],
+ [#Rule 26
+ 'escapedchar', 2,
+sub
+#line 80 "JSONNospace.yp"
+{ return $_[2] }
+ ],
+ [#Rule 27
+ 'escapedchar', 2,
+sub
+#line 81 "JSONNospace.yp"
+{ return $_[2] }
+ ],
+ [#Rule 28
+ 'escapedchar', 2,
+sub
+#line 82 "JSONNospace.yp"
+{ return $_[2] }
+ ],
+ [#Rule 29
+ 'escapedchar', 2,
+sub
+#line 83 "JSONNospace.yp"
+{ return $_[2] }
+ ],
+ [#Rule 30
+ 'escapedchar', 2,
+sub
+#line 84 "JSONNospace.yp"
{ return $_[2] }
+ ],
+ [#Rule 31
+ 'escapedchar', 2,
+sub
+#line 85 "JSONNospace.yp"
+{ return $_[2] }
+ ],
+ [#Rule 32
+ 'escapedchar', 2,
+sub
+#line 86 "JSONNospace.yp"
+{ return $_[2] }
+ ],
+ [#Rule 33
+ 'escapedchar', 2,
+sub
+#line 87 "JSONNospace.yp"
+{ return encode("utf8", pack("U", hex(substr($_[2],1)))) }
+ ],
+ [#Rule 34
+ 'escapedchar', 2,
+sub
+#line 88 "JSONNospace.yp"
+{
+ if ($_[2] eq 'n') {
+ return "\n";
+ } elsif ($_[2] eq 'r') {
+ return "\r";
+ } else {
+ return $_[2];
+ }}
+ ],
+ [#Rule 35
+ 'char', 1, undef
+ ],
+ [#Rule 36
+ 'char', 1, undef
+ ],
+ [#Rule 37
+ 'char', 1, undef
+ ],
+ [#Rule 38
+ 'char', 1, undef
+ ],
+ [#Rule 39
+ 'char', 1, undef
+ ],
+ [#Rule 40
+ 'char', 1, undef
+ ],
+ [#Rule 41
+ 'char', 1, undef
+ ],
+ [#Rule 42
+ 'char', 1, undef
+ ],
+ [#Rule 43
+ 'char', 1, undef
+ ],
+ [#Rule 44
+ 'char', 1, undef
+ ],
+ [#Rule 45
+ 'char', 1, undef
+ ],
+ [#Rule 46
+ 'char', 1, undef
]
],
@_);
bless($self,$class);
}
-#line 15 "jsarray.yp"
+#line 111 "JSONNospace.yp"
1;
--- /dev/null
+/*
+ * This file contains a simple and quite possibly wrong parser for
+ * JSON-encoded structures.
+ *
+ * It does not support whitespace in quoted strings; any whitespace there is
+ * silently dropped.
+ *
+ * It has primarily been written to parse video parameters from the
+ * DailyMotion site
+ *
+ * Token definitions:
+ * COLON => ':',
+ * QUOTE => '\"',
+ * SINGLEQUOTE => '\\\'',
+ * TRUE => 'true',
+ * FALSE => 'false',
+ * NULL => 'null',
+ * QUADHEX => 'u[0-9a-fA-F]{4}',
+ * INTEGER => '[0-9]+',
+ * QUOTEDNORMAL => '[nr]',
+ * SIMPLECHAR => '[-\w\._\?\+=\&\!%<>;]+',
+ * BACKSLASH => '\\\\',
+ * SLASH => '/',
+ * COMMA => ',',
+ * CURLYOPEN => '{',
+ * CURLYCLOSE => '}',
+ * SQUAREOPEN => '\[',
+ * SQUARECLOSE => '\]',
+ */
+
+%{
+use Encode;
+%}
+%%
+
+basicstream:
+ basictype { return [ $_[1] ] } |
+ basicstream COMMA basictype { return [ @{$_[1]}, $_[3] ] };
+
+basictype:
+ hash |
+ array |
+ quotestring |
+ boolean;
+
+boolean:
+ TRUE { return 1 } |
+ FALSE { return 0 };
+
+array:
+ SQUAREOPEN SQUARECLOSE { return [] } |
+ SQUAREOPEN basicstream SQUARECLOSE { return [ @{$_[2]} ] };
+
+hash:
+ CURLYOPEN CURLYCLOSE { return { } } |
+ CURLYOPEN kvstream CURLYCLOSE { return { @{$_[2]} } };
+
+kvstream:
+ kvpair |
+ kvstream COMMA kvpair { return [ @{$_[1]}, @{$_[3]} ] };
+
+kvpair:
+ quotestring COLON quotestring { return [ $_[1], $_[3] ] } |
+ quotestring COLON hash { return [ $_[1], $_[3] ] } |
+ quotestring COLON array { return [ $_[1], $_[3] ] } |
+ quotestring COLON boolean { return [ $_[1], $_[3] ] } |
+ quotestring COLON NULL { return [ $_[1], undef ] } |
+ quotestring COLON INTEGER { return [ $_[1], $_[3] ] };
+
+quotestring:
+ QUOTE QUOTE { return "" } |
+ QUOTE charstream QUOTE { return $_[2] };
+
+charstream:
+ char |
+ charstream char { return $_[1] . $_[2] };
+
+escapedchar:
+ BACKSLASH CURLYOPEN { return $_[2] } |
+ BACKSLASH CURLYCLOSE { return $_[2] } |
+ BACKSLASH SQUAREOPEN { return $_[2] } |
+ BACKSLASH SQUARECLOSE { return $_[2] } |
+ BACKSLASH QUOTE { return $_[2] } |
+ BACKSLASH SINGLEQUOTE { return $_[2] } |
+ BACKSLASH SLASH { return $_[2] } |
+ BACKSLASH BACKSLASH { return $_[2] } |
+ BACKSLASH QUADHEX { return encode("utf8", pack("U", hex(substr($_[2],1)))) } |
+ BACKSLASH QUOTEDNORMAL {
+ if ($_[2] eq 'n') {
+ return "\n";
+ } elsif ($_[2] eq 'r') {
+ return "\r";
+ } else {
+ return $_[2];
+ }};
+
+char:
+ QUOTEDNORMAL |
+ U |
+ QUADHEX |
+ SIMPLECHAR |
+ INTEGER |
+ COLON |
+ COMMA |
+ SINGLEQUOTE |
+ TRUE |
+ FALSE |
+ NULL |
+ escapedchar;
+
+%%
+++ /dev/null
-%%
-array:
- LEFTC kvlist RIGHTC { return $_[2] };
-
-kvlist:
- kvlist COMMA kvpair { return [ @{$_[1]}, @{$_[3]} ] } |
- kvpair;
-
-kvpair:
- quotestring COLON quotestring { return [ $_[1], $_[3] ] };
-
-quotestring:
- QUOTE QUOTE { return "" } |
- QUOTE ID QUOTE { return $_[2] };
-%%