From c41cdd5161342c69724537a5094fb9e270f48c55 Mon Sep 17 00:00:00 2001 From: Ralf Ertzinger Date: Sat, 17 Apr 2010 18:21:10 +0200 Subject: [PATCH] Add a new JSON parser, replacing jsarray. This parser can do all jsarray could, and much more. --- videosite/JSLexArrayParser.pm | 42 ++- videosite/{jsarray.pm => JSONNospace.pm} | 525 ++++++++++++++++++++++++++++--- videosite/JSONNospace.yp | 111 +++++++ videosite/jsarray.yp | 15 - 4 files changed, 613 insertions(+), 80 deletions(-) rename videosite/{jsarray.pm => JSONNospace.pm} (63%) create mode 100644 videosite/JSONNospace.yp delete mode 100644 videosite/jsarray.yp diff --git a/videosite/JSLexArrayParser.pm b/videosite/JSLexArrayParser.pm index f250d61..73a8d7e 100644 --- a/videosite/JSLexArrayParser.pm +++ b/videosite/JSLexArrayParser.pm @@ -1,9 +1,9 @@ # -# A helper class for parsing textual JS hashes into perl -# hashes +# A helper class for parsing textual JSON structures into perl +# structures # -# The parser is in jsarray.yp, to regenerate you'll need the Parse::YAPP -# package. Use 'yapp -m videosite::jsarray -s jsarray.yp' to regenerate +# The parser is in JSONNospace.yp, to regenerate you'll need the Parse::YAPP +# package. 
Use 'yapp -m videosite::JSONNospace -s JSONNospace.yp' to regenerate # package videosite::JSLexArrayParser; @@ -12,23 +12,35 @@ use videosite::JSArrayParser; @ISA = qw(videosite::JSArrayParser); use Parse::Lex; -use videosite::jsarray; +use videosite::JSONNospace; +use Data::Dumper; use strict; my @tokens = ( - COLON => '[:]', - RIGHTC => '[\}]', - LEFTC => '[\{]', - QUOTE => '[\"]', - COMMA => '[,]', - ID => '[\w_%\.\+-]+' + COLON => ':', + QUOTE => '\"', + SINGLEQUOTE => '\\\'', + TRUE => 'true', + FALSE => 'false', + NULL => 'null', + QUADHEX => 'u[0-9a-fA-F]{4}', + INTEGER => '[0-9]+', + QUOTEDNORMAL => '[nr]', + SIMPLECHAR => '[-\w\._\?\+=\&\!%<>;]+', + BACKSLASH => '\\\\', + SLASH => '/', + COMMA => ',', + CURLYOPEN => '{', + CURLYCLOSE => '}', + SQUAREOPEN => '\[', + SQUARECLOSE => '\]', ); sub new { my $class = shift; my $self = $class->SUPER::new(); - $self->{'_PARSER'} = videosite::jsarray->new(); + $self->{'_PARSER'} = videosite::JSONNospace->new(); $self->{'_LEXER'} = Parse::Lex->new(@tokens); return bless($self, $class); @@ -37,11 +49,11 @@ sub new { sub parse { my $self = shift; my $s = shift; - my @result; + my $result; my $l = $self->{'_LEXER'}; $l->from($s); - @result = $self->{'_PARSER'}->YYParse( + $result = $self->{'_PARSER'}->YYParse( yylex => sub { my $tok = $l->next(); return ('', undef) unless $tok; @@ -52,7 +64,7 @@ sub parse { $_[0]->YYAbort(); }, yydebug => 0x0); - return $result[0]?{@{$result[0]}}:undef; + return ref($result)?$result->[0]:$result; } 1; diff --git a/videosite/jsarray.pm b/videosite/JSONNospace.pm similarity index 63% rename from videosite/jsarray.pm rename to videosite/JSONNospace.pm index 6f9075a..2c18cb9 100644 --- a/videosite/jsarray.pm +++ b/videosite/JSONNospace.pm @@ -7,7 +7,7 @@ # ANY CHANGE MADE HERE WILL BE LOST ! 
# #################################################################### -package videosite::jsarray; +package videosite::JSONNospace; use vars qw ( @ISA ); use strict; @@ -489,6 +489,9 @@ sub _Parse { #End of include-------------------------------------------------- +#line 31 "JSONNospace.yp" + +use Encode; sub new { @@ -501,86 +504,312 @@ sub new { [ {#State 0 ACTIONS => { - 'LEFTC' => 2 + 'TRUE' => 1, + 'FALSE' => 10, + 'CURLYOPEN' => 2, + 'SQUAREOPEN' => 11, + 'QUOTE' => 3 }, GOTOS => { - 'array' => 1 + 'boolean' => 8, + 'array' => 7, + 'hash' => 6, + 'basicstream' => 5, + 'quotestring' => 9, + 'basictype' => 4 } }, {#State 1 - ACTIONS => { - '' => 3 - } + DEFAULT => -7 }, {#State 2 ACTIONS => { - 'QUOTE' => 7 + 'CURLYCLOSE' => 14, + 'QUOTE' => 3 }, GOTOS => { - 'quotestring' => 4, - 'kvpair' => 5, - 'kvlist' => 6 + 'quotestring' => 15, + 'kvstream' => 12, + 'kvpair' => 13 } }, {#State 3 - DEFAULT => 0 + ACTIONS => { + 'SIMPLECHAR' => 17, + 'INTEGER' => 16, + 'TRUE' => 19, + 'NULL' => 20, + 'COLON' => 21, + 'BACKSLASH' => 23, + 'COMMA' => 22, + 'QUOTE' => 25, + 'SINGLEQUOTE' => 26, + 'FALSE' => 27, + 'QUOTEDNORMAL' => 31, + 'U' => 30, + 'QUADHEX' => 29 + }, + GOTOS => { + 'charstream' => 18, + 'char' => 28, + 'escapedchar' => 24 + } }, {#State 4 + DEFAULT => -1 + }, + {#State 5 ACTIONS => { - 'COLON' => 8 + '' => 32, + 'COMMA' => 33 } }, - {#State 5 + {#State 6 DEFAULT => -3 }, - {#State 6 + {#State 7 + DEFAULT => -4 + }, + {#State 8 + DEFAULT => -6 + }, + {#State 9 + DEFAULT => -5 + }, + {#State 10 + DEFAULT => -8 + }, + {#State 11 ACTIONS => { - 'COMMA' => 9, - 'RIGHTC' => 10 + 'TRUE' => 1, + 'FALSE' => 10, + 'SQUARECLOSE' => 35, + 'CURLYOPEN' => 2, + 'SQUAREOPEN' => 11, + 'QUOTE' => 3 + }, + GOTOS => { + 'basicstream' => 34, + 'hash' => 6, + 'array' => 7, + 'boolean' => 8, + 'quotestring' => 9, + 'basictype' => 4 } }, - {#State 7 + {#State 12 ACTIONS => { - 'ID' => 11, - 'QUOTE' => 12 + 'CURLYCLOSE' => 37, + 'COMMA' => 36 } }, - {#State 8 + {#State 13 + 
DEFAULT => -13 + }, + {#State 14 + DEFAULT => -11 + }, + {#State 15 ACTIONS => { - 'QUOTE' => 7 + 'COLON' => 38 + } + }, + {#State 16 + DEFAULT => -39 + }, + {#State 17 + DEFAULT => -38 + }, + {#State 18 + ACTIONS => { + 'SIMPLECHAR' => 17, + 'INTEGER' => 16, + 'TRUE' => 19, + 'NULL' => 20, + 'COLON' => 21, + 'BACKSLASH' => 23, + 'COMMA' => 22, + 'QUOTE' => 39, + 'SINGLEQUOTE' => 26, + 'FALSE' => 27, + 'QUOTEDNORMAL' => 31, + 'U' => 30, + 'QUADHEX' => 29 }, GOTOS => { - 'quotestring' => 13 + 'char' => 40, + 'escapedchar' => 24 } }, - {#State 9 + {#State 19 + DEFAULT => -43 + }, + {#State 20 + DEFAULT => -45 + }, + {#State 21 + DEFAULT => -40 + }, + {#State 22 + DEFAULT => -41 + }, + {#State 23 + ACTIONS => { + 'BACKSLASH' => 41, + 'CURLYOPEN' => 43, + 'SLASH' => 42, + 'QUOTE' => 44, + 'SINGLEQUOTE' => 45, + 'CURLYCLOSE' => 46, + 'SQUARECLOSE' => 47, + 'SQUAREOPEN' => 50, + 'QUADHEX' => 49, + 'QUOTEDNORMAL' => 48 + } + }, + {#State 24 + DEFAULT => -46 + }, + {#State 25 + DEFAULT => -21 + }, + {#State 26 + DEFAULT => -42 + }, + {#State 27 + DEFAULT => -44 + }, + {#State 28 + DEFAULT => -23 + }, + {#State 29 + DEFAULT => -37 + }, + {#State 30 + DEFAULT => -36 + }, + {#State 31 + DEFAULT => -35 + }, + {#State 32 + DEFAULT => 0 + }, + {#State 33 ACTIONS => { - 'QUOTE' => 7 + 'TRUE' => 1, + 'FALSE' => 10, + 'CURLYOPEN' => 2, + 'SQUAREOPEN' => 11, + 'QUOTE' => 3 }, GOTOS => { - 'quotestring' => 4, - 'kvpair' => 14 + 'hash' => 6, + 'array' => 7, + 'boolean' => 8, + 'quotestring' => 9, + 'basictype' => 51 } }, - {#State 10 - DEFAULT => -1 + {#State 34 + ACTIONS => { + 'SQUARECLOSE' => 52, + 'COMMA' => 33 + } }, - {#State 11 + {#State 35 + DEFAULT => -9 + }, + {#State 36 ACTIONS => { - 'QUOTE' => 15 + 'QUOTE' => 3 + }, + GOTOS => { + 'quotestring' => 15, + 'kvpair' => 53 } }, - {#State 12 - DEFAULT => -5 + {#State 37 + DEFAULT => -12 }, - {#State 13 - DEFAULT => -4 + {#State 38 + ACTIONS => { + 'INTEGER' => 54, + 'TRUE' => 1, + 'NULL' => 55, + 'FALSE' => 10, + 'CURLYOPEN' => 
2, + 'SQUAREOPEN' => 11, + 'QUOTE' => 3 + }, + GOTOS => { + 'hash' => 58, + 'array' => 57, + 'boolean' => 56, + 'quotestring' => 59 + } }, - {#State 14 + {#State 39 + DEFAULT => -22 + }, + {#State 40 + DEFAULT => -24 + }, + {#State 41 + DEFAULT => -32 + }, + {#State 42 + DEFAULT => -31 + }, + {#State 43 + DEFAULT => -25 + }, + {#State 44 + DEFAULT => -29 + }, + {#State 45 + DEFAULT => -30 + }, + {#State 46 + DEFAULT => -26 + }, + {#State 47 + DEFAULT => -28 + }, + {#State 48 + DEFAULT => -34 + }, + {#State 49 + DEFAULT => -33 + }, + {#State 50 + DEFAULT => -27 + }, + {#State 51 DEFAULT => -2 }, - {#State 15 - DEFAULT => -6 + {#State 52 + DEFAULT => -10 + }, + {#State 53 + DEFAULT => -14 + }, + {#State 54 + DEFAULT => -20 + }, + {#State 55 + DEFAULT => -19 + }, + {#State 56 + DEFAULT => -18 + }, + {#State 57 + DEFAULT => -17 + }, + {#State 58 + DEFAULT => -16 + }, + {#State 59 + DEFAULT => -15 } ], yyrules => @@ -589,44 +818,240 @@ sub new { '$start', 2, undef ], [#Rule 1 - 'array', 3, + 'basicstream', 1, sub -#line 3 "jsarray.yp" -{ return $_[2] } +#line 37 "JSONNospace.yp" +{ return [ $_[1] ] } ], [#Rule 2 - 'kvlist', 3, + 'basicstream', 3, sub -#line 6 "jsarray.yp" -{ return [ @{$_[1]}, @{$_[3]} ] } +#line 38 "JSONNospace.yp" +{ return [ @{$_[1]}, $_[3] ] } ], [#Rule 3 - 'kvlist', 1, undef + 'basictype', 1, undef ], [#Rule 4 + 'basictype', 1, undef + ], + [#Rule 5 + 'basictype', 1, undef + ], + [#Rule 6 + 'basictype', 1, undef + ], + [#Rule 7 + 'boolean', 1, +sub +#line 47 "JSONNospace.yp" +{ return 1 } + ], + [#Rule 8 + 'boolean', 1, +sub +#line 48 "JSONNospace.yp" +{ return 0 } + ], + [#Rule 9 + 'array', 2, +sub +#line 51 "JSONNospace.yp" +{ return [] } + ], + [#Rule 10 + 'array', 3, +sub +#line 52 "JSONNospace.yp" +{ return [ @{$_[2]} ] } + ], + [#Rule 11 + 'hash', 2, +sub +#line 55 "JSONNospace.yp" +{ return { } } + ], + [#Rule 12 + 'hash', 3, +sub +#line 56 "JSONNospace.yp" +{ return { @{$_[2]} } } + ], + [#Rule 13 + 'kvstream', 1, undef + ], + [#Rule 14 + 
'kvstream', 3, +sub +#line 60 "JSONNospace.yp" +{ return [ @{$_[1]}, @{$_[3]} ] } + ], + [#Rule 15 'kvpair', 3, sub -#line 10 "jsarray.yp" +#line 63 "JSONNospace.yp" { return [ $_[1], $_[3] ] } ], - [#Rule 5 + [#Rule 16 + 'kvpair', 3, +sub +#line 64 "JSONNospace.yp" +{ return [ $_[1], $_[3] ] } + ], + [#Rule 17 + 'kvpair', 3, +sub +#line 65 "JSONNospace.yp" +{ return [ $_[1], $_[3] ] } + ], + [#Rule 18 + 'kvpair', 3, +sub +#line 66 "JSONNospace.yp" +{ return [ $_[1], $_[3] ] } + ], + [#Rule 19 + 'kvpair', 3, +sub +#line 67 "JSONNospace.yp" +{ return [ $_[1], undef ] } + ], + [#Rule 20 + 'kvpair', 3, +sub +#line 68 "JSONNospace.yp" +{ return [ $_[1], $_[3] ] } + ], + [#Rule 21 'quotestring', 2, sub -#line 13 "jsarray.yp" +#line 71 "JSONNospace.yp" { return "" } ], - [#Rule 6 + [#Rule 22 'quotestring', 3, sub -#line 14 "jsarray.yp" +#line 72 "JSONNospace.yp" +{ return $_[2] } + ], + [#Rule 23 + 'charstream', 1, undef + ], + [#Rule 24 + 'charstream', 2, +sub +#line 76 "JSONNospace.yp" +{ return $_[1] . 
$_[2] } + ], + [#Rule 25 + 'escapedchar', 2, +sub +#line 79 "JSONNospace.yp" +{ return $_[2] } + ], + [#Rule 26 + 'escapedchar', 2, +sub +#line 80 "JSONNospace.yp" +{ return $_[2] } + ], + [#Rule 27 + 'escapedchar', 2, +sub +#line 81 "JSONNospace.yp" +{ return $_[2] } + ], + [#Rule 28 + 'escapedchar', 2, +sub +#line 82 "JSONNospace.yp" +{ return $_[2] } + ], + [#Rule 29 + 'escapedchar', 2, +sub +#line 83 "JSONNospace.yp" +{ return $_[2] } + ], + [#Rule 30 + 'escapedchar', 2, +sub +#line 84 "JSONNospace.yp" { return $_[2] } + ], + [#Rule 31 + 'escapedchar', 2, +sub +#line 85 "JSONNospace.yp" +{ return $_[2] } + ], + [#Rule 32 + 'escapedchar', 2, +sub +#line 86 "JSONNospace.yp" +{ return $_[2] } + ], + [#Rule 33 + 'escapedchar', 2, +sub +#line 87 "JSONNospace.yp" +{ return encode("utf8", pack("U", hex(substr($_[2],1)))) } + ], + [#Rule 34 + 'escapedchar', 2, +sub +#line 88 "JSONNospace.yp" +{ + if ($_[2] eq 'n') { + return "\n"; + } elsif ($_[2] eq 'r') { + return "\r"; + } else { + return $_[2]; + }} + ], + [#Rule 35 + 'char', 1, undef + ], + [#Rule 36 + 'char', 1, undef + ], + [#Rule 37 + 'char', 1, undef + ], + [#Rule 38 + 'char', 1, undef + ], + [#Rule 39 + 'char', 1, undef + ], + [#Rule 40 + 'char', 1, undef + ], + [#Rule 41 + 'char', 1, undef + ], + [#Rule 42 + 'char', 1, undef + ], + [#Rule 43 + 'char', 1, undef + ], + [#Rule 44 + 'char', 1, undef + ], + [#Rule 45 + 'char', 1, undef + ], + [#Rule 46 + 'char', 1, undef ] ], @_); bless($self,$class); } -#line 15 "jsarray.yp" +#line 111 "JSONNospace.yp" 1; diff --git a/videosite/JSONNospace.yp b/videosite/JSONNospace.yp new file mode 100644 index 0000000..384eda3 --- /dev/null +++ b/videosite/JSONNospace.yp @@ -0,0 +1,111 @@ +/* + * This file contains a simple and quite possibly wrong parser for + * JSON encoded structures. + * + * It does not support white space in quoted strings; those are silently + * dropped. 
+ * + * It has primarily been written to parse video parameters from the + * DailyMotion site. + * + * Token definitions: + * COLON => ':', + * QUOTE => '\"', + * SINGLEQUOTE => '\\\'', + * TRUE => 'true', + * FALSE => 'false', + * NULL => 'null', + * QUADHEX => 'u[0-9a-fA-F]{4}', + * INTEGER => '[0-9]+', + * QUOTEDNORMAL => '[nr]', + * SIMPLECHAR => '[-\w\._\?\+=\&\!%<>;]+', + * BACKSLASH => '\\\\', + * SLASH => '/', + * COMMA => ',', + * CURLYOPEN => '{', + * CURLYCLOSE => '}', + * SQUAREOPEN => '\[', + * SQUARECLOSE => '\]', + */ + +%{ +use Encode; +%} +%% + +basicstream: + basictype { return [ $_[1] ] } | + basicstream COMMA basictype { return [ @{$_[1]}, $_[3] ] }; + +basictype: + hash | + array | + quotestring | + boolean; + +boolean: + TRUE { return 1 } | + FALSE { return 0 }; + +array: + SQUAREOPEN SQUARECLOSE { return [] } | + SQUAREOPEN basicstream SQUARECLOSE { return [ @{$_[2]} ] }; + +hash: + CURLYOPEN CURLYCLOSE { return { } } | + CURLYOPEN kvstream CURLYCLOSE { return { @{$_[2]} } }; + +kvstream: + kvpair | + kvstream COMMA kvpair { return [ @{$_[1]}, @{$_[3]} ] }; + +kvpair: + quotestring COLON quotestring { return [ $_[1], $_[3] ] } | + quotestring COLON hash { return [ $_[1], $_[3] ] } | + quotestring COLON array { return [ $_[1], $_[3] ] } | + quotestring COLON boolean { return [ $_[1], $_[3] ] } | + quotestring COLON NULL { return [ $_[1], undef ] } | + quotestring COLON INTEGER { return [ $_[1], $_[3] ] }; + +quotestring: + QUOTE QUOTE { return "" } | + QUOTE charstream QUOTE { return $_[2] }; + +charstream: + char | + charstream char { return $_[1] . 
$_[2] }; + +escapedchar: + BACKSLASH CURLYOPEN { return $_[2] } | + BACKSLASH CURLYCLOSE { return $_[2] } | + BACKSLASH SQUAREOPEN { return $_[2] } | + BACKSLASH SQUARECLOSE { return $_[2] } | + BACKSLASH QUOTE { return $_[2] } | + BACKSLASH SINGLEQUOTE { return $_[2] } | + BACKSLASH SLASH { return $_[2] } | + BACKSLASH BACKSLASH { return $_[2] } | + BACKSLASH QUADHEX { return encode("utf8", pack("U", hex(substr($_[2],1)))) } | + BACKSLASH QUOTEDNORMAL { + if ($_[2] eq 'n') { + return "\n"; + } elsif ($_[2] eq 'r') { + return "\r"; + } else { + return $_[2]; + }}; + +char: + QUOTEDNORMAL | + U | + QUADHEX | + SIMPLECHAR | + INTEGER | + COLON | + COMMA | + SINGLEQUOTE | + TRUE | + FALSE | + NULL | + escapedchar; + +%% diff --git a/videosite/jsarray.yp b/videosite/jsarray.yp deleted file mode 100644 index 3b32aca..0000000 --- a/videosite/jsarray.yp +++ /dev/null @@ -1,15 +0,0 @@ -%% -array: - LEFTC kvlist RIGHTC { return $_[2] }; - -kvlist: - kvlist COMMA kvpair { return [ @{$_[1]}, @{$_[3]} ] } | - kvpair; - -kvpair: - quotestring COLON quotestring { return [ $_[1], $_[3] ] }; - -quotestring: - QUOTE QUOTE { return "" } | - QUOTE ID QUOTE { return $_[2] }; -%% -- 1.8.3.1