Add a new JSON parser, replacing jsarray. This parser
authorRalf Ertzinger <ralf@skytale.net>
Sat, 17 Apr 2010 16:21:10 +0000 (18:21 +0200)
committerRalf Ertzinger <ralf@skytale.net>
Sat, 17 Apr 2010 16:21:10 +0000 (18:21 +0200)
can do all jsarray could, and much more.

videosite/JSLexArrayParser.pm
videosite/JSONNospace.pm [moved from videosite/jsarray.pm with 63% similarity]
videosite/JSONNospace.yp [new file with mode: 0644]
videosite/jsarray.yp [deleted file]

index f250d61..73a8d7e 100644 (file)
@@ -1,9 +1,9 @@
 #
-# A helper class for parsing textual JS hashes into perl 
-# hashes
+# A helper class for parsing textual JSON structures into perl 
+# structures
 #
-# The parser is in jsarray.yp, to regenerate you'll need the Parse::YAPP
-# package. Use 'yapp -m videosite::jsarray -s jsarray.yp' to regenerate
+# The parser is in JSONNospace.yp; to regenerate it you'll need the Parse::Yapp
+# package. Use 'yapp -m videosite::JSONNospace -s JSONNospace.yp' to regenerate
 #
 
 package videosite::JSLexArrayParser;
@@ -12,23 +12,35 @@ use videosite::JSArrayParser;
 @ISA = qw(videosite::JSArrayParser);
 
 use Parse::Lex;
-use videosite::jsarray;
+use videosite::JSONNospace;
+use Data::Dumper;
 use strict;
 
 my @tokens = (
-    COLON  => '[:]',
-    RIGHTC => '[\}]',
-    LEFTC => '[\{]',
-    QUOTE => '[\"]',
-    COMMA => '[,]',
-    ID =>    '[\w_%\.\+-]+'
+    COLON => ':',
+    QUOTE => '\"',
+    SINGLEQUOTE => '\\\'',
+    TRUE => 'true',
+    FALSE => 'false',
+    NULL => 'null',
+    QUADHEX => 'u[0-9a-fA-F]{4}',
+    INTEGER => '[0-9]+',
+    QUOTEDNORMAL => '[nr]',
+    SIMPLECHAR => '[-\w\._\?\+=\&\!%<>;]+',
+    BACKSLASH => '\\\\',
+    SLASH => '/',
+    COMMA => ',',
+    CURLYOPEN => '{',
+    CURLYCLOSE => '}',
+    SQUAREOPEN => '\[',
+    SQUARECLOSE => '\]',
 );
 
 sub new {
     my $class = shift;
     my $self = $class->SUPER::new();
 
-    $self->{'_PARSER'} = videosite::jsarray->new();
+    $self->{'_PARSER'} = videosite::JSONNospace->new();
     $self->{'_LEXER'} = Parse::Lex->new(@tokens);
 
     return bless($self, $class);
@@ -37,11 +49,11 @@ sub new {
 sub parse {
     my $self = shift;
     my $s = shift;
-    my @result;
+    my $result;
     my $l = $self->{'_LEXER'};
 
     $l->from($s);
-    @result = $self->{'_PARSER'}->YYParse(
+    $result = $self->{'_PARSER'}->YYParse(
         yylex => sub {
             my $tok = $l->next();
             return ('', undef) unless $tok;
@@ -52,7 +64,7 @@ sub parse {
             $_[0]->YYAbort();
         },
         yydebug => 0x0);
-    return $result[0]?{@{$result[0]}}:undef;
+    return ref($result)?$result->[0]:$result;
 }
 
 1;
similarity index 63%
rename from videosite/jsarray.pm
rename to videosite/JSONNospace.pm
index 6f9075a..2c18cb9 100644 (file)
@@ -7,7 +7,7 @@
 #             ANY CHANGE MADE HERE WILL BE LOST !
 #
 ####################################################################
-package videosite::jsarray;
+package videosite::JSONNospace;
 use vars qw ( @ISA );
 use strict;
 
@@ -489,6 +489,9 @@ sub _Parse {
 #End of include--------------------------------------------------
 
 
+#line 31 "JSONNospace.yp"
+
+use Encode;
 
 
 sub new {
@@ -501,86 +504,312 @@ sub new {
 [
        {#State 0
                ACTIONS => {
-                       'LEFTC' => 2
+                       'TRUE' => 1,
+                       'FALSE' => 10,
+                       'CURLYOPEN' => 2,
+                       'SQUAREOPEN' => 11,
+                       'QUOTE' => 3
                },
                GOTOS => {
-                       'array' => 1
+                       'boolean' => 8,
+                       'array' => 7,
+                       'hash' => 6,
+                       'basicstream' => 5,
+                       'quotestring' => 9,
+                       'basictype' => 4
                }
        },
        {#State 1
-               ACTIONS => {
-                       '' => 3
-               }
+               DEFAULT => -7
        },
        {#State 2
                ACTIONS => {
-                       'QUOTE' => 7
+                       'CURLYCLOSE' => 14,
+                       'QUOTE' => 3
                },
                GOTOS => {
-                       'quotestring' => 4,
-                       'kvpair' => 5,
-                       'kvlist' => 6
+                       'quotestring' => 15,
+                       'kvstream' => 12,
+                       'kvpair' => 13
                }
        },
        {#State 3
-               DEFAULT => 0
+               ACTIONS => {
+                       'SIMPLECHAR' => 17,
+                       'INTEGER' => 16,
+                       'TRUE' => 19,
+                       'NULL' => 20,
+                       'COLON' => 21,
+                       'BACKSLASH' => 23,
+                       'COMMA' => 22,
+                       'QUOTE' => 25,
+                       'SINGLEQUOTE' => 26,
+                       'FALSE' => 27,
+                       'QUOTEDNORMAL' => 31,
+                       'U' => 30,
+                       'QUADHEX' => 29
+               },
+               GOTOS => {
+                       'charstream' => 18,
+                       'char' => 28,
+                       'escapedchar' => 24
+               }
        },
        {#State 4
+               DEFAULT => -1
+       },
+       {#State 5
                ACTIONS => {
-                       'COLON' => 8
+                       '' => 32,
+                       'COMMA' => 33
                }
        },
-       {#State 5
+       {#State 6
                DEFAULT => -3
        },
-       {#State 6
+       {#State 7
+               DEFAULT => -4
+       },
+       {#State 8
+               DEFAULT => -6
+       },
+       {#State 9
+               DEFAULT => -5
+       },
+       {#State 10
+               DEFAULT => -8
+       },
+       {#State 11
                ACTIONS => {
-                       'COMMA' => 9,
-                       'RIGHTC' => 10
+                       'TRUE' => 1,
+                       'FALSE' => 10,
+                       'SQUARECLOSE' => 35,
+                       'CURLYOPEN' => 2,
+                       'SQUAREOPEN' => 11,
+                       'QUOTE' => 3
+               },
+               GOTOS => {
+                       'basicstream' => 34,
+                       'hash' => 6,
+                       'array' => 7,
+                       'boolean' => 8,
+                       'quotestring' => 9,
+                       'basictype' => 4
                }
        },
-       {#State 7
+       {#State 12
                ACTIONS => {
-                       'ID' => 11,
-                       'QUOTE' => 12
+                       'CURLYCLOSE' => 37,
+                       'COMMA' => 36
                }
        },
-       {#State 8
+       {#State 13
+               DEFAULT => -13
+       },
+       {#State 14
+               DEFAULT => -11
+       },
+       {#State 15
                ACTIONS => {
-                       'QUOTE' => 7
+                       'COLON' => 38
+               }
+       },
+       {#State 16
+               DEFAULT => -39
+       },
+       {#State 17
+               DEFAULT => -38
+       },
+       {#State 18
+               ACTIONS => {
+                       'SIMPLECHAR' => 17,
+                       'INTEGER' => 16,
+                       'TRUE' => 19,
+                       'NULL' => 20,
+                       'COLON' => 21,
+                       'BACKSLASH' => 23,
+                       'COMMA' => 22,
+                       'QUOTE' => 39,
+                       'SINGLEQUOTE' => 26,
+                       'FALSE' => 27,
+                       'QUOTEDNORMAL' => 31,
+                       'U' => 30,
+                       'QUADHEX' => 29
                },
                GOTOS => {
-                       'quotestring' => 13
+                       'char' => 40,
+                       'escapedchar' => 24
                }
        },
-       {#State 9
+       {#State 19
+               DEFAULT => -43
+       },
+       {#State 20
+               DEFAULT => -45
+       },
+       {#State 21
+               DEFAULT => -40
+       },
+       {#State 22
+               DEFAULT => -41
+       },
+       {#State 23
+               ACTIONS => {
+                       'BACKSLASH' => 41,
+                       'CURLYOPEN' => 43,
+                       'SLASH' => 42,
+                       'QUOTE' => 44,
+                       'SINGLEQUOTE' => 45,
+                       'CURLYCLOSE' => 46,
+                       'SQUARECLOSE' => 47,
+                       'SQUAREOPEN' => 50,
+                       'QUADHEX' => 49,
+                       'QUOTEDNORMAL' => 48
+               }
+       },
+       {#State 24
+               DEFAULT => -46
+       },
+       {#State 25
+               DEFAULT => -21
+       },
+       {#State 26
+               DEFAULT => -42
+       },
+       {#State 27
+               DEFAULT => -44
+       },
+       {#State 28
+               DEFAULT => -23
+       },
+       {#State 29
+               DEFAULT => -37
+       },
+       {#State 30
+               DEFAULT => -36
+       },
+       {#State 31
+               DEFAULT => -35
+       },
+       {#State 32
+               DEFAULT => 0
+       },
+       {#State 33
                ACTIONS => {
-                       'QUOTE' => 7
+                       'TRUE' => 1,
+                       'FALSE' => 10,
+                       'CURLYOPEN' => 2,
+                       'SQUAREOPEN' => 11,
+                       'QUOTE' => 3
                },
                GOTOS => {
-                       'quotestring' => 4,
-                       'kvpair' => 14
+                       'hash' => 6,
+                       'array' => 7,
+                       'boolean' => 8,
+                       'quotestring' => 9,
+                       'basictype' => 51
                }
        },
-       {#State 10
-               DEFAULT => -1
+       {#State 34
+               ACTIONS => {
+                       'SQUARECLOSE' => 52,
+                       'COMMA' => 33
+               }
        },
-       {#State 11
+       {#State 35
+               DEFAULT => -9
+       },
+       {#State 36
                ACTIONS => {
-                       'QUOTE' => 15
+                       'QUOTE' => 3
+               },
+               GOTOS => {
+                       'quotestring' => 15,
+                       'kvpair' => 53
                }
        },
-       {#State 12
-               DEFAULT => -5
+       {#State 37
+               DEFAULT => -12
        },
-       {#State 13
-               DEFAULT => -4
+       {#State 38
+               ACTIONS => {
+                       'INTEGER' => 54,
+                       'TRUE' => 1,
+                       'NULL' => 55,
+                       'FALSE' => 10,
+                       'CURLYOPEN' => 2,
+                       'SQUAREOPEN' => 11,
+                       'QUOTE' => 3
+               },
+               GOTOS => {
+                       'hash' => 58,
+                       'array' => 57,
+                       'boolean' => 56,
+                       'quotestring' => 59
+               }
        },
-       {#State 14
+       {#State 39
+               DEFAULT => -22
+       },
+       {#State 40
+               DEFAULT => -24
+       },
+       {#State 41
+               DEFAULT => -32
+       },
+       {#State 42
+               DEFAULT => -31
+       },
+       {#State 43
+               DEFAULT => -25
+       },
+       {#State 44
+               DEFAULT => -29
+       },
+       {#State 45
+               DEFAULT => -30
+       },
+       {#State 46
+               DEFAULT => -26
+       },
+       {#State 47
+               DEFAULT => -28
+       },
+       {#State 48
+               DEFAULT => -34
+       },
+       {#State 49
+               DEFAULT => -33
+       },
+       {#State 50
+               DEFAULT => -27
+       },
+       {#State 51
                DEFAULT => -2
        },
-       {#State 15
-               DEFAULT => -6
+       {#State 52
+               DEFAULT => -10
+       },
+       {#State 53
+               DEFAULT => -14
+       },
+       {#State 54
+               DEFAULT => -20
+       },
+       {#State 55
+               DEFAULT => -19
+       },
+       {#State 56
+               DEFAULT => -18
+       },
+       {#State 57
+               DEFAULT => -17
+       },
+       {#State 58
+               DEFAULT => -16
+       },
+       {#State 59
+               DEFAULT => -15
        }
 ],
                                   yyrules  =>
@@ -589,44 +818,240 @@ sub new {
                 '$start', 2, undef
        ],
        [#Rule 1
-                'array', 3,
+                'basicstream', 1,
 sub
-#line 3 "jsarray.yp"
-{ return $_[2] }
+#line 37 "JSONNospace.yp"
+{ return [ $_[1] ] }
        ],
        [#Rule 2
-                'kvlist', 3,
+                'basicstream', 3,
 sub
-#line 6 "jsarray.yp"
-{ return [ @{$_[1]}, @{$_[3]} ] }
+#line 38 "JSONNospace.yp"
+{ return [ @{$_[1]}, $_[3] ] }
        ],
        [#Rule 3
-                'kvlist', 1, undef
+                'basictype', 1, undef
        ],
        [#Rule 4
+                'basictype', 1, undef
+       ],
+       [#Rule 5
+                'basictype', 1, undef
+       ],
+       [#Rule 6
+                'basictype', 1, undef
+       ],
+       [#Rule 7
+                'boolean', 1,
+sub
+#line 47 "JSONNospace.yp"
+{ return 1 }
+       ],
+       [#Rule 8
+                'boolean', 1,
+sub
+#line 48 "JSONNospace.yp"
+{ return 0 }
+       ],
+       [#Rule 9
+                'array', 2,
+sub
+#line 51 "JSONNospace.yp"
+{ return [] }
+       ],
+       [#Rule 10
+                'array', 3,
+sub
+#line 52 "JSONNospace.yp"
+{ return [ @{$_[2]} ] }
+       ],
+       [#Rule 11
+                'hash', 2,
+sub
+#line 55 "JSONNospace.yp"
+{ return { } }
+       ],
+       [#Rule 12
+                'hash', 3,
+sub
+#line 56 "JSONNospace.yp"
+{ return { @{$_[2]} } }
+       ],
+       [#Rule 13
+                'kvstream', 1, undef
+       ],
+       [#Rule 14
+                'kvstream', 3,
+sub
+#line 60 "JSONNospace.yp"
+{ return [ @{$_[1]}, @{$_[3]} ] }
+       ],
+       [#Rule 15
                 'kvpair', 3,
 sub
-#line 10 "jsarray.yp"
+#line 63 "JSONNospace.yp"
 { return [ $_[1], $_[3] ] }
        ],
-       [#Rule 5
+       [#Rule 16
+                'kvpair', 3,
+sub
+#line 64 "JSONNospace.yp"
+{ return [ $_[1], $_[3] ] }
+       ],
+       [#Rule 17
+                'kvpair', 3,
+sub
+#line 65 "JSONNospace.yp"
+{ return [ $_[1], $_[3] ] }
+       ],
+       [#Rule 18
+                'kvpair', 3,
+sub
+#line 66 "JSONNospace.yp"
+{ return [ $_[1], $_[3] ] }
+       ],
+       [#Rule 19
+                'kvpair', 3,
+sub
+#line 67 "JSONNospace.yp"
+{ return [ $_[1], undef ] }
+       ],
+       [#Rule 20
+                'kvpair', 3,
+sub
+#line 68 "JSONNospace.yp"
+{ return [ $_[1], $_[3] ] }
+       ],
+       [#Rule 21
                 'quotestring', 2,
 sub
-#line 13 "jsarray.yp"
+#line 71 "JSONNospace.yp"
 { return "" }
        ],
-       [#Rule 6
+       [#Rule 22
                 'quotestring', 3,
 sub
-#line 14 "jsarray.yp"
+#line 72 "JSONNospace.yp"
+{ return $_[2] }
+       ],
+       [#Rule 23
+                'charstream', 1, undef
+       ],
+       [#Rule 24
+                'charstream', 2,
+sub
+#line 76 "JSONNospace.yp"
+{ return $_[1] . $_[2] }
+       ],
+       [#Rule 25
+                'escapedchar', 2,
+sub
+#line 79 "JSONNospace.yp"
+{ return $_[2] }
+       ],
+       [#Rule 26
+                'escapedchar', 2,
+sub
+#line 80 "JSONNospace.yp"
+{ return $_[2] }
+       ],
+       [#Rule 27
+                'escapedchar', 2,
+sub
+#line 81 "JSONNospace.yp"
+{ return $_[2] }
+       ],
+       [#Rule 28
+                'escapedchar', 2,
+sub
+#line 82 "JSONNospace.yp"
+{ return $_[2] }
+       ],
+       [#Rule 29
+                'escapedchar', 2,
+sub
+#line 83 "JSONNospace.yp"
+{ return $_[2] }
+       ],
+       [#Rule 30
+                'escapedchar', 2,
+sub
+#line 84 "JSONNospace.yp"
 { return $_[2] }
+       ],
+       [#Rule 31
+                'escapedchar', 2,
+sub
+#line 85 "JSONNospace.yp"
+{ return $_[2] }
+       ],
+       [#Rule 32
+                'escapedchar', 2,
+sub
+#line 86 "JSONNospace.yp"
+{ return $_[2] }
+       ],
+       [#Rule 33
+                'escapedchar', 2,
+sub
+#line 87 "JSONNospace.yp"
+{ return encode("utf8", pack("U", hex(substr($_[2],1)))) }
+       ],
+       [#Rule 34
+                'escapedchar', 2,
+sub
+#line 88 "JSONNospace.yp"
+{
+        if ($_[2] eq 'n') {
+            return "\n";
+        } elsif ($_[2] eq 'r') {
+            return "\r";
+        } else {
+            return $_[2];
+        }}
+       ],
+       [#Rule 35
+                'char', 1, undef
+       ],
+       [#Rule 36
+                'char', 1, undef
+       ],
+       [#Rule 37
+                'char', 1, undef
+       ],
+       [#Rule 38
+                'char', 1, undef
+       ],
+       [#Rule 39
+                'char', 1, undef
+       ],
+       [#Rule 40
+                'char', 1, undef
+       ],
+       [#Rule 41
+                'char', 1, undef
+       ],
+       [#Rule 42
+                'char', 1, undef
+       ],
+       [#Rule 43
+                'char', 1, undef
+       ],
+       [#Rule 44
+                'char', 1, undef
+       ],
+       [#Rule 45
+                'char', 1, undef
+       ],
+       [#Rule 46
+                'char', 1, undef
        ]
 ],
                                   @_);
     bless($self,$class);
 }
 
-#line 15 "jsarray.yp"
+#line 111 "JSONNospace.yp"
 
 
 1;
diff --git a/videosite/JSONNospace.yp b/videosite/JSONNospace.yp
new file mode 100644 (file)
index 0000000..384eda3
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ * This file contains a simple and quite possibly wrong parser for
+ * JSON encoded structures.
+ *
+ * It does not support white space in quoted strings; any white space there
+ * is silently dropped.
+ *
+ * It has primarily been written to parse video parameters from the
+ * DailyMotion site
+ *
+ * Token definitions:
+ *  COLON => ':',
+ *  QUOTE => '\"',
+ *  SINGLEQUOTE => '\\\'',
+ *  TRUE => 'true',
+ *  FALSE => 'false',
+ *  NULL => 'null',
+ *  QUADHEX => 'u[0-9a-fA-F]{4}',
+ *  INTEGER => '[0-9]+',
+ *  QUOTEDNORMAL => '[nr]',
+ *  SIMPLECHAR => '[-\w\._\?\+=\&\!%<>;]+',
+ *  BACKSLASH => '\\\\',
+ *  SLASH => '/',
+ *  COMMA => ',',
+ *  CURLYOPEN => '{',
+ *  CURLYCLOSE => '}',
+ *  SQUAREOPEN => '\[',
+ *  SQUARECLOSE => '\]',
+ */
+
+%{
+use Encode;
+%}
+%%
+    
+basicstream:
+    basictype { return [ $_[1] ] } |
+    basicstream COMMA basictype { return [ @{$_[1]}, $_[3] ] };
+
+basictype:
+    hash |
+    array |
+    quotestring |
+    boolean;
+
+boolean:
+    TRUE { return 1 } |
+    FALSE { return 0 };
+
+array:
+    SQUAREOPEN SQUARECLOSE { return [] } |
+    SQUAREOPEN basicstream SQUARECLOSE { return [ @{$_[2]} ] };
+
+hash:
+    CURLYOPEN CURLYCLOSE { return { } } |
+    CURLYOPEN kvstream CURLYCLOSE { return { @{$_[2]} } };
+
+kvstream:
+    kvpair |
+    kvstream COMMA kvpair { return [ @{$_[1]}, @{$_[3]} ] };
+
+kvpair:
+    quotestring COLON quotestring { return [ $_[1], $_[3] ] } |
+    quotestring COLON hash { return [ $_[1], $_[3] ] } |
+    quotestring COLON array { return [ $_[1], $_[3] ] } |
+    quotestring COLON boolean { return [ $_[1], $_[3] ] } |
+    quotestring COLON NULL { return [ $_[1], undef ] } |
+    quotestring COLON INTEGER { return [ $_[1], $_[3] ] };
+
+quotestring:
+    QUOTE QUOTE { return "" } | 
+    QUOTE charstream QUOTE { return $_[2] };
+
+charstream:
+    char |
+    charstream char { return $_[1] . $_[2] };
+
+escapedchar:
+    BACKSLASH CURLYOPEN { return $_[2] } |
+    BACKSLASH CURLYCLOSE { return $_[2] } |
+    BACKSLASH SQUAREOPEN { return $_[2] } |
+    BACKSLASH SQUARECLOSE { return $_[2] } |
+    BACKSLASH QUOTE { return $_[2] } |
+    BACKSLASH SINGLEQUOTE { return $_[2] } |
+    BACKSLASH SLASH { return $_[2] } |
+    BACKSLASH BACKSLASH { return $_[2] } |
+    BACKSLASH QUADHEX { return encode("utf8", pack("U", hex(substr($_[2],1)))) } |
+    BACKSLASH QUOTEDNORMAL {
+        if ($_[2] eq 'n') {
+            return "\n";
+        } elsif ($_[2] eq 'r') {
+            return "\r";
+        } else {
+            return $_[2];
+        }};
+
+char:
+    QUOTEDNORMAL |
+    U |
+    QUADHEX |
+    SIMPLECHAR | 
+    INTEGER |
+    COLON |
+    COMMA |
+    SINGLEQUOTE |
+    TRUE |
+    FALSE |
+    NULL |
+    escapedchar;
+
+%%
diff --git a/videosite/jsarray.yp b/videosite/jsarray.yp
deleted file mode 100644 (file)
index 3b32aca..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-%%
-array:
-    LEFTC kvlist RIGHTC { return $_[2] };
-
-kvlist:
-    kvlist COMMA kvpair { return [ @{$_[1]}, @{$_[3]} ] } |
-    kvpair;
-
-kvpair:
-    quotestring COLON quotestring { return [ $_[1], $_[3] ] };
-
-quotestring:
-    QUOTE QUOTE { return "" } |
-    QUOTE ID QUOTE { return $_[2] };
-%%