/*
 * JSL JSON Parser, Xan Gregg, 2014-12-29
 * pattern matching speedup, Craige Hales, 2015-01-01
 *
 * This file contains functions for converting JSON (http://www.json.org/)
 * text into either a JSL data structure or a JMP data table.
 * JSON represents a structure of nested "arrays" and "objects".
 * JSON values are represented as follows in JSL:
 *
 *   JSON     JSL
 *   array    list                sequence of values
 *   object   associative array   set of named values
 *   number   number
 *   string   string
 *   true     1.0
 *   false    0.0
 *   null     .                   (missing values)
 *
 * When converted to a JMP data table, names represent columns
 * and array values represent rows. Non-array values are filled
 * down to match array values.
 *
 * Main Functions
 *
 * All functions are within the namespace "JSON" to avoid name collisions with
 * other functions.
 *
 *   dt = JSON:Make Table( json text | obj )
 *   obj = JSON:Parse( json text )
 *
 * Example
 *
 * JSON:
 *   {"region":"Europe",
 *    "country":[
 *      {"name":"France", "population":66.1},
 *      {"name":"Germany", "population":80.8}
 *    ]
 *   }
 *
 * Converted JSL:
 *   Associative Array(
 *     {{"country", {["name" => "France", "population" => 66.1],
 *                   ["name" => "Germany", "population" => 80.8]}},
 *      {"region", "Europe"}}
 *   )
 *
 * Note the order of the set elements is not preserved. It
 * becomes alphabetical.
 *
 * Values of the parsed object can be accessed with normal
 * JSL subscripting. For example, if 'data' holds the parse result:
 *
 *   data["country"][1]["name"]; // "France"
 *
 * Converted Table:
 *
 *   country.name   country.population   region
 *   France         66.1                 Europe
 *   Germany        80.8                 Europe
 *
 * Note that the region value gets repeated.
 *
 * Implementation notes:
 * This code is not particularly fast or well-tested. Please report issues
 * to the File Exchange discussion or to xan.gregg@jmp.com.
 * One large test of a 250K JSON file took over 5 minutes to parse.
*/
New Namespace( "JSON", {

	// Char Escape( c )
	// Translates the single character that follows a backslash inside a JSON
	// string into the character it stands for. Characters without a special
	// mapping (for example \ and /) are returned unchanged.
	Char Escape = Function( {c},
		Match( c,
			"\!"", "\!"",
			"b", "\!u0008",
			"f", "\!u000C",
			"n", "\!u000A",
			"r", "\!u000D",
			"t", "\!u0009",
			c
		)
	);
	,
	// Unicode Escape( u )
	// u is the four characters that followed "\u" in the JSON text. The
	// function builds the text of a JSL string literal containing the JSL
	// unicode escape for those characters and hands it to Parse().
	// NOTE(review): u comes straight from the input and is not checked to be
	// four hexadecimal digits before being parsed.
	Unicode Escape = Function( {u},
		u = "\!"\!\!u" || u || "\!"";
		Parse( u );
	);
	,
	// Pattern Name
	// Matches one quoted JSON string (used for both member names and string
	// values) and leaves the decoded text in `value`. `value` and `temp` are
	// among the locals declared by JSON:Parse.
	Pattern Name =
		"\!"" // leading quotation mark
		+ pattest( value = ""; 1 ) // init string accumulator
		+ patrepeat(
			(Pat break( "\!\\!"" ) >> temp + Pat Test( value ||= temp; 1; )) // run of characters that are not \ or "
			| ("\u" + patlen( 4 ) >> temp + pattest( value ||= JSON:Unicode Escape( temp ); 1 )) // \uXXXX
			| ("\" + patlen( 1 ) >> temp + pattest( value ||= JSON:Char Escape( temp ); 1 )) // \", for example...embedded quotation mark
		)
		+ "\!""; // trailing quotation mark
	,
	// Pattern Number
	// Matches a run of characters that can appear in a JSON number and stores
	// Num() of that run in `value`. The character set is permissive: the run is
	// not checked against the JSON number grammar before conversion.
	Pattern Number = patspan( "0123456789-+eE." ) >> temp + pattest( value = num( temp ); 1 );
	,
	// Tick Seconds() reading taken when the current JSON:Parse call started.
	starttime = 0;
	,
	// Progress log table; JSON:Parse resets it to Empty() and JSON:progress
	// creates it on first use.
	progressTable = 0;
	,
	// progress( p )
	// Records a progress sample. p is the fraction of the input consumed so
	// far (JSON:PatternParse passes failpos / totalLen). The first call after
	// JSON:Parse has reset progressTable creates the log table and a Graph
	// Builder of Percent versus Time; every call appends one row holding the
	// elapsed seconds and 100 * p.
	progress = Function( {p},
		If( Is Empty( JSON:progressTable ),
			JSON:progressTable = New Table( "JSON Parsing Progress Log",
				New Column( "Time" ),
				New Column( "Percent" )
			);
			JSON:progressTable << Graph Builder(
				Show Control Panel( 0 ),
				Variables( X( :Time ), Y( :Percent ) ),
				Elements(
					Points( X, Y, Legend( 1 ), Jitter( 1 ) ),
					Line Of Fit(
						X,
						Y,
						Legend( 3 ),
						Confidence of Fit( 1 ),
						Confidence of Prediction( 0 ),
						Degree( "Linear" ),
						Equation( 0 ),
						Root Mean Square Error( 0 ),
						R²( 0 )
					)
				)
			);
		);
		JSON:progressTable << addrows( 1 );
		JSON:progressTable:time = Tick Seconds() - JSON:starttime;
		JSON:progressTable:percent = 100 * p;
	);
	,
	// PatternParse
	// The whole-document pattern. Each pass of the repeat consumes one piece
	// of the input (whitespace, a string, a number, a literal, or one of the
	// punctuation characters) and updates the parser state held in the locals
	// declared by JSON:Parse:
	//   stack - containers currently open, innermost first
	//   keys  - the member name that was current when each "{" was opened
	//   key   - the member name most recently followed by ":"
	//   value - the most recently completed value, or Empty() if none
	// Array items are inserted at the FRONT of the open list and the list is
	// reversed once when its "]" is reached.
	PatternParse =
		patpos( 0 )
		+ patrepeat(
			patpos( /* remember how far we parsed for error message */ ) >> failpos
			/* comment out this line for a little more speed */ + pattest( if( tickseconds() - lasttime > 1, JSON:progress( failpos / totalLen ); lasttime = tickseconds(); wait( 0 ) ); 1 )
			+ patfence( /* fence off previously parsed text...ain't no going back */ )
			+ (
				patregex( "\s*" ) // ignore whitespace
				| JSON:Pattern Name
				| (":" + pattest( key = value; 1 ))
				| JSON:Pattern Number
				| ("true" + pattest( value = 1; 1 ))
				| ("false" + pattest( value = 0; 1 ))
				| ("null" + pattest( value = .; 1 ))
				| ("[" + pattest(
					Insert Into( stack, {{}}, 1 ); // {{}} pushes ONE empty list; {} would push nothing
					value = Empty();
					1
				))
				| ("{" + pattest(
					Insert Into( stack, Associative Array(), 1 );
					value = Empty();
					Insert Into( keys, key, 1 );
					1
				))
				| ("]" + pattest(
					If( !Is Empty( value ),
						// Insert Into splices the items of a list argument into the
						// target, so a nested array must be wrapped in an outer list
						// to be added as a single element.
						Insert Into( stack[1], If( Is List( value ), Eval List( {value} ), value ), 1 /*needs reverse later*/ )
					);
					value = stack[1];
					value = reverse( value );
					Remove From( stack, 1 );
					1
				))
				| ("}" + pattest(
					If( !Is Empty( value ),
						stack[1][key] = value
					);
					value = stack[1];
					Remove From( stack, 1 );
					key = keys[1]; // restore the member name of the enclosing object
					Remove From( keys, 1 );
					1
				))
				| ("," + pattest(
					If( Is List( stack[1] ),
						// same wrapping as in the "]" case so nested arrays stay nested
						Insert Into( stack[1], If( Is List( value ), Eval List( {value} ), value ), 1 /*needs reverse later*/ ),
						stack[1][key] = value
					);
					value = Empty();
					1
				))
			)
		)
		+ patrpos( 0 );
	,
	// obj = Parse( json )
	// Converts JSON text into JSL values (lists, associative arrays, numbers,
	// strings, missing) and returns the top-level value.
	// Throws "unrecognized JSON at <position>" when the text cannot be matched;
	// the position is the last one recorded in failpos by JSON:PatternParse.
	// The locals declared here are the parser state that the JSON:Pattern*
	// members read and write.
	Parse = Function( {json},
		{stack = {}, keys = {}, key = "", value = Empty(), temp = 0, totalLen = length( json ), lasttime = tickseconds(), failpos = 0},
		JSON:starttime = tickseconds();
		JSON:progressTable = empty();
		if( !try( patmatch( json, JSON:PatternParse ), 0 ),
			Throw( "unrecognized JSON at " || Char( failpos ) )
		);
		value;
	);
	,
	// added = Append Rows( dt, start, prefix, a )
	// Recursively writes the parsed value a into table dt beginning at row
	// start and returns the number of rows it accounts for.
	//   list              - each item is appended below the previous one, then
	//                       cells equal to "" below row start are filled with
	//                       the value found in row start of the same column
	//   associative array - each member is written starting at the same row,
	//                       under the column name prefix.key (or key when the
	//                       prefix is empty); the result is the largest member
	//   anything else     - written as text (Char) into the column named
	//                       prefix, which is created as a Character column if
	//                       Column( dt, prefix ) fails
	Append Rows = Function( {dt, start, prefix, a},
		{key, value, dc, c, nc, r, delta, added = 0},
		If(
			Is List( a ),
				For( r = 1, r <= Length( a ), r++,
					delta = JSON:Append Rows( dt, start + added, prefix, a[r] );
					added += delta;
				);
				// fill empty cells
				nc = N Col( dt );
				For( c = 1, c <= nc, c++,
					dc = Column( dt, c );
					value = dc[start];
					For( r = start + 1, r <= N Row( dt ), r++,
						If( dc[r] == "",
							dc[r] = value
						)
					);
				);,
			Is Associative Array( a ),
				key = a << First;
				While( !Is Empty( key ),
					value = a[key];
					delta = JSON:Append Rows( dt, start, If( prefix == "", key, prefix || "." || key ), value );
					added = Max( added, delta );
					key = a << Next( key );
				);,
			// else a literal
			If( prefix == "",
				prefix = "v"
			); // can only happen in a top array of literals
			dc = Try( Column( dt, prefix ), dt << New Column( prefix, "Character" ) );
			added = 1; // don't count padding
			If( N Rows( dt ) < start,
				dt << Add Rows( start - N Rows( dt ) )
			);
			// fill to end in case a previous column was a JSON array
			For( r = start, r <= N Rows( dt ), r++,
				dc[r] = Char( a )
			);
		);
		added;
	);
	,
	// dt = Make Table( json )
	// Builds a new data table from either JSON text (parsed first with
	// JSON:Parse) or an already parsed value, and returns the table.
	Make Table = Function( {json},
		{dt},
		If( Is String( json ),
			json = JSON:Parse( json )
		);
		dt = New Table();
		dt << Begin Data Update();
		JSON:Append Rows( dt, N Rows( dt ) + 1, "", json );
		dt << End Data Update();
		dt;
	);
} ); // end namespace

Stop(); // test code beyond this point (ignored by include)

Delete Globals();
Namespace( "JSON" ) << show contents;
JSON:Char Escape( "t" );
JSON:Unicode Escape( "0041" );
butler = JSON:Parse( "\[{"name":"Alfred", "age":42}]\" );
butlers = JSON:Parse( "\[[{"name":"Alfred", "age":42},{"name":"Jeeves", "age":61}]]\" );
JSON:Make Table( butlers );
bdt = JSON:Make Table( "\[[{"name":"Alfred", "age":42},{"name":"Jeeves", "age":61}]]\" );
nums = JSON:Parse( "\[[1,2,12.3, 99e1]]\" );
JSON:Make Table( nums );
// arrays directly inside arrays must stay nested and keep their order
nested = JSON:Parse( "\[[[1,2],[3,4],[]]]\" );
Show( nested ); // expect {{1, 2}, {3, 4}, {}}
b = JSON:Parse( "\[{"name":{"first" : "Alfred", "last":"Smith"}, "age":42}]\" );
JSON:Make Table( b );
b["name"]["last"];
s = "abc";
Substr( s, 1, 1 );
JSON:Make Table( "\[{"name":{"first" : "Alfred", "last":"Smith"}, "age":42}]\" );
b = JSON:Parse( "\[[{"name":{"first" : "Alfred", "last":"Smith"}, "age":[11, 22, 33]}]]\" );
JSON:Make Table( b );
data = JSON:Parse("\[{"region":"Europe", "country":[ {"name":"France", "population":66.1}, {"name":"Germany", "population":80.8} ] }]\");
data["country"][1]["name"];
JSON:Make Table( data );

sites = Load Text File( "sites.jsl" );
Length( sites );
t0 = Tick Seconds();
data = JSON:Parse( sites );
Length( data );
t1 = Tick Seconds();
Show( t1 - t0 );
data[1] << Get Keys();
data[1]["censorship_results"][1];
Length( data[2]["censorship_results"] );
JSON:Make Table( data );

// http://stackoverflow.com/questions/13083491/looking-for-big-sample-dummy-json-data-file pointed to...
// https://raw.githubusercontent.com/zemirco/sf-city-lots-json/master/citylots.json careful! it is 180MB!
sites = Load Text File( "$DESKTOP/citylots.JSON.txt" );
Length( sites );
t0 = Tick Seconds();
data = JSON:Parse( sites );
t1 = Tick Seconds();
Show( t1 - t0 ); // about 90 seconds