1 /*
2 Copyright (c) 2018-2021 Timur Gafarov
3 
4 Boost Software License - Version 1.0 - August 17th, 2003
5 
6 Permission is hereby granted, free of charge, to any person or organization
7 obtaining a copy of the software and accompanying documentation covered by
8 this license (the "Software") to use, reproduce, display, distribute,
9 execute, and transmit the Software, and to prepare derivative works of the
10 Software, and to permit third-parties to whom the Software is furnished to
11 do so, all subject to the following:
12 
13 The copyright notices in the Software and this entire statement, including
14 the above license grant, this restriction and the following disclaimer,
15 must be included in all copies of the Software, in whole or in part, and
16 all derivative works of the Software, unless such copies or derivative
17 works are solely in the form of machine-executable object code generated by
18 a source language processor.
19 
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
23 SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
24 FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
25 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 DEALINGS IN THE SOFTWARE.
27 */
28 
29 /**
30  * JSON parser
31  *
32  * Copyright: Timur Gafarov 2018-2021.
33  * License: $(LINK2 boost.org/LICENSE_1_0.txt, Boost License 1.0).
34  * Authors: Timur Gafarov
35  */
36 module dlib.serialization.json;
37 
38 import std.stdio;
39 import std.conv;
40 import std..string;
41 import std.ascii;
42 
43 import dlib.core.memory;
44 import dlib.core.compound;
45 import dlib.container.array;
46 import dlib.container.dict;
47 import dlib.text.utils;
48 import dlib.text.utf8;
49 import dlib.text.lexer;
50 import dlib.text.str;
51 
/**
 * Lexer for JSON text.
 *
 * Wraps a generic Lexer configured with JSON punctuation, quote characters
 * and backslash escape sequences as delimiters. Whitespace lexemes are
 * skipped, and a quoted string is returned as a single lexeme that still
 * carries its opening and closing quote, with escape sequences decoded.
 *
 * Every lexeme handed out through currentLexeme is a slice of
 * internalString, so it stays valid only while that buffer is neither
 * reallocated nor freed.
 */
class JSONLexer
{
    Lexer lexer;
    string text;
    string currentLexeme = "";
    String internalString;
    UTF8Encoder encoder;

    enum delimiters = [
        "{", "}", "[", "]", ",", ":", "\n", " ", "\"", "\'", "`",
        "\\a", "\\b", "\\f", "\\n", "\\r", "\\t", "\\v", "\\\"", "\\\'", "\\\\", "\\?",
        "\\u"
    ];

    /**
     * Creates a lexer over `text` and reads the first lexeme.
     *
     * Params:
     *   text = JSON source; it must outlive the lexer
     */
    this(string text)
    {
        this.text = text;
        // The slices taken in nextLexeme (data[.. $-1]) show that the buffer
        // holds one trailing element beyond the accumulated text (presumably
        // a terminator - confirm against dlib.text.str). Reserve room for it
        // too, so that whitespace-free input cannot outgrow the reservation
        // while earlier lexeme slices are still in use.
        internalString.reserve(text.length + 1);
        lexer = New!Lexer(this.text, delimiters);
        nextLexeme();
    }

    ~this()
    {
        Delete(lexer);
        internalString.free();
    }

    /**
     * Advances to the next significant lexeme and stores it in
     * currentLexeme. At the end of input currentLexeme becomes empty.
     */
    void nextLexeme()
    {
        while (true)
        {
            string lexeme = lexer.getLexeme();

            if (lexeme.length == 0)
            {
                // End of input: publish an empty lexeme.
                publish(lexeme);
                return;
            }

            // Skip line breaks and other whitespace between tokens.
            if (lexeme == "\n" || isWhitespace(lexeme))
                continue;

            if (lexeme == "\"" || lexeme == "\'" || lexeme == "`")
                readQuoted(lexeme);
            else
                publish(lexeme);
            return;
        }
    }

    /// Returns true if the first character of a non-empty lexeme is ASCII whitespace.
    bool isWhitespace(string lexeme)
    {
        return isWhite(lexeme[0]);
    }

    /**
     * Decodes the first four characters of `input` as a hexadecimal code
     * point and encodes it into `buffer`.
     *
     * If `input` is shorter than four characters or any of the four is not
     * a hexadecimal digit, code point 0 is encoded instead.
     *
     * Params:
     *   input  = text that follows a "\u" escape
     *   buffer = receives the encoded bytes
     * Returns: the value returned by the encoder for `buffer`
     *          (used by the caller as the number of bytes written)
     */
    size_t hexToUTF8(string input, ref char[4] buffer)
    {
        uint codepoint = 0;

        // TODO: invalid codepoint should be an error
        if (input.length >= 4)
        {
            uint value = 0;
            bool valid = true;
            foreach (char c; input[0..4])
            {
                uint digit;
                if (c >= '0' && c <= '9')
                    digit = c - '0';
                else if (c >= 'a' && c <= 'f')
                    digit = c - 'a' + 10;
                else if (c >= 'A' && c <= 'F')
                    digit = c - 'A' + 10;
                else
                {
                    valid = false;
                    break;
                }
                value = value * 16 + digit;
            }
            if (valid)
                codepoint = value;
        }

        return encoder.encode(codepoint, buffer);
    }

    /// Appends `lexeme` to the internal buffer and exposes the appended text as currentLexeme.
    private void publish(string lexeme)
    {
        internalString ~= lexeme;
        // The last buffer element is excluded from the slice.
        currentLexeme = cast(string)(internalString.data[$-1-lexeme.length..$-1]);
    }

    /**
     * Reads a quoted string whose opening `quote` has just been consumed.
     * The resulting lexeme starts with the opening quote and, if the string
     * is terminated, ends with the matching closing quote.
     */
    private void readQuoted(string quote)
    {
        size_t startPos = internalString.length;
        internalString ~= quote;
        size_t endPos = startPos;

        string lexeme = quote;
        while (lexeme.length)
        {
            lexeme = lexer.getLexeme();

            switch (lexeme)
            {
                case "\\a":  internalString ~= "\a"; break;
                case "\\b":  internalString ~= "\b"; break;
                case "\\f":  internalString ~= "\f"; break;
                case "\\n":  internalString ~= "\n"; break;
                case "\\r":  internalString ~= "\r"; break;
                case "\\t":  internalString ~= "\t"; break;
                case "\\v":  internalString ~= "\v"; break;
                case "\\\"": internalString ~= "\""; break;
                case "\\\'": internalString ~= "\'"; break;
                case "\\\\": internalString ~= "\\"; break;
                case "\\?":  internalString ~= "\?"; break;
                case "\\u":
                {
                    lexeme = lexer.getLexeme();
                    char[4] buffer;
                    auto num = hexToUTF8(lexeme, buffer);
                    internalString ~= cast(string)(buffer[0..num]);
                    // Only four characters belong to the escape; the rest of
                    // the lexeme is ordinary string content and must be kept.
                    if (lexeme.length > 4)
                        internalString ~= lexeme[4..$];
                    break;
                }
                default:
                    internalString ~= lexeme;
                    break;
            }

            endPos = internalString.length;
            if (lexeme == quote)
                break;
        }

        currentLexeme = cast(string)(internalString.data[startPos..endPos]);
    }
}
161 
/// Kinds of value a JSONValue can hold; selects which `as*` field is meaningful
enum JSONType
{
    /// JSON null; being the first member, it is the initial value of a JSONType field
    Null,
    /// Numeric value, kept in JSONValue.asNumber
    Number,
    /// String value, kept in JSONValue.asString
    String,
    /// Array value, kept in JSONValue.asArray
    Array,
    /// Object value, kept in JSONValue.asObject
    Object,
    /// Boolean value, kept in JSONValue.asBoolean
    Boolean
}
172 
/// JSON array: a dlib Array whose elements are JSONValue references
alias JSONArray = Array!JSONValue;

/// JSON object: a dlib Dict holding JSONValue references under string keys
alias JSONObject = Dict!(JSONValue, string);
178 
/**
 * A single node of a JSON document.
 *
 * `type` tells which of the `as*` fields carries the payload. Child values
 * added through addArrayElement or addField are destroyed together with
 * this value.
 */
class JSONValue
{
    JSONType type;
    double asNumber;
    string asString;
    JSONArray asArray;
    JSONObject asObject;
    bool asBoolean;

    /// Creates a value with empty payload fields; `type` is left at its initial value.
    this()
    {
        asBoolean = false;
        asObject = null;
        asString = "";
        asNumber = 0.0;
    }

    /// Marks this value as an array and appends `element` to it.
    void addArrayElement(JSONValue element)
    {
        asArray.append(element);
        type = JSONType.Array;
    }

    /// Marks this value as an object and stores `element` under `name`,
    /// creating the underlying dictionary on first use.
    void addField(string name, JSONValue element)
    {
        type = JSONType.Object;

        if (asObject is null)
        {
            asObject = New!JSONObject();
        }

        asObject[name] = element;
    }

    /// Destroys all array elements and object fields, then their containers.
    ~this()
    {
        if (asArray.length > 0)
        {
            foreach(idx, item; asArray.data)
            {
                Delete(item);
            }
            asArray.free();
        }

        if (asObject !is null)
        {
            foreach(key, item; asObject)
            {
                Delete(item);
            }
            Delete(asObject);
        }
    }
}
228 
/// JSON parsing result: element 0 is the success flag, element 1 the error message
/// (the parser passes an empty message on success)
alias JSONResult = Compound!(bool, string);

/// Predefined JSON parsing errors, each member being a ready-made failed JSONResult
enum JSONError
{
    /// The input ended where more data was expected
    EOI = JSONResult(false, "unexpected end of input")
}
237 
/**
 * JSON document.
 *
 * Parses the given text on construction. `isValid` tells whether parsing
 * succeeded and `root` holds the parsed tree. String values and field names
 * are slices of the lexer's internal buffer, which lives as long as the
 * document does.
 */
class JSONDocument
{
    public:
    /// True if the input was parsed without errors
    bool isValid;
    /// Root value of the document
    JSONValue root;

    /**
     * Parses `input`. On failure `isValid` is false and the error message
     * is written to standard output.
     */
    this(string input)
    {
        root = New!JSONValue();
        root.type = JSONType.Object;
        lexer = New!JSONLexer(input);
        JSONResult res = parseValue(root);
        isValid = res[0];
        if (!isValid)
            writeln(res[1]);
    }

    ~this()
    {
        Delete(root);
        Delete(lexer);
    }

    protected:

    JSONLexer lexer;

    /// Lexeme the parser is currently looking at; empty at the end of input.
    string currentLexeme() @property
    {
        return lexer.currentLexeme;
    }

    /// Advances the lexer to the next lexeme.
    void nextLexeme()
    {
        lexer.nextLexeme();
    }

    /**
     * Parses the value that starts at the current lexeme into `value`.
     *
     * On success the current lexeme is the last lexeme of the parsed value
     * (the closing bracket for containers); the caller advances past it.
     *
     * Returns: a JSONResult whose first element is false, with a message in
     *          the second element, if the input is malformed.
     */
    JSONResult parseValue(JSONValue value)
    {
        if (!currentLexeme.length)
            return JSONError.EOI;

        if (currentLexeme == "{")
            return parseObject(value);

        if (currentLexeme == "[")
            return parseArray(value);

        return parseScalar(value);
    }

    /// Parses the fields of an object whose "{" is the current lexeme.
    JSONResult parseObject(JSONValue value)
    {
        // Mark the value as an object even when it turns out to be empty,
        // and make sure an object-typed value always has a dictionary.
        value.type = JSONType.Object;
        if (value.asObject is null)
            value.asObject = New!JSONObject();

        nextLexeme();
        while (currentLexeme.length && currentLexeme != "}")
        {
            string identifier = currentLexeme;
            // The length check rejects a lone quote, which would otherwise
            // pass both quote tests and then be sliced out of range.
            if (identifier.length < 2 || identifier[0] != '\"' || identifier[$-1] != '\"')
                return JSONResult(false, format("illegal identifier \"%s\"", identifier));
            identifier = identifier[1..$-1];

            nextLexeme();
            if (currentLexeme != ":")
                return JSONResult(false, format("\":\" expected, got \"%s\"", currentLexeme));

            nextLexeme();
            JSONValue newValue = New!JSONValue();
            JSONResult res = parseValue(newValue);
            if (!res[0])
            {
                // Not attached to the tree yet, so nobody else would free it.
                Delete(newValue);
                return res;
            }

            value.addField(identifier, newValue);

            nextLexeme();

            if (currentLexeme == ",")
                nextLexeme();
            else if (currentLexeme != "}")
                return JSONResult(false, format("\"}\" expected, got \"%s\"", currentLexeme));
        }

        // The loop also ends when the input runs out before the closing "}".
        if (!currentLexeme.length)
            return JSONError.EOI;

        return JSONResult(true, "");
    }

    /// Parses the elements of an array whose "[" is the current lexeme.
    JSONResult parseArray(JSONValue value)
    {
        // Mark the value as an array even when it turns out to be empty.
        value.type = JSONType.Array;

        nextLexeme();
        while (currentLexeme.length && currentLexeme != "]")
        {
            JSONValue newValue = New!JSONValue();
            JSONResult res = parseValue(newValue);
            if (!res[0])
            {
                // Not attached to the tree yet, so nobody else would free it.
                Delete(newValue);
                return res;
            }

            value.addArrayElement(newValue);

            nextLexeme();

            if (currentLexeme == ",")
                nextLexeme();
            else if (currentLexeme != "]")
                return JSONResult(false, format("\"]\" expected, got \"%s\"", currentLexeme));
        }

        // The loop also ends when the input runs out before the closing "]".
        if (!currentLexeme.length)
            return JSONError.EOI;

        return JSONResult(true, "");
    }

    /// Parses a string, boolean, null or number from the current (non-empty) lexeme.
    JSONResult parseScalar(JSONValue value)
    {
        string data = currentLexeme;

        if (data[0] == '\"')
        {
            // A terminated string holds at least the two quotes.
            if (data.length < 2 || data[$-1] != '\"')
                return JSONResult(false, format("illegal string \"%s\"", data));
            value.type = JSONType.String;
            value.asString = data[1..$-1];
        }
        else if (data == "true" || data == "false")
        {
            value.type = JSONType.Boolean;
            value.asBoolean = (data == "true");
        }
        else if (data == "null")
        {
            value.type = JSONType.Null;
        }
        else
        {
            // Anything else must be a number; report malformed tokens as a
            // parse error instead of letting the conversion exception escape.
            try
            {
                value.asNumber = data.to!double;
            }
            catch (ConvException e)
            {
                return JSONResult(false, format("illegal value \"%s\"", data));
            }
            value.type = JSONType.Number;
        }

        return JSONResult(true, "");
    }
}
358 
// Parses a small document and checks one value of each kind it contains:
// string, number, boolean and array element.
unittest
{
    string input = "
    {
        \"foo\": \"bar\",
        \"test\": 100,
        \"bool\": true,
        \"arr\": [0, 1, 2]
    }
    ";

    auto doc = New!JSONDocument(input);
    JSONObject fields = doc.root.asObject;

    JSONValue foo = fields["foo"];
    assert(foo.asString == "bar");

    JSONValue test = fields["test"];
    assert(test.asNumber == 100);

    JSONValue flag = fields["bool"];
    assert(flag.asBoolean == true);

    JSONValue arr = fields["arr"];
    assert(arr.asArray[1].asNumber == 1);

    Delete(doc);
}