You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucy.apache.org by ma...@apache.org on 2011/08/12 05:31:04 UTC
[lucy-commits] svn commit: r1156951 - in /incubator/lucy/trunk: ./ clownfish/src/
core/Lucy/Test/Util/ core/Lucy/Util/ core/Lucy/Util/Json/ perl/
perl/buildlib/Lucy/ perl/lib/ perl/xs/Lucy/Util/
Author: marvin
Date: Fri Aug 12 03:31:03 2011
New Revision: 1156951
URL: http://svn.apache.org/viewvc?rev=1156951&view=rev
Log:
LUCY-133 Replace JSON::XS with Lemon-powered parser.
Eliminate Lucy's runtime dependency on the CPAN module JSON::XS with a new
parser powered by the Lemon parser generator which operates directly on
Clownfish data structures.
Added:
incubator/lucy/trunk/core/Lucy/Util/Json/
incubator/lucy/trunk/core/Lucy/Util/Json.c
- copied, changed from r1154651, incubator/lucy/trunk/perl/xs/Lucy/Util/Json.c
incubator/lucy/trunk/core/Lucy/Util/Json/JsonParser.y
Removed:
incubator/lucy/trunk/perl/xs/Lucy/Util/Json.c
Modified:
incubator/lucy/trunk/STATUS
incubator/lucy/trunk/clownfish/src/CFCBindAliases.c
incubator/lucy/trunk/core/Lucy/Test/Util/TestJson.c
incubator/lucy/trunk/core/Lucy/Util/Json.cfh
incubator/lucy/trunk/perl/Build.PL
incubator/lucy/trunk/perl/MANIFEST
incubator/lucy/trunk/perl/buildlib/Lucy/Build.pm
incubator/lucy/trunk/perl/lib/Lucy.pm
Modified: incubator/lucy/trunk/STATUS
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/STATUS?rev=1156951&r1=1156950&r2=1156951&view=diff
==============================================================================
--- incubator/lucy/trunk/STATUS (original)
+++ incubator/lucy/trunk/STATUS Fri Aug 12 03:31:03 2011
@@ -29,9 +29,6 @@ TODO LIST:
<https://issues.apache.org/jira/browse/LUCY-143>
<https://issues.apache.org/jira/browse/LUCY-134>
- * Replace dependency on JSON::XS with ???
- <https://issues.apache.org/jira/browse/LUCY-133>
-
* Refactor away C89 idioms, since we have chosen the intersection of C99
and C++ as our C dialect.
<https://issues.apache.org/jira/browse/LUCY-144>
Modified: incubator/lucy/trunk/clownfish/src/CFCBindAliases.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/clownfish/src/CFCBindAliases.c?rev=1156951&r1=1156950&r2=1156951&view=diff
==============================================================================
--- incubator/lucy/trunk/clownfish/src/CFCBindAliases.c (original)
+++ incubator/lucy/trunk/clownfish/src/CFCBindAliases.c Fri Aug 12 03:31:03 2011
@@ -91,6 +91,7 @@ struct alias aliases[] = {
{"Cfish_VA_Get_Size", "Lucy_VA_Get_Size"},
{"Cfish_VA_Resize", "Lucy_VA_Resize"},
{"Cfish_VA_Store", "Lucy_VA_Store"},
+ {"Cfish_VA_Push", "Lucy_VA_Push"},
{"cfish_VTable", "lucy_VTable"},
{"CFISH_VTABLE", "LUCY_VTABLE"},
Modified: incubator/lucy/trunk/core/Lucy/Test/Util/TestJson.c
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Test/Util/TestJson.c?rev=1156951&r1=1156950&r2=1156951&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Test/Util/TestJson.c (original)
+++ incubator/lucy/trunk/core/Lucy/Test/Util/TestJson.c Fri Aug 12 03:31:03 2011
@@ -269,9 +269,85 @@ test_syntax_errors(TestBatch *batch) {
S_verify_bad_syntax(batch, "\"\\uAAAZ\"", "invalid \\u escape");
}
+static void
+S_round_trip_integer(TestBatch *batch, int64_t value) {
+ Integer64 *num = Int64_new(value);
+ VArray *array = VA_new(1);
+ VA_Store(array, 0, (Obj*)num);
+ CharBuf *json = Json_to_json((Obj*)array);
+ Obj *dump = Json_from_json(json);
+ TEST_TRUE(batch, VA_Equals(array, dump), "Round trip integer %ld",
+ (long)value);
+ DECREF(dump);
+ DECREF(json);
+ DECREF(array);
+}
+
+static void
+test_integers(TestBatch *batch) {
+ S_round_trip_integer(batch, 0);
+ S_round_trip_integer(batch, -1);
+ S_round_trip_integer(batch, -1000000);
+ S_round_trip_integer(batch, 1000000);
+}
+
+static void
+S_round_trip_float(TestBatch *batch, double value, double max_diff) {
+ Float64 *num = Float64_new(value);
+ VArray *array = VA_new(1);
+ VA_Store(array, 0, (Obj*)num);
+ CharBuf *json = Json_to_json((Obj*)array);
+ Obj *dump = CERTIFY(Json_from_json(json), VARRAY);
+ Float64 *got = (Float64*)CERTIFY(VA_Fetch((VArray*)dump, 0), FLOAT64);
+ double diff = Float64_Get_Value(num) - Float64_Get_Value(got);
+ if (diff < 0) { diff = 0 - diff; }
+ TEST_TRUE(batch, diff <= max_diff, "Round trip float %f", value);
+ DECREF(dump);
+ DECREF(json);
+ DECREF(array);
+}
+
+static void
+test_floats(TestBatch *batch) {
+ S_round_trip_float(batch, 0.0, 0.0);
+ S_round_trip_float(batch, 0.1, 0.00001);
+ S_round_trip_float(batch, -0.1, 0.00001);
+ S_round_trip_float(batch, 1000000.5, 1.0);
+ S_round_trip_float(batch, -1000000.5, 1.0);
+}
+
+static void
+test_max_depth(TestBatch *batch) {
+ Hash *circular = Hash_new(0);
+ Hash_Store_Str(circular, "circular", 8, INCREF(circular));
+ Err_set_error(NULL);
+ CharBuf *not_json = Json_to_json((Obj*)circular);
+ TEST_TRUE(batch, not_json == NULL,
+ "to_json returns NULL when fed recursing data");
+ TEST_TRUE(batch, Err_get_error() != NULL,
+ "to_json sets Err_error when fed recursing data");
+ DECREF(Hash_Delete_Str(circular, "circular", 8));
+ DECREF(circular);
+}
+
+static void
+test_illegal_keys(TestBatch *batch) {
+ Hash *hash = Hash_new(0);
+ Float64 *key = Float64_new(1.1);
+ Hash_Store(hash, (Obj*)key, (Obj*)CB_newf("blah"));
+ Err_set_error(NULL);
+ CharBuf *not_json = Json_to_json((Obj*)hash);
+ TEST_TRUE(batch, not_json == NULL,
+ "to_json returns NULL when fed an illegal key");
+ TEST_TRUE(batch, Err_get_error() != NULL,
+ "to_json sets Err_error when fed an illegal key");
+ DECREF(key);
+ DECREF(hash);
+}
+
void
TestJson_run_tests() {
- int num_tests = 94;
+ int num_tests = 107;
#ifndef LUCY_VALGRIND
num_tests += 28; // FIXME: syntax errors leak memory.
#endif
@@ -286,6 +362,10 @@ TestJson_run_tests() {
test_escapes(batch);
test_numbers(batch);
test_spew_and_slurp(batch);
+ test_integers(batch);
+ test_floats(batch);
+ test_max_depth(batch);
+ test_illegal_keys(batch);
#ifndef LUCY_VALGRIND
test_syntax_errors(batch);
Copied: incubator/lucy/trunk/core/Lucy/Util/Json.c (from r1154651, incubator/lucy/trunk/perl/xs/Lucy/Util/Json.c)
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Util/Json.c?p2=incubator/lucy/trunk/core/Lucy/Util/Json.c&p1=incubator/lucy/trunk/perl/xs/Lucy/Util/Json.c&r1=1154651&r2=1156951&rev=1156951&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/xs/Lucy/Util/Json.c (original)
+++ incubator/lucy/trunk/core/Lucy/Util/Json.c Fri Aug 12 03:31:03 2011
@@ -14,46 +14,677 @@
* limitations under the License.
*/
+#include <ctype.h>
+#include <stdio.h>
+
#include "Lucy/Util/ToolSet.h"
#include "Lucy/Util/Json.h"
#include "Lucy/Object/Host.h"
#include "Lucy/Store/Folder.h"
+#include "Lucy/Store/InStream.h"
+#include "Lucy/Store/OutStream.h"
+#include "Lucy/Util/Memory.h"
+#include "Lucy/Util/Json/JsonParser.h"
-bool_t
-Json_spew_json(Obj *dump, Folder *folder, const CharBuf *path) {
- bool_t result = (bool_t)Host_callback_i64(JSON, "spew_json", 3,
- ARG_OBJ("dump", dump),
- ARG_OBJ("folder", folder),
- ARG_STR("path", path));
- if (!result) { ERR_ADD_FRAME(Err_get_error()); }
- return result;
+/* Routines generated by Lemon. */
+void*
+LucyParseJsonAlloc(void * (*allocate)(size_t));
+void
+LucyParseJson(void *json_parser, int token_type, lucy_Obj *value,
+ lucy_JsonParserState *state);
+void
+LucyParseJsonFree(void *json_parser, void(*freemem)(void*));
+void
+LucyParseJsonTrace(FILE *trace, char *line_prefix);
+
+// Encode JSON for supplied "dump". On failure, sets Err_error and returns
+// false.
+static bool_t
+S_to_json(Obj *dump, CharBuf *json, int32_t depth);
+
+// Parse JSON from raw UTF-8 in memory.
+static Obj*
+S_parse_json(char *text, size_t size);
+static Obj*
+S_do_parse_json(void *json_parser, char *json, size_t len);
+
+// Parse a JSON number. Advance the text buffer just past the number.
+static Float64*
+S_parse_number(char **json_ptr, char *const limit);
+
+// Parse a JSON string. Advance the text buffer from pointing at the opening
+// double quote to pointing just after the closing double quote.
+static CharBuf*
+S_parse_string(char **json_ptr, char *const limit);
+
+// Unescape JSON string text. Expects pointers bookending the text data (i.e.
+// pointing just after the opening double quote and directly at the closing
+// double quote), and assumes that escapes have already been sanity checked
+// for length.
+static CharBuf*
+S_unescape_text(char *const top, char *const end);
+
+// Check that the supplied text begins with the specified keyword, which must
+// then end on a word boundary (i.e. match "null" but not the first four
+// letters of "nullify").
+static INLINE bool_t
+SI_check_keyword(char *json, char* end, const char *keyword, size_t len);
+
+// Make it possible to loosen constraints during testing.
+static bool_t tolerant = false;
+
+// Indentation: two spaces per level.
+static const char indentation[] = " ";
+static const size_t INDENTATION_LEN = sizeof(indentation) - 1;
+
+// Append indentation spaces x depth.
+static void
+S_cat_whitespace(CharBuf *json, int32_t depth);
+
+// Set Err_error, appending escaped JSON in the vicinity of the error.
+static void
+S_set_error(CharBuf *mess, char *json, char *limit, int line,
+ const char *func);
+#define SET_ERROR(_mess, _json, _end) \
+ S_set_error(_mess, _json, _end, __LINE__, CFISH_ERR_FUNC_MACRO)
+
+Obj*
+Json_from_json(CharBuf *json) {
+ Obj *dump = S_parse_json((char*)CB_Get_Ptr8(json), CB_Get_Size(json));
+ if (!dump) {
+ ERR_ADD_FRAME(Err_get_error());
+ }
+ return dump;
}
Obj*
Json_slurp_json(Folder *folder, const CharBuf *path) {
- Obj *dump = Host_callback_obj(JSON, "slurp_json", 2,
- ARG_OBJ("folder", folder),
- ARG_STR("path", path));
- if (!dump) { ERR_ADD_FRAME(Err_get_error()); }
+ InStream *instream = Folder_Open_In(folder, path);
+ if (!instream) {
+ ERR_ADD_FRAME(Err_get_error());
+ return NULL;
+ }
+ size_t len = (size_t)InStream_Length(instream);
+ char *buf = InStream_Buf(instream, len);
+ Obj *dump = S_parse_json(buf, len);
+ InStream_Close(instream);
+ DECREF(instream);
+ if (!dump) {
+ ERR_ADD_FRAME(Err_get_error());
+ }
return dump;
}
+bool_t
+Json_spew_json(Obj *dump, Folder *folder, const CharBuf *path) {
+ CharBuf *json = Json_to_json(dump);
+ if (!json) {
+ ERR_ADD_FRAME(Err_get_error());
+ return false;
+ }
+ OutStream *outstream = Folder_Open_Out(folder, path);
+ if (!outstream) {
+ ERR_ADD_FRAME(Err_get_error());
+ DECREF(json);
+ return false;
+ }
+ size_t size = CB_Get_Size(json);
+ OutStream_Write_Bytes(outstream, CB_Get_Ptr8(json), size);
+ OutStream_Close(outstream);
+ DECREF(outstream);
+ DECREF(json);
+ return true;
+}
+
CharBuf*
Json_to_json(Obj *dump) {
- return Host_callback_str(JSON, "to_json", 1,
- ARG_OBJ("dump", dump));
-}
+ // Validate object type, only allowing hashes and arrays per JSON spec.
+ if (!dump || !(Obj_Is_A(dump, HASH) || Obj_Is_A(dump, VARRAY))) {
+ if (!tolerant) {
+ CharBuf *class_name = dump ? Obj_Get_Class_Name(dump) : NULL;
+ CharBuf *mess = MAKE_MESS("Illegal top-level object type: %o",
+ class_name);
+ Err_set_error(Err_new(mess));
+ return NULL;
+ }
+ }
-Obj*
-Json_from_json(CharBuf *json) {
- return Host_callback_obj(JSON, "from_json", 1,
- ARG_STR("json", json));
+ // Encode.
+ CharBuf *json = CB_new(31);
+ if (!S_to_json(dump, json, 0)) {
+ DECREF(json);
+ ERR_ADD_FRAME(Err_get_error());
+ json = NULL;
+ }
+ else {
+ // Append newline.
+ CB_Cat_Trusted_Str(json, "\n", 1);
+ }
+
+ return json;
}
void
-Json_set_tolerant(bool_t tolerant) {
- Host_callback(JSON, "set_tolerant", 1,
- ARG_I32("tolerant", tolerant));
+Json_set_tolerant(bool_t tolerance) {
+ tolerant = tolerance;
+}
+
+static const int32_t MAX_DEPTH = 200;
+
+static void
+S_append_json_string(Obj *dump, CharBuf *json) {
+ // Append opening quote.
+ CB_Cat_Trusted_Str(json, "\"", 1);
+
+ // Process string data.
+ ZombieCharBuf *iterator = ZCB_WRAP((CharBuf*)dump);
+ while (ZCB_Get_Size(iterator)) {
+ uint32_t code_point = ZCB_Nip_One(iterator);
+ if (code_point > 127) {
+ // There is no need to escape any high characters, including those
+ // above the BMP, as we assume that the destination channel can
+ // handle arbitrary UTF-8 data.
+ CB_Cat_Char(json, code_point);
+ }
+ else {
+ char buffer[7];
+ size_t len;
+ switch (code_point & 127) {
+ // Perform all mandatory escapes enumerated in the JSON spec.
+ // Note that the spec makes escaping forward slash optional;
+ // we choose not to.
+ case 0x00: case 0x01: case 0x02: case 0x03:
+ case 0x04: case 0x05: case 0x06: case 0x07:
+ case 0x0b: case 0x0e: case 0x0f:
+ case 0x10: case 0x11: case 0x12: case 0x13:
+ case 0x14: case 0x15: case 0x16: case 0x17:
+ case 0x18: case 0x19: case 0x1a: case 0x1b:
+ case 0x1c: case 0x1d: case 0x1e: case 0x1f: {
+ sprintf(buffer, "\\u%04x", (unsigned)code_point);
+ len = 6;
+ break;
+ }
+ case '\b':
+ memcpy(buffer, "\\b", 2);
+ len = 2;
+ break;
+ case '\t':
+ memcpy(buffer, "\\t", 2);
+ len = 2;
+ break;
+ case '\n':
+ memcpy(buffer, "\\n", 2);
+ len = 2;
+ break;
+ case '\f':
+ memcpy(buffer, "\\f", 2);
+ len = 2;
+ break;
+ case '\r':
+ memcpy(buffer, "\\r", 2);
+ len = 2;
+ break;
+ case '\\':
+ memcpy(buffer, "\\\\", 2);
+ len = 2;
+ break;
+ case '\"':
+ memcpy(buffer, "\\\"", 2);
+ len = 2;
+ break;
+
+ // Ordinary printable ASCII.
+ default:
+ buffer[0] = (char)code_point;
+ len = 1;
+ }
+ CB_Cat_Trusted_Str(json, buffer, len);
+ }
+ }
+
+ // Append closing quote.
+ CB_Cat_Trusted_Str(json, "\"", 1);
+}
+
+static void
+S_cat_whitespace(CharBuf *json, int32_t depth) {
+ while (depth--) {
+ CB_Cat_Trusted_Str(json, indentation, INDENTATION_LEN);
+ }
+}
+
+// Recursively serialize "dump" onto the end of "json".  "depth" is the
+// current nesting level and controls indentation of nested containers.
+//
+// NULL is written as "null", the CFISH_TRUE/CFISH_FALSE singletons as
+// "true"/"false", CharBufs as escaped strings, IntNums and FloatNums as
+// numbers, VArrays as arrays, and Hashes as objects whose keys are ordered
+// by VA_Sort.
+//
+// Returns true on success.  Returns false after setting Err_error if
+// "depth" exceeds MAX_DEPTH or if a Hash key is not a CharBuf.
+static bool_t
+S_to_json(Obj *dump, CharBuf *json, int32_t depth) {
+    // Guard against infinite recursion in self-referencing data structures.
+    if (depth > MAX_DEPTH) {
+        CharBuf *mess = MAKE_MESS("Exceeded max depth of %i32", MAX_DEPTH);
+        Err_set_error(Err_new(mess));
+        return false;
+    }
+
+    if (!dump) {
+        CB_Cat_Trusted_Str(json, "null", 4);
+    }
+    else if (dump == (Obj*)CFISH_TRUE) {
+        CB_Cat_Trusted_Str(json, "true", 4);
+    }
+    else if (dump == (Obj*)CFISH_FALSE) {
+        CB_Cat_Trusted_Str(json, "false", 5);
+    }
+    else if (Obj_Is_A(dump, CHARBUF)) {
+        S_append_json_string(dump, json);
+    }
+    else if (Obj_Is_A(dump, INTNUM)) {
+        CB_catf(json, "%i64", Obj_To_I64(dump));
+    }
+    else if (Obj_Is_A(dump, FLOATNUM)) {
+        CB_catf(json, "%f64", Obj_To_F64(dump));
+    }
+    else if (Obj_Is_A(dump, VARRAY)) {
+        VArray *array = (VArray*)dump;
+        size_t size = VA_Get_Size(array);
+        if (size == 0) {
+            // Put empty array on single line.
+            CB_Cat_Trusted_Str(json, "[]", 2);
+            return true;
+        }
+        else if (size == 1) {
+            Obj *elem = VA_Fetch(array, 0);
+            // A NULL element (JSON null) counts as a scalar.  Test for it
+            // before calling Obj_Is_A, matching the NULL checks performed
+            // ahead of Obj_Is_A on hash keys below.
+            if (!elem || !(Obj_Is_A(elem, HASH) || Obj_Is_A(elem, VARRAY))) {
+                // Put array containing single scalar element on one line.
+                CB_Cat_Trusted_Str(json, "[", 1);
+                if (!S_to_json(elem, json, depth + 1)) {
+                    return false;
+                }
+                CB_Cat_Trusted_Str(json, "]", 1);
+                return true;
+            }
+        }
+        // Fall back to spreading elements across multiple lines.
+        CB_Cat_Trusted_Str(json, "[", 1);
+        for (size_t i = 0; i < size; i++) {
+            CB_Cat_Trusted_Str(json, "\n", 1);
+            S_cat_whitespace(json, depth + 1);
+            if (!S_to_json(VA_Fetch(array, i), json, depth + 1)) {
+                return false;
+            }
+            if (i + 1 < size) {
+                CB_Cat_Trusted_Str(json, ",", 1);
+            }
+        }
+        CB_Cat_Trusted_Str(json, "\n", 1);
+        S_cat_whitespace(json, depth);
+        CB_Cat_Trusted_Str(json, "]", 1);
+    }
+    else if (Obj_Is_A(dump, HASH)) {
+        Hash *hash = (Hash*)dump;
+        size_t size = Hash_Get_Size(hash);
+
+        // Put empty hash on single line.
+        if (size == 0) {
+            CB_Cat_Trusted_Str(json, "{}", 2);
+            return true;
+        }
+
+        // Validate that all keys are strings, then sort.
+        VArray *keys = Hash_Keys(hash);
+        for (size_t i = 0; i < size; i++) {
+            Obj *key = VA_Fetch(keys, i);
+            if (!key || !Obj_Is_A(key, CHARBUF)) {
+                DECREF(keys);
+                CharBuf *key_class = key ? Obj_Get_Class_Name(key) : NULL;
+                CharBuf *mess = MAKE_MESS("Illegal key type: %o", key_class);
+                Err_set_error(Err_new(mess));
+                return false;
+            }
+        }
+        VA_Sort(keys, NULL, NULL);
+
+        // Spread pairs across multiple lines.
+        CB_Cat_Trusted_Str(json, "{", 1);
+        for (size_t i = 0; i < size; i++) {
+            Obj *key = VA_Fetch(keys, i);
+            CB_Cat_Trusted_Str(json, "\n", 1);
+            S_cat_whitespace(json, depth + 1);
+            S_append_json_string(key, json);
+            CB_Cat_Trusted_Str(json, ": ", 2);
+            if (!S_to_json(Hash_Fetch(hash, key), json, depth + 1)) {
+                DECREF(keys);
+                return false;
+            }
+            if (i + 1 < size) {
+                CB_Cat_Trusted_Str(json, ",", 1);
+            }
+        }
+        CB_Cat_Trusted_Str(json, "\n", 1);
+        S_cat_whitespace(json, depth);
+        CB_Cat_Trusted_Str(json, "}", 1);
+
+        DECREF(keys);
+    }
+
+    return true;
+}
+
+static Obj*
+S_parse_json(char *text, size_t size) {
+ void *json_parser = LucyParseJsonAlloc(lucy_Memory_wrapped_malloc);
+ if (json_parser == NULL) {
+ CharBuf *mess = MAKE_MESS("Failed to allocate JSON parser");
+ Err_set_error(Err_new(mess));
+ return NULL;
+ }
+ Obj *dump = S_do_parse_json(json_parser, text, size);
+ LucyParseJsonFree(json_parser, lucy_Memory_wrapped_free);
+ return dump;
+}
+
+static Obj*
+S_do_parse_json(void *json_parser, char *json, size_t len) {
+ lucy_JsonParserState state;
+ state.result = NULL;
+ state.errors = false;
+
+ char *text = json;
+ char *const end = text + len;
+ while (text < end) {
+ int token_type = -1;
+ Obj *value = NULL;
+ char *const save = text;
+ switch (*text) {
+ case ' ': case '\n': case '\r': case '\t':
+ // Skip insignificant whitespace, which the JSON RFC defines
+ // as only four ASCII characters.
+ text++;
+ continue;
+ case '[':
+ token_type = LUCY_JSON_TOKENTYPE_LEFT_SQUARE_BRACKET;
+ text++;
+ break;
+ case ']':
+ token_type = LUCY_JSON_TOKENTYPE_RIGHT_SQUARE_BRACKET;
+ text++;
+ break;
+ case '{':
+ token_type = LUCY_JSON_TOKENTYPE_LEFT_CURLY_BRACKET;
+ text++;
+ break;
+ case '}':
+ token_type = LUCY_JSON_TOKENTYPE_RIGHT_CURLY_BRACKET;
+ text++;
+ break;
+ case ':':
+ token_type = LUCY_JSON_TOKENTYPE_COLON;
+ text++;
+ break;
+ case ',':
+ token_type = LUCY_JSON_TOKENTYPE_COMMA;
+ text++;
+ break;
+ case '"':
+ value = (Obj*)S_parse_string(&text, end);
+ if (value) {
+ token_type = LUCY_JSON_TOKENTYPE_STRING;
+ }
+ else {
+ // Clear out parser and return.
+ LucyParseJson(json_parser, 0, NULL, &state);
+ ERR_ADD_FRAME(Err_get_error());
+ return NULL;
+ }
+ break;
+ case 'n':
+ if (SI_check_keyword(text, end, "null", 4)) {
+ token_type = LUCY_JSON_TOKENTYPE_NULL;
+ text += 4;
+ }
+ break;
+ case 't':
+ if (SI_check_keyword(text, end, "true", 4)) {
+ token_type = LUCY_JSON_TOKENTYPE_TRUE;
+ value = (Obj*)CFISH_TRUE;
+ text += 4;
+ }
+ break;
+ case 'f':
+ if (SI_check_keyword(text, end, "false", 5)) {
+ token_type = LUCY_JSON_TOKENTYPE_FALSE;
+ value = (Obj*)CFISH_FALSE;
+ text += 5;
+ }
+ break;
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '-': { // Note no '+', as JSON spec doesn't allow it.
+ value = (Obj*)S_parse_number(&text, end);
+ if (value) {
+ token_type = LUCY_JSON_TOKENTYPE_NUMBER;
+ }
+ else {
+ // Clear out parser and return.
+ LucyParseJson(json_parser, 0, NULL, &state);
+ ERR_ADD_FRAME(Err_get_error());
+ return NULL;
+ }
+ }
+ break;
+ }
+ LucyParseJson(json_parser, token_type, value, &state);
+ if (state.errors) {
+ SET_ERROR(CB_newf("JSON syntax error"), save, end);
+ return NULL;
+ }
+ }
+
+ // Finish up.
+ LucyParseJson(json_parser, 0, NULL, &state);
+ if (state.errors) {
+ SET_ERROR(CB_newf("JSON syntax error"), json, end);
+ return NULL;
+ }
+ return state.result;
+}
+
+// Parse a JSON number starting at *json_ptr.  On success, advance *json_ptr
+// just past the number and return a new Float64.  On failure, set Err_error
+// and return NULL, leaving *json_ptr untouched.
+static Float64*
+S_parse_number(char **json_ptr, char *const limit) {
+    char *top = *json_ptr;
+    char *end = top;
+    bool_t terminated = false;
+
+    // We can't assume NUL termination for the JSON string, so we need to
+    // ensure that strtod() cannot overrun and access invalid memory.
+    for (; end < limit; end++) {
+        const char c = *end;
+        // Only these characters may legally follow a number in Javascript.
+        // If we don't find one before the end of the JSON, it's a parse
+        // error.  Stop at the first one: strtod() cannot consume any of
+        // them, and scanning on to the limit for every number would make
+        // parsing quadratic in the size of the input.
+        if (c == ' ' || c == '\n' || c == '\r' || c == '\t'
+            || c == ']' || c == '}' || c == ':' || c == ','
+           ) {
+            terminated = true;
+            break;
+        }
+    }
+
+    // NOTE(review): strtod() also accepts spellings which JSON forbids
+    // (hexadecimal, "inf", "nan"); such input is not rejected here.
+    Float64 *result = NULL;
+    if (terminated) {
+        char *terminus;
+        double number = strtod(top, &terminus);
+        if (terminus != top) {
+            *json_ptr = terminus;
+            result = Float64_new(number);
+        }
+    }
+    if (!result) {
+        SET_ERROR(CB_newf("JSON syntax error"), top, limit);
+    }
+    return result;
+}
+
+static CharBuf*
+S_parse_string(char **json_ptr, char *const limit) {
+ // Find terminating double quote, determine whether there are any escapes.
+ char *top = *json_ptr + 1;
+ char *end = NULL;
+ bool_t saw_backslash = false;
+ for (char *text = top; text < limit; text++) {
+ if (*text == '"') {
+ end = text;
+ break;
+ }
+ else if (*text == '\\') {
+ saw_backslash = true;
+ if (text + 1 < limit && text[1] == 'u') {
+ text += 5;
+ }
+ else {
+ text += 1;
+ }
+ }
+ }
+ if (!end) {
+ SET_ERROR(CB_newf("Unterminated string"), *json_ptr, limit);
+ return NULL;
+ }
+
+ // Advance the text buffer to just beyond the closing quote.
+ *json_ptr = end + 1;
+
+ if (saw_backslash) {
+ return S_unescape_text(top, end);
+ }
+ else {
+ // Optimize common case where there are no escapes.
+ size_t len = end - top;
+ if (!StrHelp_utf8_valid(top, len)) {
+ CharBuf *mess = MAKE_MESS("Bad UTF-8 in JSON");
+ Err_set_error(Err_new(mess));
+ return NULL;
+ }
+ return CB_new_from_trusted_utf8(top, len);
+ }
+}
+
+// Decode the backslash escapes in the JSON string body [top, end) and return
+// the result as a new CharBuf.  Sets Err_error and returns NULL on an
+// unrecognized escape, a \u escape that is not exactly four hex digits, a \u
+// escape in the surrogate range, or output that is not valid UTF-8.
+static CharBuf*
+S_unescape_text(char *const top, char *const end) {
+    // The unescaped string will never be longer than the escaped string
+    // because only a \u escape can theoretically be too long and
+    // StrHelp_encode_utf8_char guards against sequences over 4 bytes.
+    // Therefore we can allocate once and not worry about reallocating.
+    size_t cap = end - top + 1;
+    char *target_buf = (char*)MALLOCATE(cap);
+    size_t target_size = 0;
+    for (char *text = top; text < end; text++) {
+        if (*text != '\\') {
+            target_buf[target_size++] = *text;
+        }
+        else {
+            // Process escape.
+            text++;
+            switch (*text) {
+                case '"':
+                    target_buf[target_size++] = '"';
+                    break;
+                case '\\':
+                    target_buf[target_size++] = '\\';
+                    break;
+                case '/':
+                    target_buf[target_size++] = '/';
+                    break;
+                case 'b':
+                    target_buf[target_size++] = '\b';
+                    break;
+                case 'f':
+                    target_buf[target_size++] = '\f';
+                    break;
+                case 'n':
+                    target_buf[target_size++] = '\n';
+                    break;
+                case 'r':
+                    target_buf[target_size++] = '\r';
+                    break;
+                case 't':
+                    target_buf[target_size++] = '\t';
+                    break;
+                case 'u': {
+                        // Copy into a temp buffer because strtol will overrun
+                        // into adjacent text data for e.g. "\uAAAA1".
+                        char temp[5] = { 0, 0, 0, 0, 0 };
+                        memcpy(temp, text + 1, 4);
+                        text += 4;
+
+                        // Require exactly four hex digits.  strtol on its
+                        // own is too lenient: in base 16 it skips leading
+                        // whitespace and accepts a sign or a "0x" prefix,
+                        // so "\u0x1F" or "\u +1F" would otherwise pass.
+                        bool_t all_hex = true;
+                        for (int i = 0; i < 4; i++) {
+                            if (!isxdigit((unsigned char)temp[i])) {
+                                all_hex = false;
+                            }
+                        }
+                        long code_point = all_hex
+                                          ? strtol(temp, NULL, 16)
+                                          : -1;
+                        if (code_point < 0) {
+                            FREEMEM(target_buf);
+                            SET_ERROR(CB_newf("Invalid \\u escape"), text - 5, end);
+                            return NULL;
+                        }
+                        if (code_point >= 0xD800 && code_point <= 0xDFFF) {
+                            FREEMEM(target_buf);
+                            SET_ERROR(CB_newf("Surrogate pairs not supported"),
+                                      text - 5, end);
+                            return NULL;
+                        }
+                        target_size += StrHelp_encode_utf8_char((uint32_t)code_point,
+                                                                target_buf + target_size);
+                    }
+                    break;
+                default:
+                    FREEMEM(target_buf);
+                    SET_ERROR(CB_newf("Illegal escape"), text - 1, end);
+                    return NULL;
+            }
+        }
+    }
+
+    // NUL-terminate, sanity check, then return the unescaped string.
+    target_buf[target_size] = '\0';
+    if (!StrHelp_utf8_valid(target_buf, target_size)) {
+        FREEMEM(target_buf);
+        CharBuf *mess = MAKE_MESS("Bad UTF-8 in JSON");
+        Err_set_error(Err_new(mess));
+        return NULL;
+    }
+    return CB_new_steal_from_trusted_str(target_buf, target_size, cap);
+}
+
+// Return true if the text at "json" begins with "keyword" (of length "len")
+// and the keyword ends on a word boundary, i.e. is not followed by an
+// underscore or an alphanumeric character.  At least one character must
+// follow the keyword before "end", so a keyword which is the very last thing
+// in the buffer does not match.
+static INLINE bool_t
+SI_check_keyword(char *json, char* end, const char *keyword, size_t len) {
+    // The tokenizer loop only calls this while json < end, so the pointer
+    // difference is non-negative and safe to compare as a size_t.
+    const size_t remaining = (size_t)(end - json);
+    if (remaining > len
+        && strncmp(json, keyword, len) == 0
+        && json[len] != '_'
+        // <ctype.h> classifiers require a value representable as unsigned
+        // char; a plain char may be negative for bytes of multi-byte UTF-8.
+        && !isalnum((unsigned char)json[len])
+       ) {
+        return true;
+    }
+    return false;
+}
+
+static void
+S_set_error(CharBuf *mess, char *json, char *limit, int line,
+ const char *func) {
+ if (func) {
+ CB_catf(mess, " at %s %s line %i32 near ", func, __FILE__,
+ (int32_t)line);
+ }
+ else {
+ CB_catf(mess, " at %s line %i32 near ", __FILE__, (int32_t)line);
+ }
+
+ // Append escaped text.
+ int64_t len = limit - json;
+ if (len > 32) {
+ const char *end = StrHelp_back_utf8_char(json + 32, json);
+ len = end - json;
+ }
+ ZombieCharBuf *snippet = ZCB_WRAP_STR(json, len);
+ S_append_json_string((Obj*)snippet, mess);
+
+ // Set Err_error.
+ Err_set_error(Err_new(mess));
}
Modified: incubator/lucy/trunk/core/Lucy/Util/Json.cfh
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Util/Json.cfh?rev=1156951&r1=1156950&r2=1156951&view=diff
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Util/Json.cfh (original)
+++ incubator/lucy/trunk/core/Lucy/Util/Json.cfh Fri Aug 12 03:31:03 2011
@@ -58,3 +58,14 @@ class Lucy::Util::Json inherits Lucy::Ob
}
+__C__
+
+struct lucy_JsonParserState
+{
+ lucy_Obj *result;
+ chy_bool_t errors;
+};
+typedef struct lucy_JsonParserState lucy_JsonParserState;
+
+__END_C__
+
Added: incubator/lucy/trunk/core/Lucy/Util/Json/JsonParser.y
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/core/Lucy/Util/Json/JsonParser.y?rev=1156951&view=auto
==============================================================================
--- incubator/lucy/trunk/core/Lucy/Util/Json/JsonParser.y (added)
+++ incubator/lucy/trunk/core/Lucy/Util/Json/JsonParser.y Fri Aug 12 03:31:03 2011
@@ -0,0 +1,162 @@
+%name LucyParseJson
+
+/* Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+%token_type { cfish_Obj* }
+%token_destructor { CFISH_DECREF($$); }
+%token_prefix LUCY_JSON_TOKENTYPE_
+
+%include {
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include "Lucy/Object/Hash.h"
+#include "Lucy/Object/VArray.h"
+#include "Lucy/Object/CharBuf.h"
+#include "Lucy/Object/Err.h"
+#include "Lucy/Util/Json.h"
+}
+
+%extra_argument { lucy_JsonParserState *state }
+
+%syntax_error {
+ state->errors = true;
+}
+
+result ::= top_level_value(A).
+{
+ state->result = A;
+}
+
+/* Allow any "value" as a top-level construct. This "loose", tolerant grammar
+ * makes testing somewhat easier. A strict JSON parser would only allow JSON
+ * Objects and Arrays at the top level.
+ */
+top_level_value(A) ::= value(B). { A = B; }
+
+/* Structural characters. */
+begin_array ::= LEFT_SQUARE_BRACKET.
+end_array ::= RIGHT_SQUARE_BRACKET.
+begin_object ::= LEFT_CURLY_BRACKET.
+end_object ::= RIGHT_CURLY_BRACKET.
+name_separator ::= COLON.
+value_separator ::= COMMA.
+
+/* Values */
+%type STRING { cfish_CharBuf* }
+
+value(A) ::= FALSE(B). { A = B; }
+value(A) ::= NULL(B). { A = B; }
+value(A) ::= TRUE(B). { A = B; }
+value(A) ::= object(B). { A = (cfish_Obj*)B; }
+value(A) ::= array(B). { A = (cfish_Obj*)B; }
+value(A) ::= NUMBER(B). { A = (cfish_Obj*)B; }
+value(A) ::= STRING(B). { A = B; }
+
+/* Javascript Objects, implemented as Clownfish Hashes. */
+%type object { cfish_Hash* }
+%type empty_object { cfish_Hash* }
+%type single_pair_object { cfish_Hash* }
+%type multi_pair_object { cfish_Hash* }
+%type key_value_pair_list { cfish_Hash* }
+%destructor object { CFISH_DECREF($$); }
+%destructor empty_object { CFISH_DECREF($$); }
+%destructor single_pair_object { CFISH_DECREF($$); }
+%destructor multi_pair_object { CFISH_DECREF($$); }
+%destructor key_value_pair_list { CFISH_DECREF($$); }
+
+object(A) ::= empty_object(B). { A = B; }
+object(A) ::= single_pair_object(B). { A = B; }
+object(A) ::= multi_pair_object(B). { A = B; }
+
+empty_object(A) ::= begin_object end_object.
+{
+ A = cfish_Hash_new(0);
+}
+
+single_pair_object(A) ::= begin_object STRING(B) name_separator value(C) end_object.
+{
+ A = cfish_Hash_new(1);
+ Cfish_Hash_Store(A, (cfish_Obj*)B, C);
+ CFISH_DECREF(B);
+}
+
+multi_pair_object(A) ::= begin_object key_value_pair_list(B) STRING(C) name_separator value(D) end_object.
+{
+ A = B;
+ Cfish_Hash_Store(A, (cfish_Obj*)C, D);
+ CFISH_DECREF(C);
+}
+
+key_value_pair_list(A) ::= key_value_pair_list(B) STRING(C) name_separator value(D) value_separator.
+{
+ A = B;
+ Cfish_Hash_Store(A, (cfish_Obj*)C, D);
+ CFISH_DECREF(C);
+}
+
+key_value_pair_list(A) ::= STRING(B) name_separator value(C) value_separator.
+{
+ A = cfish_Hash_new(0);
+ Cfish_Hash_Store(A, (cfish_Obj*)B, C);
+ CFISH_DECREF(B);
+}
+
+/* Arrays. */
+%type array { cfish_VArray* }
+%type empty_array { cfish_VArray* }
+%type single_elem_array { cfish_VArray* }
+%type multi_elem_array { cfish_VArray* }
+%type array_elem_list { cfish_VArray* }
+%destructor array { CFISH_DECREF($$); }
+%destructor single_elem_array { CFISH_DECREF($$); }
+%destructor multi_elem_array { CFISH_DECREF($$); }
+%destructor array_elem_list { CFISH_DECREF($$); }
+
+array(A) ::= empty_array(B). { A = B; }
+array(A) ::= single_elem_array(B). { A = B; }
+array(A) ::= multi_elem_array(B). { A = B; }
+
+empty_array(A) ::= begin_array end_array.
+{
+ A = cfish_VA_new(0);
+}
+
+single_elem_array(A) ::= begin_array value(B) end_array.
+{
+ A = cfish_VA_new(1);
+ Cfish_VA_Push(A, B);
+}
+
+multi_elem_array(A) ::= begin_array array_elem_list(B) value(C) end_array.
+{
+ A = B;
+ Cfish_VA_Push(A, C);
+}
+
+array_elem_list(A) ::= array_elem_list(B) value(C) value_separator.
+{
+ A = B;
+ Cfish_VA_Push(A, C);
+}
+
+array_elem_list(A) ::= value(B) value_separator.
+{
+ A = cfish_VA_new(1);
+ Cfish_VA_Push(A, B);
+}
+
Modified: incubator/lucy/trunk/perl/Build.PL
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/Build.PL?rev=1156951&r1=1156950&r2=1156951&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/Build.PL (original)
+++ incubator/lucy/trunk/perl/Build.PL Fri Aug 12 03:31:03 2011
@@ -24,11 +24,8 @@ my $builder = Lucy::Build->new(
license => 'apache',
dist_author =>
'The Apache Lucy Project <lucy-dev at incubator dot apache dot org>',
- dist_version => '0.2.0',
- requires => {
- 'JSON::XS' => 1.53,
- 'perl' => '5.8.3',
- },
+ dist_version => '0.2.0',
+ requires => { 'perl' => '5.8.3', },
build_requires => {
'Parse::RecDescent' => 1.94,
'Module::Build' => 0.280801,
Modified: incubator/lucy/trunk/perl/MANIFEST
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/MANIFEST?rev=1156951&r1=1156950&r2=1156951&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/MANIFEST (original)
+++ incubator/lucy/trunk/perl/MANIFEST Fri Aug 12 03:31:03 2011
@@ -404,7 +404,6 @@ xs/Lucy/Object/LockFreeRegistry.c
xs/Lucy/Object/Obj.c
xs/Lucy/Object/VTable.c
xs/Lucy/Store/FSFolder.c
-xs/Lucy/Util/Json.c
xs/Lucy/Util/StringHelper.c
xs/XSBind.c
xs/XSBind.h
Modified: incubator/lucy/trunk/perl/buildlib/Lucy/Build.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/buildlib/Lucy/Build.pm?rev=1156951&r1=1156950&r2=1156951&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/buildlib/Lucy/Build.pm (original)
+++ incubator/lucy/trunk/perl/buildlib/Lucy/Build.pm Fri Aug 12 03:31:03 2011
@@ -82,7 +82,7 @@ sub extra_ccflags {
my $self = shift;
my $gcc_flags = '-std=gnu99 -D_GNU_SOURCE ';
if ( defined $ENV{LUCY_VALGRIND} ) {
- return "$gcc_flags -fno-inline-functions ";
+ return "$gcc_flags -DLUCY_VALGRIND -fno-inline-functions ";
}
elsif ( defined $ENV{LUCY_DEBUG} ) {
return "$gcc_flags -DLUCY_DEBUG -pedantic -Wall -Wextra "
@@ -476,10 +476,27 @@ sub ACTION_test_valgrind {
}
}
+# Run all .y files through lemon.
+sub ACTION_parsers {
+ my $self = shift;
+ $self->dispatch('lemon');
+ my $y_files = $self->rscan_dir( $CORE_SOURCE_DIR, qr/\.y$/ );
+ for my $y_file (@$y_files) {
+ my $c_file = $y_file;
+ my $h_file = $y_file;
+ $c_file =~ s/\.y$/.c/ or die "no match";
+ $h_file =~ s/\.y$/.h/ or die "no match";
+ next if $self->up_to_date( $y_file, [ $c_file, $h_file ] );
+ $self->add_to_cleanup( $c_file, $h_file );
+ system( $LEMON_EXE_PATH, '-q', $y_file ) and die "lemon failed";
+ }
+}
+
sub ACTION_compile_custom_xs {
my $self = shift;
$self->dispatch('ppport');
+ $self->dispatch('parsers');
require ExtUtils::ParseXS;
Modified: incubator/lucy/trunk/perl/lib/Lucy.pm
URL: http://svn.apache.org/viewvc/incubator/lucy/trunk/perl/lib/Lucy.pm?rev=1156951&r1=1156950&r2=1156951&view=diff
==============================================================================
--- incubator/lucy/trunk/perl/lib/Lucy.pm (original)
+++ incubator/lucy/trunk/perl/lib/Lucy.pm Fri Aug 12 03:31:03 2011
@@ -517,89 +517,6 @@ sub error {$Lucy::Object::Err::error}
}
{
- package Lucy::Util::Json;
- use Scalar::Util qw( blessed );
- use Lucy qw( to_clownfish );
- use Lucy::Util::StringHelper qw( utf8_valid utf8_flag_on );
- use JSON::XS qw();
-
- my $json_encoder = JSON::XS->new->pretty(1)->canonical(1);
-
- sub slurp_json {
- my ( undef, %args ) = @_;
- my $result;
- my $instream = $args{folder}->open_in( $args{path} )
- or return;
- my $len = $instream->length;
- my $json;
- $instream->read( $json, $len );
- if ( utf8_valid($json) ) {
- utf8_flag_on($json);
- $result = eval { to_clownfish( $json_encoder->decode($json) ) };
- }
- else {
- $@ = "Invalid UTF-8";
- }
- if ( $@ or !$result ) {
- Lucy::Object::Err->set_error(
- Lucy::Object::Err->new( $@ || "Failed to decode JSON" ) );
- return;
- }
- return $result;
- }
-
- sub spew_json {
- my ( undef, %args ) = @_;
- my $json = eval { $json_encoder->encode( $args{'dump'} ) };
- if ( !defined $json ) {
- Lucy::Object::Err->set_error( Lucy::Object::Err->new($@) );
- return 0;
- }
- my $outstream = $args{folder}->open_out( $args{path} );
- return 0 unless $outstream;
- eval {
- $outstream->print($json);
- $outstream->close;
- };
- if ($@) {
- my $error;
- if ( blessed($@) && $@->isa("Lucy::Object::Err") ) {
- $error = $@;
- }
- else {
- $error = Lucy::Object::Err->new($@);
- }
- Lucy::Object::Err->set_error($error);
- return 0;
- }
- return 1;
- }
-
- sub to_json {
- my ( undef, $dump ) = @_;
- my $json = eval { $json_encoder->encode($dump) };
- if ($@) {
- my $error = Lucy::Object::Err->new($@);
- Lucy::Object::Err->set_error($error);
- return;
- }
- return $json;
- }
-
- sub from_json {
- my $dump = eval { to_clownfish( $json_encoder->decode( $_[1] ) ) };
- if ($@) {
- my $error = Lucy::Object::Err->new($@);
- Lucy::Object::Err->set_error($error);
- return;
- }
- return $dump;
- }
-
- sub set_tolerant { $json_encoder->allow_nonref( $_[1] ) }
-}
-
-{
package Lucy::Object::Host;
BEGIN {
if ( !__PACKAGE__->isa('Lucy::Object::Obj') ) {