You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@couchdb.apache.org by fd...@apache.org on 2011/04/05 11:42:42 UTC

svn commit: r1088941 [2/3] - in /couchdb/trunk: ./ src/ src/couchdb/ src/ejson/ src/ejson/yajl/ test/etap/ utils/

Added: couchdb/trunk/src/ejson/yajl/yajl_buf.c
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/ejson/yajl/yajl_buf.c?rev=1088941&view=auto
==============================================================================
--- couchdb/trunk/src/ejson/yajl/yajl_buf.c (added)
+++ couchdb/trunk/src/ejson/yajl/yajl_buf.c Tue Apr  5 09:42:41 2011
@@ -0,0 +1,119 @@
+/*
+ * Copyright 2010, Lloyd Hilaiel.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ * 
+ *  3. Neither the name of Lloyd Hilaiel nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */ 
+
+#include "yajl_buf.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define YAJL_BUF_INIT_SIZE 2048
+
+struct yajl_buf_t {
+    unsigned int len;
+    unsigned int used;
+    unsigned char * data;
+    yajl_alloc_funcs * alloc;
+};
+
+static
+void yajl_buf_ensure_available(yajl_buf buf, unsigned int want)
+{
+    unsigned int need;
+    
+    assert(buf != NULL);
+
+    /* first call */
+    if (buf->data == NULL) {
+        buf->len = YAJL_BUF_INIT_SIZE;
+        buf->data = (unsigned char *) YA_MALLOC(buf->alloc, buf->len);
+        buf->data[0] = 0;
+    }
+
+    need = buf->len;
+
+    while (want >= (need - buf->used)) need <<= 1;
+
+    if (need != buf->len) {
+        buf->data = (unsigned char *) YA_REALLOC(buf->alloc, buf->data, need);
+        buf->len = need;
+    }
+}
+
+yajl_buf yajl_buf_alloc(yajl_alloc_funcs * alloc)
+{
+    yajl_buf b = YA_MALLOC(alloc, sizeof(struct yajl_buf_t));
+    memset((void *) b, 0, sizeof(struct yajl_buf_t));
+    b->alloc = alloc;
+    return b;
+}
+
+void yajl_buf_free(yajl_buf buf)
+{
+    assert(buf != NULL);
+    if (buf->data) YA_FREE(buf->alloc, buf->data);
+    YA_FREE(buf->alloc, buf);
+}
+
+void yajl_buf_append(yajl_buf buf, const void * data, unsigned int len)
+{
+    yajl_buf_ensure_available(buf, len);
+    if (len > 0) {
+        assert(data != NULL);
+        memcpy(buf->data + buf->used, data, len);
+        buf->used += len;
+        buf->data[buf->used] = 0;
+    }
+}
+
+void yajl_buf_clear(yajl_buf buf)
+{
+    buf->used = 0;
+    if (buf->data) buf->data[buf->used] = 0;
+}
+
+const unsigned char * yajl_buf_data(yajl_buf buf)
+{
+    return buf->data;
+}
+
+unsigned int yajl_buf_len(yajl_buf buf)
+{
+    return buf->used;
+}
+
+void
+yajl_buf_truncate(yajl_buf buf, unsigned int len)
+{
+    assert(len <= buf->used);
+    buf->used = len;
+}

Added: couchdb/trunk/src/ejson/yajl/yajl_buf.h
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/ejson/yajl/yajl_buf.h?rev=1088941&view=auto
==============================================================================
--- couchdb/trunk/src/ejson/yajl/yajl_buf.h (added)
+++ couchdb/trunk/src/ejson/yajl/yajl_buf.h Tue Apr  5 09:42:41 2011
@@ -0,0 +1,73 @@
+/*
+ * Copyright 2010, Lloyd Hilaiel.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ * 
+ *  3. Neither the name of Lloyd Hilaiel nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */ 
+
+#ifndef __YAJL_BUF_H__
+#define __YAJL_BUF_H__
+
+#include "yajl_common.h"
+#include "yajl_alloc.h"
+
+/*
+ * Implementation/performance notes.  If this were moved to a header
+ * only implementation using #define's where possible we might be 
+ * able to sqeeze a little performance out of the guy by killing function
+ * call overhead.  YMMV.
+ */
+
+/**
+ * yajl_buf is a buffer with exponential growth.  the buffer ensures that
+ * you are always null padded.
+ */
+typedef struct yajl_buf_t * yajl_buf;
+
+/* allocate a new buffer */
+yajl_buf yajl_buf_alloc(yajl_alloc_funcs * alloc);
+
+/* free the buffer */
+void yajl_buf_free(yajl_buf buf);
+
+/* append a number of bytes to the buffer */
+void yajl_buf_append(yajl_buf buf, const void * data, unsigned int len);
+
+/* empty the buffer */
+void yajl_buf_clear(yajl_buf buf);
+
+/* get a pointer to the beginning of the buffer */
+const unsigned char * yajl_buf_data(yajl_buf buf);
+
+/* get the length of the buffer */
+unsigned int yajl_buf_len(yajl_buf buf);
+
+/* truncate the buffer */
+void yajl_buf_truncate(yajl_buf buf, unsigned int len);
+
+#endif

Added: couchdb/trunk/src/ejson/yajl/yajl_bytestack.h
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/ejson/yajl/yajl_bytestack.h?rev=1088941&view=auto
==============================================================================
--- couchdb/trunk/src/ejson/yajl/yajl_bytestack.h (added)
+++ couchdb/trunk/src/ejson/yajl/yajl_bytestack.h Tue Apr  5 09:42:41 2011
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2010, Lloyd Hilaiel.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ * 
+ *  3. Neither the name of Lloyd Hilaiel nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */ 
+
+/*
+ * A header only implementation of a simple stack of bytes, used in YAJL
+ * to maintain parse state.
+ */
+
+#ifndef __YAJL_BYTESTACK_H__
+#define __YAJL_BYTESTACK_H__
+
+#include "yajl_common.h"
+
+#define YAJL_BS_INC 128
+
+typedef struct yajl_bytestack_t
+{
+    unsigned char * stack;
+    unsigned int size;
+    unsigned int used;
+    yajl_alloc_funcs * yaf;
+} yajl_bytestack;
+
+/* initialize a bytestack */
+#define yajl_bs_init(obs, _yaf) {               \
+        (obs).stack = NULL;                     \
+        (obs).size = 0;                         \
+        (obs).used = 0;                         \
+        (obs).yaf = (_yaf);                     \
+    }                                           \
+
+
+/* initialize a bytestack */
+#define yajl_bs_free(obs)                 \
+    if ((obs).stack) (obs).yaf->free((obs).yaf->ctx, (obs).stack);   
+
+#define yajl_bs_current(obs)               \
+    (assert((obs).used > 0), (obs).stack[(obs).used - 1])
+
+#define yajl_bs_push(obs, byte) {                       \
+    if (((obs).size - (obs).used) == 0) {               \
+        (obs).size += YAJL_BS_INC;                      \
+        (obs).stack = (obs).yaf->realloc((obs).yaf->ctx,\
+                                         (void *) (obs).stack, (obs).size);\
+    }                                                   \
+    (obs).stack[((obs).used)++] = (byte);               \
+}
+    
+/* removes the top item of the stack, returns nothing */
+#define yajl_bs_pop(obs) { ((obs).used)--; }
+
+#define yajl_bs_set(obs, byte)                          \
+    (obs).stack[((obs).used) - 1] = (byte);             
+    
+
+#endif

Added: couchdb/trunk/src/ejson/yajl/yajl_common.h
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/ejson/yajl/yajl_common.h?rev=1088941&view=auto
==============================================================================
--- couchdb/trunk/src/ejson/yajl/yajl_common.h (added)
+++ couchdb/trunk/src/ejson/yajl/yajl_common.h Tue Apr  5 09:42:41 2011
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2010, Lloyd Hilaiel.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ * 
+ *  3. Neither the name of Lloyd Hilaiel nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */ 
+
+#ifndef __YAJL_COMMON_H__
+#define __YAJL_COMMON_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif    
+
+#define YAJL_MAX_DEPTH 128
+
+/* msft dll export gunk.  To build a DLL on windows, you
+ * must define WIN32, YAJL_SHARED, and YAJL_BUILD.  To use a shared
+ * DLL, you must define YAJL_SHARED and WIN32 */
+#if defined(WIN32) && defined(YAJL_SHARED)
+#  ifdef YAJL_BUILD
+#    define YAJL_API __declspec(dllexport)
+#  else
+#    define YAJL_API __declspec(dllimport)
+#  endif
+#else
+#  define YAJL_API
+#endif 
+
+/** pointer to a malloc function, supporting client overriding memory
+ *  allocation routines */
+typedef void * (*yajl_malloc_func)(void *ctx, unsigned int sz);
+
+/** pointer to a free function, supporting client overriding memory
+ *  allocation routines */
+typedef void (*yajl_free_func)(void *ctx, void * ptr);
+
+/** pointer to a realloc function which can resize an allocation. */
+typedef void * (*yajl_realloc_func)(void *ctx, void * ptr, unsigned int sz);
+
+/** A structure which can be passed to yajl_*_alloc routines to allow the
+ *  client to specify memory allocation functions to be used. */
+typedef struct
+{
+    /** pointer to a function that can allocate uninitialized memory */
+    yajl_malloc_func malloc;
+    /** pointer to a function that can resize memory allocations */
+    yajl_realloc_func realloc;
+    /** pointer to a function that can free memory allocated using
+     *  reallocFunction or mallocFunction */
+    yajl_free_func free;
+    /** a context pointer that will be passed to above allocation routines */
+    void * ctx;
+} yajl_alloc_funcs;
+
+#ifdef __cplusplus
+}
+#endif    
+
+#endif

Added: couchdb/trunk/src/ejson/yajl/yajl_encode.c
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/ejson/yajl/yajl_encode.c?rev=1088941&view=auto
==============================================================================
--- couchdb/trunk/src/ejson/yajl/yajl_encode.c (added)
+++ couchdb/trunk/src/ejson/yajl/yajl_encode.c Tue Apr  5 09:42:41 2011
@@ -0,0 +1,188 @@
+/*
+ * Copyright 2010, Lloyd Hilaiel.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ * 
+ *  3. Neither the name of Lloyd Hilaiel nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */ 
+
+#include "yajl_encode.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+
+static void CharToHex(unsigned char c, char * hexBuf)
+{
+    const char * hexchar = "0123456789ABCDEF";
+    hexBuf[0] = hexchar[c >> 4];
+    hexBuf[1] = hexchar[c & 0x0F];
+}
+
+void
+yajl_string_encode(yajl_buf buf, const unsigned char * str,
+                   unsigned int len)
+{
+    yajl_string_encode2((const yajl_print_t) &yajl_buf_append, buf, str, len);
+}
+
+void
+yajl_string_encode2(const yajl_print_t print,
+                    void * ctx,
+                    const unsigned char * str,
+                    unsigned int len)
+{
+    unsigned int beg = 0;
+    unsigned int end = 0;    
+    char hexBuf[7];
+    hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0';
+    hexBuf[6] = 0;
+
+    while (end < len) {
+        const char * escaped = NULL;
+        switch (str[end]) {
+            case '\r': escaped = "\\r"; break;
+            case '\n': escaped = "\\n"; break;
+            case '\\': escaped = "\\\\"; break;
+            /* case '/': escaped = "\\/"; break; */
+            case '"': escaped = "\\\""; break;
+            case '\f': escaped = "\\f"; break;
+            case '\b': escaped = "\\b"; break;
+            case '\t': escaped = "\\t"; break;
+            default:
+                if ((unsigned char) str[end] < 32) {
+                    CharToHex(str[end], hexBuf + 4);
+                    escaped = hexBuf;
+                }
+                break;
+        }
+        if (escaped != NULL) {
+            print(ctx, (const char *) (str + beg), end - beg);
+            print(ctx, escaped, strlen(escaped));
+            beg = ++end;
+        } else {
+            ++end;
+        }
+    }
+    print(ctx, (const char *) (str + beg), end - beg);
+}
+
+static void hexToDigit(unsigned int * val, const unsigned char * hex)
+{
+    unsigned int i;
+    for (i=0;i<4;i++) {
+        unsigned char c = hex[i];
+        if (c >= 'A') c = (c & ~0x20) - 7;
+        c -= '0';
+        assert(!(c & 0xF0));
+        *val = (*val << 4) | c;
+    }
+}
+
+static void Utf32toUtf8(unsigned int codepoint, char * utf8Buf) 
+{
+    if (codepoint < 0x80) {
+        utf8Buf[0] = (char) codepoint;
+        utf8Buf[1] = 0;
+    } else if (codepoint < 0x0800) {
+        utf8Buf[0] = (char) ((codepoint >> 6) | 0xC0);
+        utf8Buf[1] = (char) ((codepoint & 0x3F) | 0x80);
+        utf8Buf[2] = 0;
+    } else if (codepoint < 0x10000) {
+        utf8Buf[0] = (char) ((codepoint >> 12) | 0xE0);
+        utf8Buf[1] = (char) (((codepoint >> 6) & 0x3F) | 0x80);
+        utf8Buf[2] = (char) ((codepoint & 0x3F) | 0x80);
+        utf8Buf[3] = 0;
+    } else if (codepoint < 0x200000) {
+        utf8Buf[0] =(char)((codepoint >> 18) | 0xF0);
+        utf8Buf[1] =(char)(((codepoint >> 12) & 0x3F) | 0x80);
+        utf8Buf[2] =(char)(((codepoint >> 6) & 0x3F) | 0x80);
+        utf8Buf[3] =(char)((codepoint & 0x3F) | 0x80);
+        utf8Buf[4] = 0;
+    } else {
+        utf8Buf[0] = '?';
+        utf8Buf[1] = 0;
+    }
+}
+
+void yajl_string_decode(yajl_buf buf, const unsigned char * str,
+                        unsigned int len)
+{
+    unsigned int beg = 0;
+    unsigned int end = 0;    
+
+    while (end < len) {
+        if (str[end] == '\\') {
+            char utf8Buf[5];
+            const char * unescaped = "?";
+            yajl_buf_append(buf, str + beg, end - beg);
+            switch (str[++end]) {
+                case 'r': unescaped = "\r"; break;
+                case 'n': unescaped = "\n"; break;
+                case '\\': unescaped = "\\"; break;
+                case '/': unescaped = "/"; break;
+                case '"': unescaped = "\""; break;
+                case 'f': unescaped = "\f"; break;
+                case 'b': unescaped = "\b"; break;
+                case 't': unescaped = "\t"; break;
+                case 'u': {
+                    unsigned int codepoint = 0;
+                    hexToDigit(&codepoint, str + ++end);
+                    end+=3;
+                    /* check if this is a surrogate */
+                    if ((codepoint & 0xFC00) == 0xD800) {
+                        end++;
+                        if (str[end] == '\\' && str[end + 1] == 'u') {
+                            unsigned int surrogate = 0;
+                            hexToDigit(&surrogate, str + end + 2);
+                            codepoint =
+                                (((codepoint & 0x3F) << 10) | 
+                                 ((((codepoint >> 6) & 0xF) + 1) << 16) | 
+                                 (surrogate & 0x3FF));
+                            end += 5;
+                        } else {
+                            unescaped = "?";
+                            break;
+                        }
+                    }
+                    
+                    Utf32toUtf8(codepoint, utf8Buf);
+                    unescaped = utf8Buf;
+                    break;
+                }
+                default:
+                    assert("this should never happen" == NULL);
+            }
+            yajl_buf_append(buf, unescaped, strlen(unescaped));
+            beg = ++end;
+        } else {
+            end++;
+        }
+    }
+    yajl_buf_append(buf, str + beg, end - beg);
+}

Added: couchdb/trunk/src/ejson/yajl/yajl_encode.h
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/ejson/yajl/yajl_encode.h?rev=1088941&view=auto
==============================================================================
--- couchdb/trunk/src/ejson/yajl/yajl_encode.h (added)
+++ couchdb/trunk/src/ejson/yajl/yajl_encode.h Tue Apr  5 09:42:41 2011
@@ -0,0 +1,50 @@
+/*
+ * Copyright 2010, Lloyd Hilaiel.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ * 
+ *  3. Neither the name of Lloyd Hilaiel nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */ 
+
+#ifndef __YAJL_ENCODE_H__
+#define __YAJL_ENCODE_H__
+
+#include "yajl_buf.h"
+#include "yajl_gen.h"
+
+void yajl_string_encode2(const yajl_print_t printer,
+                         void * ctx,
+                         const unsigned char * str,
+                         unsigned int length);
+
+void yajl_string_encode(yajl_buf buf, const unsigned char * str,
+                        unsigned int length);
+
+void yajl_string_decode(yajl_buf buf, const unsigned char * str,
+                        unsigned int length);
+
+#endif

Added: couchdb/trunk/src/ejson/yajl/yajl_gen.c
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/ejson/yajl/yajl_gen.c?rev=1088941&view=auto
==============================================================================
--- couchdb/trunk/src/ejson/yajl/yajl_gen.c (added)
+++ couchdb/trunk/src/ejson/yajl/yajl_gen.c Tue Apr  5 09:42:41 2011
@@ -0,0 +1,322 @@
+/*
+ * Copyright 2010, Lloyd Hilaiel.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ * 
+ *  3. Neither the name of Lloyd Hilaiel nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */ 
+
+#include "yajl_gen.h"
+#include "yajl_buf.h"
+#include "yajl_encode.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+typedef enum {
+    yajl_gen_start,
+    yajl_gen_map_start,
+    yajl_gen_map_key,
+    yajl_gen_map_val,
+    yajl_gen_array_start,
+    yajl_gen_in_array,
+    yajl_gen_complete,
+    yajl_gen_error
+} yajl_gen_state;
+
+struct yajl_gen_t 
+{
+    unsigned int depth;
+    unsigned int pretty;
+    const char * indentString;
+    yajl_gen_state state[YAJL_MAX_DEPTH];
+    yajl_print_t print;
+    void * ctx; /* yajl_buf */
+    /* memory allocation routines */
+    yajl_alloc_funcs alloc;
+};
+
+yajl_gen
+yajl_gen_alloc(const yajl_gen_config * config,
+               const yajl_alloc_funcs * afs)
+{
+    return yajl_gen_alloc2(NULL, config, afs, NULL);
+}
+
+yajl_gen
+yajl_gen_alloc2(const yajl_print_t callback,
+                const yajl_gen_config * config,
+                const yajl_alloc_funcs * afs,
+                void * ctx)
+{
+    yajl_gen g = NULL;
+    yajl_alloc_funcs afsBuffer;
+
+    /* first order of business is to set up memory allocation routines */
+    if (afs != NULL) {
+        if (afs->malloc == NULL || afs->realloc == NULL || afs->free == NULL)
+        {
+            return NULL;
+        }
+    } else {
+        yajl_set_default_alloc_funcs(&afsBuffer);
+        afs = &afsBuffer;
+    }
+
+    g = (yajl_gen) YA_MALLOC(afs, sizeof(struct yajl_gen_t));
+    memset((void *) g, 0, sizeof(struct yajl_gen_t));
+    /* copy in pointers to allocation routines */
+    memcpy((void *) &(g->alloc), (void *) afs, sizeof(yajl_alloc_funcs));
+
+    if (config) {
+        g->pretty = config->beautify;
+        g->indentString = config->indentString ? config->indentString : "  ";
+    }
+
+    if (callback) {
+        g->print = callback;
+        g->ctx = ctx;
+    } else {
+        g->print = (yajl_print_t)&yajl_buf_append;
+        g->ctx = yajl_buf_alloc(&(g->alloc));
+    }
+
+    return g;
+}
+
+void
+yajl_gen_free(yajl_gen g)
+{
+    if (g->print == (yajl_print_t)&yajl_buf_append) yajl_buf_free((yajl_buf)g->ctx);
+    YA_FREE(&(g->alloc), g);
+}
+
+#define INSERT_SEP \
+    if (g->state[g->depth] == yajl_gen_map_key ||               \
+        g->state[g->depth] == yajl_gen_in_array) {              \
+        g->print(g->ctx, ",", 1);                               \
+        if (g->pretty) g->print(g->ctx, "\n", 1);               \
+    } else if (g->state[g->depth] == yajl_gen_map_val) {        \
+        g->print(g->ctx, ":", 1);                               \
+        if (g->pretty) g->print(g->ctx, " ", 1);                \
+   } 
+
+#define INSERT_WHITESPACE                                               \
+    if (g->pretty) {                                                    \
+        if (g->state[g->depth] != yajl_gen_map_val) {                   \
+            unsigned int _i;                                            \
+            for (_i=0;_i<g->depth;_i++)                                 \
+                g->print(g->ctx, g->indentString,                       \
+                         strlen(g->indentString));                      \
+        }                                                               \
+    }
+
+#define ENSURE_NOT_KEY \
+    if (g->state[g->depth] == yajl_gen_map_key) {   \
+        return yajl_gen_keys_must_be_strings;       \
+    }                                               \
+
+/* check that we're not complete, or in error state.  in a valid state
+ * to be generating */
+#define ENSURE_VALID_STATE \
+    if (g->state[g->depth] == yajl_gen_error) {   \
+        return yajl_gen_in_error_state;\
+    } else if (g->state[g->depth] == yajl_gen_complete) {   \
+        return yajl_gen_generation_complete;                \
+    }
+
+#define INCREMENT_DEPTH \
+    if (++(g->depth) >= YAJL_MAX_DEPTH) return yajl_max_depth_exceeded;
+
+#define APPENDED_ATOM \
+    switch (g->state[g->depth]) {                   \
+        case yajl_gen_start:                        \
+            g->state[g->depth] = yajl_gen_complete; \
+            break;                                  \
+        case yajl_gen_map_start:                    \
+        case yajl_gen_map_key:                      \
+            g->state[g->depth] = yajl_gen_map_val;  \
+            break;                                  \
+        case yajl_gen_array_start:                  \
+            g->state[g->depth] = yajl_gen_in_array; \
+            break;                                  \
+        case yajl_gen_map_val:                      \
+            g->state[g->depth] = yajl_gen_map_key;  \
+            break;                                  \
+        default:                                    \
+            break;                                  \
+    }                                               \
+
+#define FINAL_NEWLINE                                        \
+    if (g->pretty && g->state[g->depth] == yajl_gen_complete) \
+        g->print(g->ctx, "\n", 1);        
+    
+yajl_gen_status
+yajl_gen_integer(yajl_gen g, long int number)
+{
+    char i[32];
+    ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
+    sprintf(i, "%ld", number);
+    g->print(g->ctx, i, strlen(i));
+    APPENDED_ATOM;
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+#ifdef WIN32
+#include <float.h>
+#define isnan _isnan
+#define isinf !_finite
+#endif
+
+yajl_gen_status
+yajl_gen_double(yajl_gen g, double number)
+{
+    char i[32];
+    ENSURE_VALID_STATE; ENSURE_NOT_KEY; 
+    if (isnan(number) || isinf(number)) return yajl_gen_invalid_number;
+    INSERT_SEP; INSERT_WHITESPACE;
+    sprintf(i, "%g", number);
+    g->print(g->ctx, i, strlen(i));
+    APPENDED_ATOM;
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+yajl_gen_status
+yajl_gen_number(yajl_gen g, const char * s, unsigned int l)
+{
+    ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
+    g->print(g->ctx, s, l);
+    APPENDED_ATOM;
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+yajl_gen_status
+yajl_gen_string(yajl_gen g, const unsigned char * str,
+                unsigned int len)
+{
+    ENSURE_VALID_STATE; INSERT_SEP; INSERT_WHITESPACE;
+    g->print(g->ctx, "\"", 1);
+    yajl_string_encode2(g->print, g->ctx, str, len);
+    g->print(g->ctx, "\"", 1);
+    APPENDED_ATOM;
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+yajl_gen_status
+yajl_gen_null(yajl_gen g)
+{
+    ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
+    g->print(g->ctx, "null", strlen("null"));
+    APPENDED_ATOM;
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+yajl_gen_status
+yajl_gen_bool(yajl_gen g, int boolean)
+{
+    const char * val = boolean ? "true" : "false";
+
+	ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
+    g->print(g->ctx, val, strlen(val));
+    APPENDED_ATOM;
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+yajl_gen_status
+yajl_gen_map_open(yajl_gen g)
+{
+    ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
+    INCREMENT_DEPTH; 
+    
+    g->state[g->depth] = yajl_gen_map_start;
+    g->print(g->ctx, "{", 1);
+    if (g->pretty) g->print(g->ctx, "\n", 1);
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+yajl_gen_status
+yajl_gen_map_close(yajl_gen g)
+{
+    ENSURE_VALID_STATE; 
+    (g->depth)--;
+    if (g->pretty) g->print(g->ctx, "\n", 1);
+    APPENDED_ATOM;
+    INSERT_WHITESPACE;
+    g->print(g->ctx, "}", 1);
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+yajl_gen_status
+yajl_gen_array_open(yajl_gen g)
+{
+    ENSURE_VALID_STATE; ENSURE_NOT_KEY; INSERT_SEP; INSERT_WHITESPACE;
+    INCREMENT_DEPTH; 
+    g->state[g->depth] = yajl_gen_array_start;
+    g->print(g->ctx, "[", 1);
+    if (g->pretty) g->print(g->ctx, "\n", 1);
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+yajl_gen_status
+yajl_gen_array_close(yajl_gen g)
+{
+    ENSURE_VALID_STATE;
+    if (g->pretty) g->print(g->ctx, "\n", 1);
+    (g->depth)--;
+    APPENDED_ATOM;
+    INSERT_WHITESPACE;
+    g->print(g->ctx, "]", 1);
+    FINAL_NEWLINE;
+    return yajl_gen_status_ok;
+}
+
+yajl_gen_status
+yajl_gen_get_buf(yajl_gen g, const unsigned char ** buf,
+                 unsigned int * len)
+{
+    if (g->print != (yajl_print_t)&yajl_buf_append) return yajl_gen_no_buf;
+    *buf = yajl_buf_data((yajl_buf)g->ctx);
+    *len = yajl_buf_len((yajl_buf)g->ctx);
+    return yajl_gen_status_ok;
+}
+
+void
+yajl_gen_clear(yajl_gen g)
+{
+    if (g->print == (yajl_print_t)&yajl_buf_append) yajl_buf_clear((yajl_buf)g->ctx);
+}

Added: couchdb/trunk/src/ejson/yajl/yajl_gen.h
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/ejson/yajl/yajl_gen.h?rev=1088941&view=auto
==============================================================================
--- couchdb/trunk/src/ejson/yajl/yajl_gen.h (added)
+++ couchdb/trunk/src/ejson/yajl/yajl_gen.h Tue Apr  5 09:42:41 2011
@@ -0,0 +1,159 @@
+/*
+ * Copyright 2010, Lloyd Hilaiel.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ * 
+ *  3. Neither the name of Lloyd Hilaiel nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */ 
+
+/**
+ * \file yajl_gen.h
+ * Interface to YAJL's JSON generation facilities.
+ */
+
+#include "yajl_common.h"
+
+#ifndef __YAJL_GEN_H__
+#define __YAJL_GEN_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif    
+    /** generator status codes */
+    typedef enum {
+        /** no error */
+        yajl_gen_status_ok = 0,
+        /** at a point where a map key is generated, a function other than
+         *  yajl_gen_string was called */
+        yajl_gen_keys_must_be_strings,
+        /** YAJL's maximum generation depth was exceeded.  see
+         *  YAJL_MAX_DEPTH */
+        yajl_max_depth_exceeded,
+        /** A generator function (yajl_gen_XXX) was called while in an error
+         *  state */
+        yajl_gen_in_error_state,
+        /** A complete JSON document has been generated */
+        yajl_gen_generation_complete,                
+        /** yajl_gen_double was passed an invalid floating point value
+         *  (infinity or NaN). */
+        yajl_gen_invalid_number,
+        /** A print callback was passed in, so there is no internal
+         * buffer to get from */
+        yajl_gen_no_buf
+    } yajl_gen_status;
+
+    /** an opaque handle to a generator */
+    typedef struct yajl_gen_t * yajl_gen;
+
+    /** a callback used for "printing" the results. */
+    typedef void (*yajl_print_t)(void * ctx,
+                                 const char * str,
+                                 unsigned int len);
+
+    /** configuration structure for the generator */
+    typedef struct {
+        /** generate indented (beautiful) output */
+        unsigned int beautify;
+        /** an opportunity to define an indent string.  such as \\t or
+         *  some number of spaces.  default is four spaces '    '.  This
+         *  member is only relevant when beautify is true */
+        const char * indentString;
+    } yajl_gen_config;
+
+    /** allocate a generator handle
+     *  \param config a pointer to a structure containing parameters which
+     *                configure the behavior of the json generator
+     *  \param allocFuncs an optional pointer to a structure which allows
+     *                    the client to overide the memory allocation
+     *                    used by yajl.  May be NULL, in which case
+     *                    malloc/free/realloc will be used.
+     *
+     *  \returns an allocated handle on success, NULL on failure (bad params)
+     */
+    YAJL_API yajl_gen yajl_gen_alloc(const yajl_gen_config * config,
+                                     const yajl_alloc_funcs * allocFuncs);
+
+    /** allocate a generator handle that will print to the specified
+     *  callback rather than storing the results in an internal buffer.
+     *  \param callback   a pointer to a printer function.  May be NULL
+     *                    in which case, the results will be store in an
+     *                    internal buffer.
+     *  \param config     a pointer to a structure containing parameters
+     *                    which configure the behavior of the json
+     *                    generator.
+     *  \param allocFuncs an optional pointer to a structure which allows
+     *                    the client to overide the memory allocation
+     *                    used by yajl.  May be NULL, in which case
+     *                    malloc/free/realloc will be used.
+     *  \param ctx        a context pointer that will be passed to the
+     *                    printer callback.
+     *
+     *  \returns an allocated handle on success, NULL on failure (bad params)
+     */
+    YAJL_API yajl_gen yajl_gen_alloc2(const yajl_print_t callback,
+                                      const yajl_gen_config * config,
+                                      const yajl_alloc_funcs * allocFuncs,
+                                      void * ctx);
+
+    /** free a generator handle */    
+    YAJL_API void yajl_gen_free(yajl_gen handle);
+
+    YAJL_API yajl_gen_status yajl_gen_integer(yajl_gen hand, long int number);
+    /** generate a floating point number.  number may not be infinity or
+     *  NaN, as these have no representation in JSON.  In these cases the
+     *  generator will return 'yajl_gen_invalid_number' */
+    YAJL_API yajl_gen_status yajl_gen_double(yajl_gen hand, double number);
+    YAJL_API yajl_gen_status yajl_gen_number(yajl_gen hand,
+                                             const char * num,
+                                             unsigned int len);
+    YAJL_API yajl_gen_status yajl_gen_string(yajl_gen hand,
+                                             const unsigned char * str,
+                                             unsigned int len);
+    YAJL_API yajl_gen_status yajl_gen_null(yajl_gen hand);
+    YAJL_API yajl_gen_status yajl_gen_bool(yajl_gen hand, int boolean);    
+    YAJL_API yajl_gen_status yajl_gen_map_open(yajl_gen hand);
+    YAJL_API yajl_gen_status yajl_gen_map_close(yajl_gen hand);
+    YAJL_API yajl_gen_status yajl_gen_array_open(yajl_gen hand);
+    YAJL_API yajl_gen_status yajl_gen_array_close(yajl_gen hand);
+
+    /** access the null terminated generator buffer.  If incrementally
+     *  outputing JSON, one should call yajl_gen_clear to clear the
+     *  buffer.  This allows stream generation. */
+    YAJL_API yajl_gen_status yajl_gen_get_buf(yajl_gen hand,
+                                              const unsigned char ** buf,
+                                              unsigned int * len);
+
+    /** clear yajl's output buffer, but maintain all internal generation
+     *  state.  This function will not "reset" the generator state, and is
+     *  intended to enable incremental JSON outputing. */
+    YAJL_API void yajl_gen_clear(yajl_gen hand);
+
+#ifdef __cplusplus
+}
+#endif    
+
+#endif

Added: couchdb/trunk/src/ejson/yajl/yajl_lex.c
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/ejson/yajl/yajl_lex.c?rev=1088941&view=auto
==============================================================================
--- couchdb/trunk/src/ejson/yajl/yajl_lex.c (added)
+++ couchdb/trunk/src/ejson/yajl/yajl_lex.c Tue Apr  5 09:42:41 2011
@@ -0,0 +1,737 @@
+/*
+ * Copyright 2010, Lloyd Hilaiel.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ * 
+ *  3. Neither the name of Lloyd Hilaiel nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */ 
+
+#include "yajl_lex.h"
+#include "yajl_buf.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include <string.h>
+
+#ifdef YAJL_LEXER_DEBUG
+static const char *
+tokToStr(yajl_tok tok) 
+{
+    switch (tok) {
+        case yajl_tok_bool: return "bool";
+        case yajl_tok_colon: return "colon";
+        case yajl_tok_comma: return "comma";
+        case yajl_tok_eof: return "eof";
+        case yajl_tok_error: return "error";
+        case yajl_tok_left_brace: return "brace";
+        case yajl_tok_left_bracket: return "bracket";
+        case yajl_tok_null: return "null";
+        case yajl_tok_integer: return "integer";
+        case yajl_tok_double: return "double";
+        case yajl_tok_right_brace: return "brace";
+        case yajl_tok_right_bracket: return "bracket";
+        case yajl_tok_string: return "string";
+        case yajl_tok_string_with_escapes: return "string_with_escapes";
+    }
+    return "unknown";
+}
+#endif
+
+/* Impact of the stream parsing feature on the lexer:
+ *
+ * YAJL support stream parsing.  That is, the ability to parse the first
+ * bits of a chunk of JSON before the last bits are available (still on
+ * the network or disk).  This makes the lexer more complex.  The
+ * responsibility of the lexer is to handle transparently the case where
+ * a chunk boundary falls in the middle of a token.  This is
+ * accomplished is via a buffer and a character reading abstraction. 
+ *
+ * Overview of implementation
+ *
+ * When we lex to end of input string before end of token is hit, we
+ * copy all of the input text composing the token into our lexBuf.
+ * 
+ * Every time we read a character, we do so through the readChar function.
+ * readChar's responsibility is to handle pulling all chars from the buffer
+ * before pulling chars from input text
+ */
+
+struct yajl_lexer_t {
+    /* the overal line and char offset into the data */
+    unsigned int lineOff;
+    unsigned int charOff;
+
+    /* error */
+    yajl_lex_error error;
+
+    /* a input buffer to handle the case where a token is spread over
+     * multiple chunks */ 
+    yajl_buf buf;
+
+    /* in the case where we have data in the lexBuf, bufOff holds
+     * the current offset into the lexBuf. */
+    unsigned int bufOff;
+
+    /* are we using the lex buf? */
+    unsigned int bufInUse;
+
+    /* shall we allow comments? */
+    unsigned int allowComments;
+
+    /* shall we validate utf8 inside strings? */
+    unsigned int validateUTF8;
+
+    yajl_alloc_funcs * alloc;
+};
+
+#define readChar(lxr, txt, off)                      \
+    (((lxr)->bufInUse && yajl_buf_len((lxr)->buf) && lxr->bufOff < yajl_buf_len((lxr)->buf)) ? \
+     (*((const unsigned char *) yajl_buf_data((lxr)->buf) + ((lxr)->bufOff)++)) : \
+     ((txt)[(*(off))++]))
+
+#define unreadChar(lxr, off) ((*(off) > 0) ? (*(off))-- : ((lxr)->bufOff--))
+
+yajl_lexer
+yajl_lex_alloc(yajl_alloc_funcs * alloc,
+               unsigned int allowComments, unsigned int validateUTF8)
+{
+    yajl_lexer lxr = (yajl_lexer) YA_MALLOC(alloc, sizeof(struct yajl_lexer_t));
+    memset((void *) lxr, 0, sizeof(struct yajl_lexer_t));
+    lxr->buf = yajl_buf_alloc(alloc);
+    lxr->allowComments = allowComments;
+    lxr->validateUTF8 = validateUTF8;
+    lxr->alloc = alloc;
+    return lxr;
+}
+
+void
+yajl_lex_free(yajl_lexer lxr)
+{
+    yajl_buf_free(lxr->buf);
+    YA_FREE(lxr->alloc, lxr);
+    return;
+}
+
+/* a lookup table which lets us quickly determine three things:
+ * VEC - valid escaped conrol char
+ * IJC - invalid json char
+ * VHC - valid hex char
+ * note.  the solidus '/' may be escaped or not.
+ * note.  the
+ */
+#define VEC 1
+#define IJC 2
+#define VHC 4
+static const char charLookupTable[256] =
+{
+/*00*/ IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    ,
+/*08*/ IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    ,
+/*10*/ IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    ,
+/*18*/ IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    , IJC    ,
+
+/*20*/ 0      , 0      , VEC|IJC, 0      , 0      , 0      , 0      , 0      ,
+/*28*/ 0      , 0      , 0      , 0      , 0      , 0      , 0      , VEC    ,
+/*30*/ VHC    , VHC    , VHC    , VHC    , VHC    , VHC    , VHC    , VHC    ,
+/*38*/ VHC    , VHC    , 0      , 0      , 0      , 0      , 0      , 0      ,
+
+/*40*/ 0      , VHC    , VHC    , VHC    , VHC    , VHC    , VHC    , 0      ,
+/*48*/ 0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+/*50*/ 0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+/*58*/ 0      , 0      , 0      , 0      , VEC|IJC, 0      , 0      , 0      ,
+
+/*60*/ 0      , VHC    , VEC|VHC, VHC    , VHC    , VHC    , VEC|VHC, 0      ,
+/*68*/ 0      , 0      , 0      , 0      , 0      , 0      , VEC    , 0      ,
+/*70*/ 0      , 0      , VEC    , 0      , VEC    , 0      , 0      , 0      ,
+/*78*/ 0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      ,
+
+/* include these so we don't have to always check the range of the char */
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      , 
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      , 
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      , 
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      , 
+
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      , 
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      , 
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      , 
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      , 
+
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      , 
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      , 
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      , 
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      , 
+
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      , 
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      , 
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0      , 
+       0      , 0      , 0      , 0      , 0      , 0      , 0      , 0
+};
+
+/** process a variable length utf8 encoded codepoint.
+ *
+ *  returns:
+ *    yajl_tok_string - if valid utf8 char was parsed and offset was
+ *                      advanced
+ *    yajl_tok_eof - if end of input was hit before validation could
+ *                   complete
+ *    yajl_tok_error - if invalid utf8 was encountered
+ * 
+ *  NOTE: on error the offset will point to the first char of the
+ *  invalid utf8 */
+#define UTF8_CHECK_EOF if (*offset >= jsonTextLen) { return yajl_tok_eof; }
+
+static yajl_tok
+yajl_lex_utf8_char(yajl_lexer lexer, const unsigned char * jsonText,
+                   unsigned int jsonTextLen, unsigned int * offset,
+                   unsigned char curChar)
+{
+    if (curChar <= 0x7f) {
+        /* single byte */
+        return yajl_tok_string;
+    } else if ((curChar >> 5) == 0x6) {
+        /* two byte */ 
+        UTF8_CHECK_EOF;
+        curChar = readChar(lexer, jsonText, offset);
+        if ((curChar >> 6) == 0x2) return yajl_tok_string;
+    } else if ((curChar >> 4) == 0x0e) {
+        /* three byte */
+        UTF8_CHECK_EOF;
+        curChar = readChar(lexer, jsonText, offset);
+        if ((curChar >> 6) == 0x2) {
+            UTF8_CHECK_EOF;
+            curChar = readChar(lexer, jsonText, offset);
+            if ((curChar >> 6) == 0x2) return yajl_tok_string;
+        }
+    } else if ((curChar >> 3) == 0x1e) {
+        /* four byte */
+        UTF8_CHECK_EOF;
+        curChar = readChar(lexer, jsonText, offset);
+        if ((curChar >> 6) == 0x2) {
+            UTF8_CHECK_EOF;
+            curChar = readChar(lexer, jsonText, offset);
+            if ((curChar >> 6) == 0x2) {
+                UTF8_CHECK_EOF;
+                curChar = readChar(lexer, jsonText, offset);
+                if ((curChar >> 6) == 0x2) return yajl_tok_string;
+            }
+        }
+    } 
+
+    return yajl_tok_error;
+}
+
+/* lex a string.  input is the lexer, pointer to beginning of
+ * json text, and start of string (offset).
+ * a token is returned which has the following meanings:
+ * yajl_tok_string: lex of string was successful.  offset points to
+ *                  terminating '"'.
+ * yajl_tok_eof: end of text was encountered before we could complete
+ *               the lex.
+ * yajl_tok_error: embedded in the string were unallowable chars.  offset
+ *               points to the offending char
+ */
+#define STR_CHECK_EOF \
+if (*offset >= jsonTextLen) { \
+   tok = yajl_tok_eof; \
+   goto finish_string_lex; \
+}
+
+static yajl_tok
+yajl_lex_string(yajl_lexer lexer, const unsigned char * jsonText,
+                unsigned int jsonTextLen, unsigned int * offset)
+{
+    yajl_tok tok = yajl_tok_error;
+    int hasEscapes = 0;
+
+    for (;;) {
+		unsigned char curChar;
+
+		STR_CHECK_EOF;
+
+        curChar = readChar(lexer, jsonText, offset);
+
+        /* quote terminates */
+        if (curChar == '"') {
+            tok = yajl_tok_string;
+            break;
+        }
+        /* backslash escapes a set of control chars, */
+        else if (curChar == '\\') {
+            hasEscapes = 1;
+            STR_CHECK_EOF;
+
+            /* special case \u */
+            curChar = readChar(lexer, jsonText, offset);
+            if (curChar == 'u') {
+                unsigned int i = 0;
+
+                for (i=0;i<4;i++) {
+                    STR_CHECK_EOF;                
+                    curChar = readChar(lexer, jsonText, offset);                
+                    if (!(charLookupTable[curChar] & VHC)) {
+                        /* back up to offending char */
+                        unreadChar(lexer, offset);
+                        lexer->error = yajl_lex_string_invalid_hex_char;
+                        goto finish_string_lex;
+                    }
+                }
+            } else if (!(charLookupTable[curChar] & VEC)) {
+                /* back up to offending char */
+                unreadChar(lexer, offset);
+                lexer->error = yajl_lex_string_invalid_escaped_char;
+                goto finish_string_lex;                
+            } 
+        }
+        /* when not validating UTF8 it's a simple table lookup to determine
+         * if the present character is invalid */
+        else if(charLookupTable[curChar] & IJC) {
+            /* back up to offending char */
+            unreadChar(lexer, offset);
+            lexer->error = yajl_lex_string_invalid_json_char;
+            goto finish_string_lex;                
+        }
+        /* when in validate UTF8 mode we need to do some extra work */
+        else if (lexer->validateUTF8) {
+            yajl_tok t = yajl_lex_utf8_char(lexer, jsonText, jsonTextLen,
+                                            offset, curChar);
+            
+            if (t == yajl_tok_eof) {
+                tok = yajl_tok_eof;
+                goto finish_string_lex;
+            } else if (t == yajl_tok_error) {
+                lexer->error = yajl_lex_string_invalid_utf8;
+                goto finish_string_lex;
+            } 
+        }
+        /* accept it, and move on */ 
+    }
+  finish_string_lex:
+    /* tell our buddy, the parser, wether he needs to process this string
+     * again */
+    if (hasEscapes && tok == yajl_tok_string) {
+        tok = yajl_tok_string_with_escapes;
+    } 
+
+    return tok;
+}
+
+#define RETURN_IF_EOF if (*offset >= jsonTextLen) return yajl_tok_eof;
+
+static yajl_tok
+yajl_lex_number(yajl_lexer lexer, const unsigned char * jsonText,
+                unsigned int jsonTextLen, unsigned int * offset)
+{
+    /** XXX: numbers are the only entities in json that we must lex
+     *       _beyond_ in order to know that they are complete.  There
+     *       is an ambiguous case for integers at EOF. */
+
+    unsigned char c;
+
+    yajl_tok tok = yajl_tok_integer;
+
+    RETURN_IF_EOF;    
+    c = readChar(lexer, jsonText, offset);
+
+    /* optional leading minus */
+    if (c == '-') {
+        RETURN_IF_EOF;    
+        c = readChar(lexer, jsonText, offset); 
+    }
+
+    /* a single zero, or a series of integers */
+    if (c == '0') {
+        RETURN_IF_EOF;    
+        c = readChar(lexer, jsonText, offset); 
+    } else if (c >= '1' && c <= '9') {
+        do {
+            RETURN_IF_EOF;    
+            c = readChar(lexer, jsonText, offset); 
+        } while (c >= '0' && c <= '9');
+    } else {
+        unreadChar(lexer, offset);
+        lexer->error = yajl_lex_missing_integer_after_minus;
+        return yajl_tok_error;
+    }
+
+    /* optional fraction (indicates this is floating point) */
+    if (c == '.') {
+        int numRd = 0;
+        
+        RETURN_IF_EOF;
+        c = readChar(lexer, jsonText, offset); 
+
+        while (c >= '0' && c <= '9') {
+            numRd++;
+            RETURN_IF_EOF;
+            c = readChar(lexer, jsonText, offset); 
+        } 
+
+        if (!numRd) {
+            unreadChar(lexer, offset);
+            lexer->error = yajl_lex_missing_integer_after_decimal;
+            return yajl_tok_error;
+        }
+        tok = yajl_tok_double;
+    }
+
+    /* optional exponent (indicates this is floating point) */
+    if (c == 'e' || c == 'E') {
+        RETURN_IF_EOF;
+        c = readChar(lexer, jsonText, offset); 
+
+        /* optional sign */
+        if (c == '+' || c == '-') {
+            RETURN_IF_EOF;
+            c = readChar(lexer, jsonText, offset); 
+        }
+
+        if (c >= '0' && c <= '9') {
+            do {
+                RETURN_IF_EOF;
+                c = readChar(lexer, jsonText, offset); 
+            } while (c >= '0' && c <= '9');
+        } else {
+            unreadChar(lexer, offset);
+            lexer->error = yajl_lex_missing_integer_after_exponent;
+            return yajl_tok_error;
+        }
+        tok = yajl_tok_double;
+    }
+    
+    /* we always go "one too far" */
+    unreadChar(lexer, offset);
+    
+    return tok;
+}
+
+static yajl_tok
+yajl_lex_comment(yajl_lexer lexer, const unsigned char * jsonText,
+                 unsigned int jsonTextLen, unsigned int * offset)
+{
+    unsigned char c;
+
+    yajl_tok tok = yajl_tok_comment;
+
+    RETURN_IF_EOF;    
+    c = readChar(lexer, jsonText, offset);
+
+    /* either slash or star expected */
+    if (c == '/') {
+        /* now we throw away until end of line */
+        do {
+            RETURN_IF_EOF;    
+            c = readChar(lexer, jsonText, offset); 
+        } while (c != '\n');
+    } else if (c == '*') {
+        /* now we throw away until end of comment */        
+        for (;;) {
+            RETURN_IF_EOF;    
+            c = readChar(lexer, jsonText, offset); 
+            if (c == '*') {
+                RETURN_IF_EOF;    
+                c = readChar(lexer, jsonText, offset);                 
+                if (c == '/') {
+                    break;
+                } else {
+                    unreadChar(lexer, offset);
+                }
+            }
+        }
+    } else {
+        lexer->error = yajl_lex_invalid_char;
+        tok = yajl_tok_error;
+    }
+    
+    return tok;
+}
+
+yajl_tok
+yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
+             unsigned int jsonTextLen, unsigned int * offset,
+             const unsigned char ** outBuf, unsigned int * outLen)
+{
+    yajl_tok tok = yajl_tok_error;
+    unsigned char c;
+    unsigned int startOffset = *offset;
+
+    *outBuf = NULL;
+    *outLen = 0;
+
+    for (;;) {
+        assert(*offset <= jsonTextLen);
+
+        if (*offset >= jsonTextLen) {
+            tok = yajl_tok_eof;
+            goto lexed;
+        }
+
+        c = readChar(lexer, jsonText, offset);
+
+        switch (c) {
+            case '{':
+                tok = yajl_tok_left_bracket;
+                goto lexed;
+            case '}':
+                tok = yajl_tok_right_bracket;
+                goto lexed;
+            case '[':
+                tok = yajl_tok_left_brace;
+                goto lexed;
+            case ']':
+                tok = yajl_tok_right_brace;
+                goto lexed;
+            case ',':
+                tok = yajl_tok_comma;
+                goto lexed;
+            case ':':
+                tok = yajl_tok_colon;
+                goto lexed;
+            case '\t': case '\n': case '\v': case '\f': case '\r': case ' ':
+                startOffset++;
+                break;
+            case 't': {
+                const char * want = "rue";
+                do {
+                    if (*offset >= jsonTextLen) {
+                        tok = yajl_tok_eof;
+                        goto lexed;
+                    }
+                    c = readChar(lexer, jsonText, offset);
+                    if (c != *want) {
+                        unreadChar(lexer, offset);
+                        lexer->error = yajl_lex_invalid_string;
+                        tok = yajl_tok_error;
+                        goto lexed;
+                    }
+                } while (*(++want));
+                tok = yajl_tok_bool;
+                goto lexed;
+            }
+            case 'f': {
+                const char * want = "alse";
+                do {
+                    if (*offset >= jsonTextLen) {
+                        tok = yajl_tok_eof;
+                        goto lexed;
+                    }
+                    c = readChar(lexer, jsonText, offset);
+                    if (c != *want) {
+                        unreadChar(lexer, offset);
+                        lexer->error = yajl_lex_invalid_string;
+                        tok = yajl_tok_error;
+                        goto lexed;
+                    }
+                } while (*(++want));
+                tok = yajl_tok_bool;
+                goto lexed;
+            }
+            case 'n': {
+                const char * want = "ull";
+                do {
+                    if (*offset >= jsonTextLen) {
+                        tok = yajl_tok_eof;
+                        goto lexed;
+                    }
+                    c = readChar(lexer, jsonText, offset);
+                    if (c != *want) {
+                        unreadChar(lexer, offset);
+                        lexer->error = yajl_lex_invalid_string;
+                        tok = yajl_tok_error;
+                        goto lexed;
+                    }
+                } while (*(++want));
+                tok = yajl_tok_null;
+                goto lexed;
+            }
+            case '"': {
+                tok = yajl_lex_string(lexer, (const unsigned char *) jsonText,
+                                      jsonTextLen, offset);
+                goto lexed;
+            }
+            case '-':
+            case '0': case '1': case '2': case '3': case '4': 
+            case '5': case '6': case '7': case '8': case '9': {
+                /* integer parsing wants to start from the beginning */
+                unreadChar(lexer, offset);
+                tok = yajl_lex_number(lexer, (const unsigned char *) jsonText,
+                                      jsonTextLen, offset);
+                goto lexed;
+            }
+            case '/':
+                /* hey, look, a probable comment!  If comments are disabled
+                 * it's an error. */
+                if (!lexer->allowComments) {
+                    unreadChar(lexer, offset);
+                    lexer->error = yajl_lex_unallowed_comment;
+                    tok = yajl_tok_error;
+                    goto lexed;
+                }
+                /* if comments are enabled, then we should try to lex
+                 * the thing.  possible outcomes are
+                 * - successful lex (tok_comment, which means continue),
+                 * - malformed comment opening (slash not followed by
+                 *   '*' or '/') (tok_error)
+                 * - eof hit. (tok_eof) */
+                tok = yajl_lex_comment(lexer, (const unsigned char *) jsonText,
+                                       jsonTextLen, offset);
+                if (tok == yajl_tok_comment) {
+                    /* "error" is silly, but that's the initial
+                     * state of tok.  guilty until proven innocent. */  
+                    tok = yajl_tok_error;
+                    yajl_buf_clear(lexer->buf);
+                    lexer->bufInUse = 0;
+                    startOffset = *offset; 
+                    break;
+                }
+                /* hit error or eof, bail */
+                goto lexed;
+            default:
+                lexer->error = yajl_lex_invalid_char;
+                tok = yajl_tok_error;
+                goto lexed;
+        }
+    }
+
+
+  lexed:
+    /* need to append to buffer if the buffer is in use or
+     * if it's an EOF token */
+    if (tok == yajl_tok_eof || lexer->bufInUse) {
+        if (!lexer->bufInUse) yajl_buf_clear(lexer->buf);
+        lexer->bufInUse = 1;
+        yajl_buf_append(lexer->buf, jsonText + startOffset, *offset - startOffset);
+        lexer->bufOff = 0;
+        
+        if (tok != yajl_tok_eof) {
+            *outBuf = yajl_buf_data(lexer->buf);
+            *outLen = yajl_buf_len(lexer->buf);
+            lexer->bufInUse = 0;
+        }
+    } else if (tok != yajl_tok_error) {
+        *outBuf = jsonText + startOffset;
+        *outLen = *offset - startOffset;
+    }
+
+    /* special case for strings. skip the quotes. */
+    if (tok == yajl_tok_string || tok == yajl_tok_string_with_escapes)
+    {
+        assert(*outLen >= 2);
+        (*outBuf)++;
+        *outLen -= 2; 
+    }
+
+
+#ifdef YAJL_LEXER_DEBUG
+    if (tok == yajl_tok_error) {
+        printf("lexical error: %s\n",
+               yajl_lex_error_to_string(yajl_lex_get_error(lexer)));
+    } else if (tok == yajl_tok_eof) {
+        printf("EOF hit\n");
+    } else {
+        printf("lexed %s: '", tokToStr(tok));
+        fwrite(*outBuf, 1, *outLen, stdout);
+        printf("'\n");
+    }
+#endif
+
+    return tok;
+}
+
+const char *
+yajl_lex_error_to_string(yajl_lex_error error)
+{
+    switch (error) {
+        case yajl_lex_e_ok:
+            return "ok, no error";
+        case yajl_lex_string_invalid_utf8:
+            return "invalid bytes in UTF8 string.";
+        case yajl_lex_string_invalid_escaped_char:
+            return "inside a string, '\\' occurs before a character "
+                   "which it may not.";
+        case yajl_lex_string_invalid_json_char:            
+            return "invalid character inside string.";
+        case yajl_lex_string_invalid_hex_char:
+            return "invalid (non-hex) character occurs after '\\u' inside "
+                   "string.";
+        case yajl_lex_invalid_char:
+            return "invalid char in json text.";
+        case yajl_lex_invalid_string:
+            return "invalid string in json text.";
+        case yajl_lex_missing_integer_after_exponent:
+            return "malformed number, a digit is required after the exponent.";
+        case yajl_lex_missing_integer_after_decimal:
+            return "malformed number, a digit is required after the "
+                   "decimal point.";
+        case yajl_lex_missing_integer_after_minus:
+            return "malformed number, a digit is required after the "
+                   "minus sign.";
+        case yajl_lex_unallowed_comment:
+            return "probable comment found in input text, comments are "
+                   "not enabled.";
+    }
+    return "unknown error code";
+}
+
+
+/** allows access to more specific information about the lexical
+ *  error when yajl_lex_lex returns yajl_tok_error. */
+yajl_lex_error
+yajl_lex_get_error(yajl_lexer lexer)
+{
+    if (lexer == NULL) return (yajl_lex_error) -1;
+    return lexer->error;
+}
+
+unsigned int yajl_lex_current_line(yajl_lexer lexer)
+{
+    return lexer->lineOff;
+}
+
+unsigned int yajl_lex_current_char(yajl_lexer lexer)
+{
+    return lexer->charOff;
+}
+
+yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char * jsonText,
+                       unsigned int jsonTextLen, unsigned int offset)
+{
+    const unsigned char * outBuf;
+    unsigned int outLen;
+    unsigned int bufLen = yajl_buf_len(lexer->buf);
+    unsigned int bufOff = lexer->bufOff;
+    unsigned int bufInUse = lexer->bufInUse;
+    yajl_tok tok;
+    
+    tok = yajl_lex_lex(lexer, jsonText, jsonTextLen, &offset,
+                       &outBuf, &outLen);
+
+    lexer->bufOff = bufOff;
+    lexer->bufInUse = bufInUse;
+    yajl_buf_truncate(lexer->buf, bufLen);
+    
+    return tok;
+}

Added: couchdb/trunk/src/ejson/yajl/yajl_lex.h
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/ejson/yajl/yajl_lex.h?rev=1088941&view=auto
==============================================================================
--- couchdb/trunk/src/ejson/yajl/yajl_lex.h (added)
+++ couchdb/trunk/src/ejson/yajl/yajl_lex.h Tue Apr  5 09:42:41 2011
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2010, Lloyd Hilaiel.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ * 
+ *  3. Neither the name of Lloyd Hilaiel nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */ 
+
+#ifndef __YAJL_LEX_H__
+#define __YAJL_LEX_H__
+
+#include "yajl_common.h"
+
+typedef enum {
+    yajl_tok_bool,         
+    yajl_tok_colon,
+    yajl_tok_comma,     
+    yajl_tok_eof,
+    yajl_tok_error,
+    yajl_tok_left_brace,     
+    yajl_tok_left_bracket,
+    yajl_tok_null,         
+    yajl_tok_right_brace,     
+    yajl_tok_right_bracket,
+
+    /* we differentiate between integers and doubles to allow the
+     * parser to interpret the number without re-scanning */
+    yajl_tok_integer, 
+    yajl_tok_double, 
+
+    /* we differentiate between strings which require further processing,
+     * and strings that do not */
+    yajl_tok_string,
+    yajl_tok_string_with_escapes,
+
+    /* comment tokens are not currently returned to the parser, ever */
+    yajl_tok_comment
+} yajl_tok;
+
+typedef struct yajl_lexer_t * yajl_lexer;
+
+yajl_lexer yajl_lex_alloc(yajl_alloc_funcs * alloc,
+                          unsigned int allowComments,
+                          unsigned int validateUTF8);
+
+void yajl_lex_free(yajl_lexer lexer);
+
+/**
+ * run/continue a lex. "offset" is an input/output parameter.
+ * It should be initialized to zero for a
+ * new chunk of target text, and upon subsetquent calls with the same
+ * target text should passed with the value of the previous invocation.
+ *
+ * the client may be interested in the value of offset when an error is
+ * returned from the lexer.  This allows the client to render useful
+n * error messages.
+ *
+ * When you pass the next chunk of data, context should be reinitialized
+ * to zero.
+ * 
+ * Finally, the output buffer is usually just a pointer into the jsonText,
+ * however in cases where the entity being lexed spans multiple chunks,
+ * the lexer will buffer the entity and the data returned will be
+ * a pointer into that buffer.
+ *
+ * This behavior is abstracted from client code except for the performance
+ * implications which require that the client choose a reasonable chunk
+ * size to get adequate performance.
+ */
+yajl_tok yajl_lex_lex(yajl_lexer lexer, const unsigned char * jsonText,
+                      unsigned int jsonTextLen, unsigned int * offset,
+                      const unsigned char ** outBuf, unsigned int * outLen);
+
+/** have a peek at the next token, but don't move the lexer forward */
+yajl_tok yajl_lex_peek(yajl_lexer lexer, const unsigned char * jsonText,
+                       unsigned int jsonTextLen, unsigned int offset);
+
+
+typedef enum {
+    yajl_lex_e_ok = 0,
+    yajl_lex_string_invalid_utf8,
+    yajl_lex_string_invalid_escaped_char,
+    yajl_lex_string_invalid_json_char,
+    yajl_lex_string_invalid_hex_char,
+    yajl_lex_invalid_char,
+    yajl_lex_invalid_string,
+    yajl_lex_missing_integer_after_decimal,
+    yajl_lex_missing_integer_after_exponent,
+    yajl_lex_missing_integer_after_minus,
+    yajl_lex_unallowed_comment
+} yajl_lex_error;
+
+const char * yajl_lex_error_to_string(yajl_lex_error error);
+
+/** allows access to more specific information about the lexical
+ *  error when yajl_lex_lex returns yajl_tok_error. */
+yajl_lex_error yajl_lex_get_error(yajl_lexer lexer);
+
+/** get the current offset into the most recently lexed json string. */
+unsigned int yajl_lex_current_offset(yajl_lexer lexer);
+
+/** get the number of lines lexed by this lexer instance */
+unsigned int yajl_lex_current_line(yajl_lexer lexer);
+
+/** get the number of chars lexed by this lexer instance since the last
+ *  \n or \r */
+unsigned int yajl_lex_current_char(yajl_lexer lexer);
+
+#endif

Added: couchdb/trunk/src/ejson/yajl/yajl_parse.h
URL: http://svn.apache.org/viewvc/couchdb/trunk/src/ejson/yajl/yajl_parse.h?rev=1088941&view=auto
==============================================================================
--- couchdb/trunk/src/ejson/yajl/yajl_parse.h (added)
+++ couchdb/trunk/src/ejson/yajl/yajl_parse.h Tue Apr  5 09:42:41 2011
@@ -0,0 +1,193 @@
+/*
+ * Copyright 2010, Lloyd Hilaiel.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ * 
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ * 
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ * 
+ *  3. Neither the name of Lloyd Hilaiel nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
+ * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+ * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */ 
+
+/**
+ * \file yajl_parse.h
+ * Interface to YAJL's JSON parsing facilities.
+ */
+
+#include "yajl_common.h"
+
+#ifndef __YAJL_PARSE_H__
+#define __YAJL_PARSE_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif    
+    /** error codes returned from this interface */
+    typedef enum {
+        /** no error was encountered */
+        yajl_status_ok,
+        /** a client callback returned zero, stopping the parse */
+        yajl_status_client_canceled,
+        /** The parse cannot yet complete because more json input text
+         *  is required, call yajl_parse with the next buffer of input text.
+         *  (pertinent only when stream parsing) */
+        yajl_status_insufficient_data,
+        /** An error occured during the parse.  Call yajl_get_error for
+         *  more information about the encountered error */
+        yajl_status_error
+    } yajl_status;
+
+    /** attain a human readable, english, string for an error */
+    YAJL_API const char * yajl_status_to_string(yajl_status code);
+
+    /** an opaque handle to a parser */
+    typedef struct yajl_handle_t * yajl_handle;
+
+    /** yajl is an event driven parser.  this means as json elements are
+     *  parsed, you are called back to do something with the data.  The
+     *  functions in this table indicate the various events for which
+     *  you will be called back.  Each callback accepts a "context"
+     *  pointer, this is a void * that is passed into the yajl_parse
+     *  function which the client code may use to pass around context.
+     *
+     *  All callbacks return an integer.  If non-zero, the parse will
+     *  continue.  If zero, the parse will be canceled and
+     *  yajl_status_client_canceled will be returned from the parse.
+     *
+     *  Note about handling of numbers:
+     *    yajl will only convert numbers that can be represented in a double
+     *    or a long int.  All other numbers will be passed to the client
+     *    in string form using the yajl_number callback.  Furthermore, if
+     *    yajl_number is not NULL, it will always be used to return numbers,
+     *    that is yajl_integer and yajl_double will be ignored.  If
+     *    yajl_number is NULL but one of yajl_integer or yajl_double are
+     *    defined, parsing of a number larger than is representable
+     *    in a double or long int will result in a parse error.
+     */
+    typedef struct {
+        int (* yajl_null)(void * ctx);
+        int (* yajl_boolean)(void * ctx, int boolVal);
+        int (* yajl_integer)(void * ctx, long integerVal);
+        int (* yajl_double)(void * ctx, double doubleVal);
+        /** A callback which passes the string representation of the number
+         *  back to the client.  Will be used for all numbers when present */
+        int (* yajl_number)(void * ctx, const char * numberVal,
+                            unsigned int numberLen);
+
+        /** strings are returned as pointers into the JSON text when,
+         * possible, as a result, they are _not_ null padded */
+        int (* yajl_string)(void * ctx, const unsigned char * stringVal,
+                            unsigned int stringLen);
+
+        int (* yajl_start_map)(void * ctx);
+        int (* yajl_map_key)(void * ctx, const unsigned char * key,
+                             unsigned int stringLen);
+        int (* yajl_end_map)(void * ctx);        
+
+        int (* yajl_start_array)(void * ctx);
+        int (* yajl_end_array)(void * ctx);        
+    } yajl_callbacks;
+    
+    /** configuration structure for the generator */
+    typedef struct {
+        /** if nonzero, javascript style comments will be allowed in
+         *  the json input, both slash star and slash slash */
+        unsigned int allowComments;
+        /** if nonzero, invalid UTF8 strings will cause a parse
+         *  error */
+        unsigned int checkUTF8;
+    } yajl_parser_config;
+
+    /** allocate a parser handle
+     *  \param callbacks  a yajl callbacks structure specifying the
+     *                    functions to call when different JSON entities
+     *                    are encountered in the input text.  May be NULL,
+     *                    which is only useful for validation.
+     *  \param config     configuration parameters for the parse.
+     *  \param ctx        a context pointer that will be passed to callbacks.
+     */
+    YAJL_API yajl_handle yajl_alloc(const yajl_callbacks * callbacks,
+                                    const yajl_parser_config * config,
+                                    const yajl_alloc_funcs * allocFuncs,
+                                    void * ctx);
+
+    /** free a parser handle */    
+    YAJL_API void yajl_free(yajl_handle handle);
+
+    /** Parse some json!
+     *  \param hand - a handle to the json parser allocated with yajl_alloc
+     *  \param jsonText - a pointer to the UTF8 json text to be parsed
+     *  \param jsonTextLength - the length, in bytes, of input text
+     */
+    YAJL_API yajl_status yajl_parse(yajl_handle hand,
+                                    const unsigned char * jsonText,
+                                    unsigned int jsonTextLength);
+
+    /** Parse any remaining buffered json.
+     *  Since yajl is a stream-based parser, without an explicit end of
+     *  input, yajl sometimes can't decide if content at the end of the
+     *  stream is valid or not.  For example, if "1" has been fed in,
+     *  yajl can't know whether another digit is next or some character
+     *  that would terminate the integer token.
+     *
+     *  \param hand - a handle to the json parser allocated with yajl_alloc
+     */
+    YAJL_API yajl_status yajl_parse_complete(yajl_handle hand);
+    
+    /** get an error string describing the state of the
+     *  parse.
+     *
+     *  If verbose is non-zero, the message will include the JSON
+     *  text where the error occured, along with an arrow pointing to
+     *  the specific char.
+     *
+     *  \returns A dynamically allocated string will be returned which should
+     *  be freed with yajl_free_error 
+     */
+    YAJL_API unsigned char * yajl_get_error(yajl_handle hand, int verbose,
+                                            const unsigned char * jsonText,
+                                            unsigned int jsonTextLength);
+
+    /**
+     * get the amount of data consumed from the last chunk passed to YAJL.
+     *
+     * In the case of a successful parse this can help you understand if
+     * the entire buffer was consumed (which will allow you to handle
+     * "junk at end of input". 
+     * 
+     * In the event an error is encountered during parsing, this function
+     * affords the client a way to get the offset into the most recent
+     * chunk where the error occured.  0 will be returned if no error
+     * was encountered.
+     */
+    YAJL_API unsigned int yajl_get_bytes_consumed(yajl_handle hand);
+
+    /** free an error returned from yajl_get_error */
+    YAJL_API void yajl_free_error(yajl_handle hand, unsigned char * str);
+
+#ifdef __cplusplus
+}
+#endif    
+
+#endif