You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by th...@apache.org on 2011/03/15 17:34:23 UTC

svn commit: r1081847 - in /jackrabbit/sandbox/jackrabbit-j3/src: main/java/org/apache/jackrabbit/j3/json/ test/java/org/apache/jackrabbit/j3/json/

Author: thomasm
Date: Tue Mar 15 16:34:22 2011
New Revision: 1081847

URL: http://svn.apache.org/viewvc?rev=1081847&view=rev
Log:
Json/Jsop Tokenizer

Added:
    jackrabbit/sandbox/jackrabbit-j3/src/main/java/org/apache/jackrabbit/j3/json/
    jackrabbit/sandbox/jackrabbit-j3/src/main/java/org/apache/jackrabbit/j3/json/JsopTokenizer.java
    jackrabbit/sandbox/jackrabbit-j3/src/test/java/org/apache/jackrabbit/j3/json/
    jackrabbit/sandbox/jackrabbit-j3/src/test/java/org/apache/jackrabbit/j3/json/TestAll.java
    jackrabbit/sandbox/jackrabbit-j3/src/test/java/org/apache/jackrabbit/j3/json/TestTokenizer.java

Added: jackrabbit/sandbox/jackrabbit-j3/src/main/java/org/apache/jackrabbit/j3/json/JsopTokenizer.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/jackrabbit-j3/src/main/java/org/apache/jackrabbit/j3/json/JsopTokenizer.java?rev=1081847&view=auto
==============================================================================
--- jackrabbit/sandbox/jackrabbit-j3/src/main/java/org/apache/jackrabbit/j3/json/JsopTokenizer.java (added)
+++ jackrabbit/sandbox/jackrabbit-j3/src/main/java/org/apache/jackrabbit/j3/json/JsopTokenizer.java Tue Mar 15 16:34:22 2011
@@ -0,0 +1,329 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.j3.json;
+
+/**
+ * A tokenizer for Json and Jsop strings.
+ */
+public class JsopTokenizer {
+
+    // "- " is "patch remove", without space is a negative number
+    // please note .0 and -.1 are not valid numbers
+
+    final static int END = 0;
+    final static int STRING = 1, NUMBER = 2, TRUE = 3, FALSE = 4, NULL = 5, ERROR = 6;
+
+    private final String jsop;
+    private int pos;
+    private int tokenType;
+    private String currentToken;
+
+    public JsopTokenizer(String json) {
+        this.jsop = json;
+    }
+
+    public String toString() {
+        return jsop;
+    }
+
+    /**
+     * Get the token type of the last token.
+     *
+     * @return the token type
+     */
+    public int getTokenType() {
+        return tokenType;
+    }
+
+    /**
+     * Get the last token value if the the token type was STRING or NUMBER. For
+     * STRING, the text is decoded; for NUMBER, it is returned as parsed. In all
+     * other cases the result is undefined.
+     *
+     * @return the token
+     */
+    public String getToken() {
+        return currentToken;
+    }
+
+    /**
+     * Read a token and return the token type.
+     *
+     * @return the token type
+     */
+    public int read() {
+        try {
+            return readToken();
+        } catch (StringIndexOutOfBoundsException e) {
+            tokenType = ERROR;
+            throw getFormatException(jsop, pos);
+        }
+    }
+
+    private int readToken() {
+        char ch;
+        while (true) {
+            if (pos >= jsop.length()) {
+                return tokenType = END;
+            }
+            ch = jsop.charAt(pos);
+            if (ch > ' ') {
+                break;
+            }
+            pos++;
+        }
+        int start = pos++;
+        switch (ch) {
+        case '\"': {
+            boolean escaped = false;
+            while (true) {
+                ch = jsop.charAt(pos++);
+                if (ch == '\"') {
+                    break;
+                } else if (ch == '\\') {
+                    escaped = true;
+                    pos++;
+                }
+            }
+            String s = jsop.substring(start + 1, pos - 1);
+            if (escaped) {
+                currentToken = jsonDecode(s);
+            } else {
+                currentToken = s;
+            }
+            tokenType = STRING;
+            break;
+        }
+        case '{':
+        case '}':
+        case '[':
+        case ']':
+        case '+':
+        case ':':
+        case ',':
+            tokenType = ch;
+            break;
+        case '-':
+            ch = jsop.charAt(pos);
+            if (ch < '0' || ch > '9') {
+                // lookahead
+                tokenType = '-';
+                break;
+            }
+            // else fall though
+        default:
+            if (ch >= '0' && ch <= '9') {
+                tokenType = NUMBER;
+                while (pos < jsop.length()) {
+                    ch = jsop.charAt(pos);
+                    if (ch < '0' || ch > '9') {
+                        break;
+                    }
+                    pos++;
+                }
+                if (ch == '.') {
+                    pos++;
+                    while (pos < jsop.length()) {
+                        ch = jsop.charAt(pos);
+                        if (ch < '0' || ch > '9') {
+                            break;
+                        }
+                        pos++;
+                    }
+                }
+                if (ch == 'e' || ch == 'E') {
+                    ch = jsop.charAt(++pos);
+                    if (ch == '+' || ch == '-') {
+                        ch = jsop.charAt(++pos);
+                    }
+                    while (pos < jsop.length()) {
+                        ch = jsop.charAt(pos);
+                        if (ch < '0' || ch > '9') {
+                            break;
+                        }
+                        pos++;
+                    }
+                }
+                currentToken = jsop.substring(start, pos);
+            } else if (ch >= 'a' && ch <= 'z') {
+                while (pos < jsop.length()) {
+                    ch = jsop.charAt(pos);
+                    if (ch < 'a' || ch > 'z') {
+                        break;
+                    }
+                    pos++;
+                }
+                String s = jsop.substring(start, pos);
+                if ("null".equals(s)) {
+                    tokenType = NULL;
+                } else if ("true".equals(s)) {
+                    tokenType = TRUE;
+                } else if ("false".equals(s)) {
+                    tokenType = FALSE;
+                } else {
+                    tokenType = ERROR;
+                    throw getFormatException(jsop, pos);
+                }
+                break;
+            } else {
+                tokenType = ERROR;
+                throw getFormatException(jsop, pos);
+            }
+        }
+        return tokenType;
+    }
+
+    /**
+     * Decode a Json string.
+     *
+     * @param s the encoded string
+     * @return the string
+     */
+    public static String jsonDecode(String s) {
+        int length = s.length();
+        StringBuilder buff = new StringBuilder(length);
+        for (int i = 0; i < length; i++) {
+            char c = s.charAt(i);
+            if (c == '\\') {
+                if (i + 1 >= s.length()) {
+                    throw getFormatException(s, i);
+                }
+                c = s.charAt(++i);
+                switch (c) {
+                case '"':
+                    buff.append('"');
+                    break;
+                case '\\':
+                    buff.append('\\');
+                    break;
+                case '/':
+                    buff.append('/');
+                    break;
+                case 'b':
+                    buff.append('\b');
+                    break;
+                case 'f':
+                    buff.append('\f');
+                    break;
+                case 'n':
+                    buff.append('\n');
+                    break;
+                case 'r':
+                    buff.append('\r');
+                    break;
+                case 't':
+                    buff.append('\t');
+                    break;
+                case 'u': {
+                    try {
+                        c = (char) (Integer.parseInt(s.substring(i + 1, i + 5), 16));
+                    } catch (NumberFormatException e) {
+                        throw getFormatException(s, i);
+                    }
+                    i += 4;
+                    buff.append(c);
+                    break;
+                }
+                default:
+                    throw getFormatException(s, i);
+                }
+            } else {
+                buff.append(c);
+            }
+        }
+        return buff.toString();
+    }
+
+    /**
+     * Convert a string to a Json literal using the correct escape sequences.
+     * The literal is enclosed in double quotes. Characters outside the range
+     * 32..127 are encoded (backslash u xxxx). The forward slash (solidus) is
+     * not escaped.
+     *
+     * @param s the text to convert
+     * @return the Json representation (including double quotes)
+     */
+    public static String jsonEncode(String s) {
+        int length = s.length();
+        StringBuilder buff = new StringBuilder(length + 2);
+        buff.append('\"');
+        for (int i = 0; i < length; i++) {
+            char c = s.charAt(i);
+            switch (c) {
+            case '"':
+                // quotation mark
+                buff.append("\\\"");
+                break;
+            case '\\':
+                // backslash
+                buff.append("\\\\");
+                break;
+            case '\b':
+                // backspace
+                buff.append("\\b");
+                break;
+            case '\f':
+                // formfeed
+                buff.append("\\f");
+                break;
+            case '\n':
+                // newline
+                buff.append("\\n");
+                break;
+            case '\r':
+                // carriage return
+                buff.append("\\r");
+                break;
+            case '\t':
+                // horizontal tab
+                buff.append("\\t");
+                break;
+            default:
+                int ch = c;
+                if (ch >= ' ' && ch <= 127) {
+                    buff.append(c);
+                } else {
+                    buff.append("\\u");
+                    // make sure it's four characters
+                    buff.append(Integer.toHexString(0x10000 | ch).substring(1));
+                }
+            }
+        }
+        buff.append('\"');
+        return buff.toString();
+    }
+
+    private static IllegalArgumentException getFormatException(String s, int i) {
+        return new IllegalArgumentException(addAsterisk(s, i));
+    }
+
+    /**
+     * Add an asterisk ('[*]') at the given position. This format is used to
+     * show where parsing failed in a statement.
+     *
+     * @param s the text
+     * @param index the position
+     * @return the text with asterisk
+     */
+    private static String addAsterisk(String s, int index) {
+        if (s != null && index < s.length()) {
+            s = s.substring(0, index) + "[*]" + s.substring(index);
+        }
+        return s;
+    }
+
+}

Added: jackrabbit/sandbox/jackrabbit-j3/src/test/java/org/apache/jackrabbit/j3/json/TestAll.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/jackrabbit-j3/src/test/java/org/apache/jackrabbit/j3/json/TestAll.java?rev=1081847&view=auto
==============================================================================
--- jackrabbit/sandbox/jackrabbit-j3/src/test/java/org/apache/jackrabbit/j3/json/TestAll.java (added)
+++ jackrabbit/sandbox/jackrabbit-j3/src/test/java/org/apache/jackrabbit/j3/json/TestAll.java Tue Mar 15 16:34:22 2011
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.j3.json;
+
+import junit.framework.Test;
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+
+/**
+ * Runs all tests.
+ */
+public class TestAll extends TestCase {
+
+    /**
+     * Builds the suite for this package. The suite is named after the
+     * package and contains the tests of {@link TestTokenizer}.
+     *
+     * @return the <code>Test</code> suite for this package
+     */
+    public static Test suite() {
+        TestSuite packageSuite = new TestSuite("org.apache.jackrabbit.j3.json");
+        packageSuite.addTestSuite(TestTokenizer.class);
+        return packageSuite;
+    }
+}

Added: jackrabbit/sandbox/jackrabbit-j3/src/test/java/org/apache/jackrabbit/j3/json/TestTokenizer.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/jackrabbit-j3/src/test/java/org/apache/jackrabbit/j3/json/TestTokenizer.java?rev=1081847&view=auto
==============================================================================
--- jackrabbit/sandbox/jackrabbit-j3/src/test/java/org/apache/jackrabbit/j3/json/TestTokenizer.java (added)
+++ jackrabbit/sandbox/jackrabbit-j3/src/test/java/org/apache/jackrabbit/j3/json/TestTokenizer.java Tue Mar 15 16:34:22 2011
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.j3.json;
+
+import junit.framework.TestCase;
+
+/**
+ * Test the Jsop tokenizer.
+ */
+public class TestTokenizer extends TestCase {
+
+    public void test() {
+        try {
+            JsopTokenizer.jsonDecode("test\\");
+            fail();
+        } catch (IllegalArgumentException e) {
+            // ok
+        }
+        try {
+            JsopTokenizer.jsonDecode("wrong\\uxxxx");
+            fail();
+        } catch (IllegalArgumentException e) {
+            // ok
+        }
+        try {
+            JsopTokenizer.jsonDecode("wrong\\m");
+            fail();
+        } catch (IllegalArgumentException e) {
+            // ok
+        }
+        test("/* error */", "\"\\");
+        test("/* error */1", ".1");
+        assertEquals("x", new JsopTokenizer("x").toString());
+        test("/* error */", "true" + "true");
+        test("/* error */", "truer");
+        test("/* error */", "falsehood");
+        test("/* error */", "nil");
+        test("/* error */", "\"invalid");
+        test("- \"test/test\"", "-\"test\\/test\"");
+        test(" {\n\"x\": 1, \"y\": 2\n}\n", "{\"x\":1, \"y\":2}");
+        test(" [\ntrue, false, null\n]\n", "[true, false, null]");
+        test("\"-\\\\-\\\"-\\b-\\f-\\n-\\r-\\t-\\u1234\"", "\"-\\\\-\\\"-\\b-\\f-\\n-\\r-\\t-\\u1234\"");
+        test("\"-\\b-\\f-\\n-\\r-\\t\"", "\"-\b-\f-\n-\r-\t\"");
+        test(" [\n0, 12, -1, 0.1, -0.1, -2.3e1, 1e+1, 1.e-20\n]\n", "[0,12,-1,0.1,-0.1,-2.3e1,1e+1,1.e-20]");
+        test("\"Hello\"", "\"Hello\"");
+        test(" [\n\n]\n", "[]");
+        test(" {\n\n}\n", "{}");
+    }
+
+    static void test(String expected, String json) {
+        String j2 = prettyPrintWithErrors(json);
+        assertEquals(expected, j2);
+    }
+
+    static String prettyPrintWithErrors(String jsop) {
+        StringBuilder buff = new StringBuilder();
+        JsopTokenizer t = new JsopTokenizer(jsop);
+        while (true) {
+            try {
+                prettyPrint(buff, t);
+            } catch (IllegalArgumentException e) {
+                buff.append("/* error */");
+            }
+            if (t.getTokenType() == JsopTokenizer.END) {
+                return buff.toString();
+            }
+        }
+    }
+
+    static String prettyPrint(String jsop) {
+        StringBuilder buff = new StringBuilder();
+        JsopTokenizer t = new JsopTokenizer(jsop);
+        return prettyPrint(buff, t);
+    }
+
+    static String prettyPrint(StringBuilder buff, JsopTokenizer t) {
+        while (true) {
+            switch (t.read()) {
+            case JsopTokenizer.END:
+                return buff.toString();
+            case JsopTokenizer.STRING:
+                buff.append(JsopTokenizer.jsonEncode(t.getToken()));
+                break;
+            case JsopTokenizer.NUMBER:
+                buff.append(t.getToken());
+                break;
+            case JsopTokenizer.TRUE:
+                buff.append("true");
+                break;
+            case JsopTokenizer.FALSE:
+                buff.append("false");
+                break;
+            case JsopTokenizer.NULL:
+                buff.append("null");
+                break;
+            case '{':
+                buff.append(" {\n");
+                break;
+            case '}':
+                buff.append("\n}\n");
+                break;
+            case '[':
+                buff.append(" [\n");
+                break;
+            case ']':
+                buff.append("\n]\n");
+                break;
+            case ',':
+                buff.append(", ");
+                break;
+            case ':':
+                buff.append(": ");
+                break;
+            case '+':
+                buff.append("+ ");
+                break;
+            case '-':
+                buff.append("- ");
+                break;
+            case '^':
+                buff.append("^ ");
+                break;
+            default:
+                throw new IllegalArgumentException("token type: " + t.getTokenType());
+            }
+        }
+    }
+
+}