You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by th...@apache.org on 2011/06/08 11:22:30 UTC

svn commit: r1133299 - in /jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem: ./ JsopBuilder.java JsopTokenizer.java

Author: thomasm
Date: Wed Jun  8 09:22:30 2011
New Revision: 1133299

URL: http://svn.apache.org/viewvc?rev=1133299&view=rev
Log:
JSOP parser (from jackrabbit-j3) and builder.

Added:
    jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/
    jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopBuilder.java
    jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopTokenizer.java

Added: jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopBuilder.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopBuilder.java?rev=1133299&view=auto
==============================================================================
--- jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopBuilder.java (added)
+++ jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopBuilder.java Wed Jun  8 09:22:30 2011
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.mk.mem;
+
+import java.util.BitSet;
+
+public class JsopBuilder {
+
+    private StringBuilder buff = new StringBuilder();
+    private BitSet commas = new BitSet();
+    private int level;
+
+    void beginObject() {
+        buff.append('{');
+        commas.clear(level++);
+    }
+
+    void endObject() {
+        buff.append("\n}");
+        commas.clear(level--);
+    }
+
+    void addProperty(String propertyName, String encodedValue) {
+        if (commas.get(level)) {
+            buff.append(',');
+        } else {
+            commas.set(level);
+        }
+        buff.append('\n');
+        buff.append(JsopTokenizer.encode(propertyName)).append(':').append(encodedValue);
+    }
+
+    public String toString() {
+        return buff.toString();
+    }
+
+}

Added: jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopTokenizer.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopTokenizer.java?rev=1133299&view=auto
==============================================================================
--- jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopTokenizer.java (added)
+++ jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopTokenizer.java Wed Jun  8 09:22:30 2011
@@ -0,0 +1,419 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.mk.mem;
+
+/**
+ * A tokenizer for Json and Jsop strings.
+ * <p>
+ * The tokenizer keeps one token of lookahead: the constructor reads the first
+ * token, and every call to {@link #read()} makes the lookahead token the
+ * "last" token (available through {@link #getTokenType()} and
+ * {@link #getToken()}) and then scans the next one. Token types are either
+ * one of the constants below, or, for punctuation, the character itself.
+ */
+public class JsopTokenizer {
+
+    // "- " is "patch remove", without space is a negative number
+    // please note .0 and -.1 are not valid numbers
+
+    final static int STRING = 0, NUMBER = 1, TRUE = 2, FALSE = 3, NULL = 4;
+    final static int COMMENT = 5, ERROR = 6, END = 7;
+
+    // display names, indexed by the token type constants STRING .. END;
+    // there must be exactly one entry per constant
+    private static final String[] TYPE = {
+        "string", "number", "true", "false", "null", "comment", "error", "end"
+    };
+
+    private final String jsop;
+    private int pos;
+    private int currentType;
+    private String currentToken;
+    private int lastType;
+    private String lastToken;
+
+    /**
+     * Create a tokenizer and read the first token.
+     *
+     * @param json the text to tokenize
+     */
+    public JsopTokenizer(String json) {
+        this.jsop = json;
+        read();
+    }
+
+    /**
+     * Get the complete text that is being tokenized.
+     *
+     * @return the text passed to the constructor
+     */
+    @Override
+    public String toString() {
+        // TODO version as comment
+        // TODO newlines in strings -> error
+        // TODO project source code format
+        return jsop;
+    }
+
+    /**
+     * Get the token type of the last token.
+     *
+     * @return the token type
+     */
+    public int getTokenType() {
+        return lastType;
+    }
+
+    /**
+     * Get the last token value if the token type was STRING, NUMBER, COMMENT
+     * or ERROR. For STRING, the text is decoded; for NUMBER, it is returned as
+     * parsed; for COMMENT, it is the text between the comment delimiters; for
+     * ERROR, it is the error message. In all other cases the result is
+     * undefined.
+     *
+     * @return the token
+     */
+    public String getToken() {
+        return lastToken;
+    }
+
+    /**
+     * Read a token which must match a given token type.
+     *
+     * @param type the token type
+     * @return the token value (see {@link #getToken()})
+     * @throws IllegalArgumentException if the token type doesn't match
+     */
+    public String read(int type) {
+        // remember the value before matches() advances to the next token
+        String token = currentToken;
+        if (matches(type)) {
+            return token;
+        }
+        throw getFormatException(jsop, pos, getTokenType(type));
+    }
+
+    /**
+     * Read a string.
+     *
+     * @return the decoded string
+     * @throws IllegalArgumentException if the token type doesn't match
+     */
+    public String readString() {
+        return read(STRING);
+    }
+
+    /**
+     * Read a token if it matches the given token type. If it does not match,
+     * the tokenizer does not advance.
+     *
+     * @param type the token type
+     * @return true if there was a match
+     */
+    public boolean matches(int type) {
+        if (currentType == type) {
+            read();
+            return true;
+        }
+        return false;
+    }
+
+    /**
+     * Read a token and return the token type. Malformed input does not throw
+     * an exception here; instead the next token gets the type ERROR, with the
+     * text (marked at the failing position) as the token value.
+     *
+     * @return the token type
+     */
+    public int read() {
+        lastType = currentType;
+        lastToken = currentToken;
+        try {
+            currentType = readToken();
+        } catch (IllegalArgumentException e) {
+            currentType = ERROR;
+            currentToken = e.getMessage();
+        } catch (StringIndexOutOfBoundsException e) {
+            // the scanner ran past the end of the text, for example within
+            // an unterminated string or comment
+            currentType = ERROR;
+            currentToken = addAsterisk(jsop, pos);
+        }
+        return lastType;
+    }
+
+    /**
+     * Scan the next token starting at the current position, and set the
+     * current token value where the token type has one.
+     *
+     * @return the token type
+     * @throws IllegalArgumentException if the text is malformed
+     * @throws StringIndexOutOfBoundsException if the text ends within a token
+     */
+    private int readToken() {
+        char ch;
+        // skip whitespace and control characters
+        while (true) {
+            if (pos >= jsop.length()) {
+                return END;
+            }
+            ch = jsop.charAt(pos);
+            if (ch > ' ') {
+                break;
+            }
+            pos++;
+        }
+        int start = pos++;
+        switch (ch) {
+        case '\"': {
+            boolean escaped = false;
+            while (true) {
+                ch = jsop.charAt(pos++);
+                if (ch == '\"') {
+                    break;
+                } else if (ch == '\\') {
+                    escaped = true;
+                    // skip the escaped character, so that \" doesn't end
+                    // the string
+                    pos++;
+                }
+            }
+            String s = jsop.substring(start + 1, pos - 1);
+            if (escaped) {
+                currentToken = decode(s);
+            } else {
+                currentToken = s;
+            }
+            return STRING;
+        }
+        case '{':
+        case '}':
+        case '[':
+        case ']':
+        case '+':
+        case ':':
+        case ',':
+        case '>':
+        case '^':
+            return ch;
+        case '/': {
+            ch = jsop.charAt(pos);
+            if (ch != '*') {
+                return '/';
+            }
+            pos++;
+            while (true) {
+                ch = jsop.charAt(pos++);
+                if (ch == '*' && jsop.charAt(pos) == '/') {
+                    break;
+                }
+            }
+            // pos is now at the closing '/', and pos - 1 at the '*'
+            currentToken = jsop.substring(start + 2, pos - 1);
+            // skip only the closing '/', so that the character directly
+            // after the comment is not lost
+            pos++;
+            return COMMENT;
+        }
+        case '-':
+            ch = jsop.charAt(pos);
+            if (ch < '0' || ch > '9') {
+                // lookahead
+                return '-';
+            }
+            // else fall through
+        default:
+            if (ch >= '0' && ch <= '9') {
+                while (pos < jsop.length()) {
+                    ch = jsop.charAt(pos);
+                    if (ch < '0' || ch > '9') {
+                        break;
+                    }
+                    pos++;
+                }
+                if (ch == '.') {
+                    pos++;
+                    while (pos < jsop.length()) {
+                        ch = jsop.charAt(pos);
+                        if (ch < '0' || ch > '9') {
+                            break;
+                        }
+                        pos++;
+                    }
+                }
+                if (ch == 'e' || ch == 'E') {
+                    ch = jsop.charAt(++pos);
+                    if (ch == '+' || ch == '-') {
+                        ch = jsop.charAt(++pos);
+                    }
+                    while (pos < jsop.length()) {
+                        ch = jsop.charAt(pos);
+                        if (ch < '0' || ch > '9') {
+                            break;
+                        }
+                        pos++;
+                    }
+                }
+                currentToken = jsop.substring(start, pos);
+                return NUMBER;
+            } else if (ch >= 'a' && ch <= 'z') {
+                while (pos < jsop.length()) {
+                    ch = jsop.charAt(pos);
+                    if (ch < 'a' || ch > 'z') {
+                        break;
+                    }
+                    pos++;
+                }
+                String s = jsop.substring(start, pos);
+                if ("null".equals(s)) {
+                    return NULL;
+                } else if ("true".equals(s)) {
+                    return TRUE;
+                } else if ("false".equals(s)) {
+                    return FALSE;
+                } else {
+                    // unquoted identifiers are not supported
+                    throw getFormatException(jsop, pos);
+                }
+            }
+            throw getFormatException(jsop, pos);
+        }
+    }
+
+    /**
+     * Decode a Json string.
+     *
+     * @param s the encoded string (without the enclosing double quotes)
+     * @return the string
+     * @throws IllegalArgumentException if an escape sequence is unknown,
+     *             incomplete, or contains an invalid hex number
+     */
+    public static String decode(String s) {
+        int length = s.length();
+        StringBuilder buff = new StringBuilder(length);
+        for (int i = 0; i < length; i++) {
+            char c = s.charAt(i);
+            if (c == '\\') {
+                if (i + 1 >= length) {
+                    throw getFormatException(s, i);
+                }
+                c = s.charAt(++i);
+                switch (c) {
+                case '"':
+                    buff.append('"');
+                    break;
+                case '\\':
+                    buff.append('\\');
+                    break;
+                case '/':
+                    buff.append('/');
+                    break;
+                case 'b':
+                    buff.append('\b');
+                    break;
+                case 'f':
+                    buff.append('\f');
+                    break;
+                case 'n':
+                    buff.append('\n');
+                    break;
+                case 'r':
+                    buff.append('\r');
+                    break;
+                case 't':
+                    buff.append('\t');
+                    break;
+                case 'u': {
+                    // four hex digits are required after the 'u'; report a
+                    // truncated escape like any other malformed escape
+                    if (i + 5 > length) {
+                        throw getFormatException(s, i);
+                    }
+                    try {
+                        c = (char) (Integer.parseInt(s.substring(i + 1, i + 5), 16));
+                    } catch (NumberFormatException e) {
+                        throw getFormatException(s, i);
+                    }
+                    i += 4;
+                    buff.append(c);
+                    break;
+                }
+                default:
+                    throw getFormatException(s, i);
+                }
+            } else {
+                buff.append(c);
+            }
+        }
+        return buff.toString();
+    }
+
+    /**
+     * Convert a string to a Json literal using the correct escape sequences.
+     * The literal is enclosed in double quotes. Characters below 32 and above
+     * 127 are encoded (backslash u xxxx, or the short form where one exists).
+     * The forward slash (solidus) is not escaped.
+     *
+     * @param s the text to convert
+     * @return the Json representation (including double quotes)
+     */
+    public static String encode(String s) {
+        int length = s.length();
+        if (length == 0) {
+            return "\"\"";
+        }
+        StringBuilder buff = new StringBuilder(length + 2);
+        buff.append('\"');
+        for (int i = 0; i < length; i++) {
+            char c = s.charAt(i);
+            switch (c) {
+            case '"':
+                // quotation mark
+                buff.append("\\\"");
+                break;
+            case '\\':
+                // backslash
+                buff.append("\\\\");
+                break;
+            case '\b':
+                // backspace
+                buff.append("\\b");
+                break;
+            case '\f':
+                // formfeed
+                buff.append("\\f");
+                break;
+            case '\n':
+                // newline
+                buff.append("\\n");
+                break;
+            case '\r':
+                // carriage return
+                buff.append("\\r");
+                break;
+            case '\t':
+                // horizontal tab
+                buff.append("\\t");
+                break;
+            default:
+                int ch = c;
+                if (ch < ' ') {
+                    // guaranteed to be 1 or 2 hex digits only
+                    buff.append("\\u00");
+                    String hex = Integer.toHexString(c);
+                    if (hex.length() == 1) {
+                        buff.append('0');
+                    }
+                    buff.append(hex);
+                } else if (ch > 127) {
+                    // ascii only mode
+                    buff.append("\\u");
+                    String hex = Integer.toHexString(c);
+                    for (int len = hex.length(); len < 4; len++) {
+                        buff.append('0');
+                    }
+                    buff.append(hex);
+                } else {
+                    buff.append(c);
+                }
+            }
+        }
+        return buff.append('\"').toString();
+    }
+
+    /**
+     * Get the display name of a token type, for use in error messages.
+     *
+     * @param type one of the token type constants, or a punctuation character
+     * @return the name of the constant, or the character in single quotes
+     */
+    private static String getTokenType(int type) {
+        return type <= END ? TYPE[type] : "'" + (char) type + "'";
+    }
+
+    /**
+     * Create an exception for malformed text where a certain token was
+     * expected.
+     *
+     * @param s the text
+     * @param i the position where parsing failed
+     * @param expected the display name of the expected token
+     * @return the exception (not thrown here)
+     */
+    private static IllegalArgumentException getFormatException(String s, int i, String expected) {
+        return new IllegalArgumentException(addAsterisk(s, i) + " expected: " + expected);
+    }
+
+    /**
+     * Create an exception for malformed text.
+     *
+     * @param s the text
+     * @param i the position where parsing failed
+     * @return the exception (not thrown here)
+     */
+    private static IllegalArgumentException getFormatException(String s, int i) {
+        return new IllegalArgumentException(addAsterisk(s, i));
+    }
+
+    /**
+     * Add an asterisk ('[*]') at the given position. This format is used to
+     * show where parsing failed in a statement.
+     *
+     * @param s the text
+     * @param index the position
+     * @return the text with asterisk
+     */
+    private static String addAsterisk(String s, int index) {
+        if (s != null) {
+            // the position may be past the end if the text ended too early
+            index = Math.min(index, s.length());
+            s = s.substring(0, index) + "[*]" + s.substring(index);
+        }
+        return s;
+    }
+
+}