You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jackrabbit.apache.org by th...@apache.org on 2011/06/08 11:22:30 UTC
svn commit: r1133299 - in
/jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem:
./ JsopBuilder.java JsopTokenizer.java
Author: thomasm
Date: Wed Jun 8 09:22:30 2011
New Revision: 1133299
URL: http://svn.apache.org/viewvc?rev=1133299&view=rev
Log:
JSOP parser (from jackrabbit-j3) and builder.
Added:
jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/
jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopBuilder.java
jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopTokenizer.java
Added: jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopBuilder.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopBuilder.java?rev=1133299&view=auto
==============================================================================
--- jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopBuilder.java (added)
+++ jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopBuilder.java Wed Jun 8 09:22:30 2011
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.mk.mem;
+
+import java.util.BitSet;
+
+/**
+ * A minimal builder for Json / Jsop object literals.
+ * <p>
+ * Property names are encoded by the builder; property values must be passed
+ * in already encoded form (for example the result of
+ * {@link JsopTokenizer#encode(String)}, a number literal, or the text of a
+ * nested object).
+ */
+public class JsopBuilder {
+
+    /** The text generated so far. */
+    private final StringBuilder buff = new StringBuilder();
+
+    /**
+     * One flag per nesting level. The flag is set once the level contains a
+     * property, so that the next property is preceded by a comma.
+     */
+    private final BitSet commas = new BitSet();
+
+    /** The current nesting depth; 0 means outside of any object. */
+    private int level;
+
+    /**
+     * Append the opening brace of an object and enter a new nesting level.
+     */
+    void beginObject() {
+        buff.append('{');
+        // reset the flag of the level being entered, not of the enclosing
+        // one: the enclosing level must remember that it already has content
+        commas.clear(++level);
+    }
+
+    /**
+     * Append the closing brace of the current object and leave its nesting
+     * level.
+     *
+     * @throws IllegalStateException if there is no open object
+     */
+    void endObject() {
+        if (level <= 0) {
+            // fail here instead of letting the level become negative, which
+            // would only surface later as an index error in addProperty
+            throw new IllegalStateException("endObject() without matching beginObject()");
+        }
+        buff.append("\n}");
+        commas.clear(level--);
+    }
+
+    /**
+     * Append a property on a new line, preceded by a comma if it is not the
+     * first property at the current nesting level.
+     *
+     * @param propertyName the plain (not yet encoded) property name
+     * @param encodedValue the value, already in its Json representation
+     */
+    void addProperty(String propertyName, String encodedValue) {
+        if (commas.get(level)) {
+            buff.append(',');
+        } else {
+            commas.set(level);
+        }
+        buff.append('\n');
+        buff.append(JsopTokenizer.encode(propertyName)).append(':').append(encodedValue);
+    }
+
+    /**
+     * Get the text built so far.
+     *
+     * @return the generated text
+     */
+    @Override
+    public String toString() {
+        return buff.toString();
+    }
+
+}
Added: jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopTokenizer.java
URL: http://svn.apache.org/viewvc/jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopTokenizer.java?rev=1133299&view=auto
==============================================================================
--- jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopTokenizer.java (added)
+++ jackrabbit/sandbox/microkernel/src/main/java/org/apache/jackrabbit/mk/mem/JsopTokenizer.java Wed Jun 8 09:22:30 2011
@@ -0,0 +1,419 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.mk.mem;
+
+/**
+ * A tokenizer for Json and Jsop strings.
+ */
+public class JsopTokenizer {
+
+ // "- " is "patch remove", without space is a negative number
+ // please note .0 and -.1 are not valid numbers
+
+ final static int STRING = 0, NUMBER = 1, TRUE = 2, FALSE = 3, NULL = 4;
+ final static int COMMENT = 5, ERROR = 6, END = 7;
+
+ private static final String[] TYPE = {
+ "string", "number", "true", "false", "null", "error", "end"
+ };
+
+ private final String jsop;
+ private int pos;
+ private int currentType;
+ private String currentToken;
+ private int lastType;
+ private String lastToken;
+
+ public JsopTokenizer(String json) {
+ this.jsop = json;
+ read();
+ }
+
+ public String toString() {
+ int todo;
+// version as comment
+// newlines in strings -> error
+// project source code format
+
+ return jsop;
+ }
+
+ /**
+ * Get the token type of the last token.
+ *
+ * @return the token type
+ */
+ public int getTokenType() {
+ return lastType;
+ }
+
+ /**
+ * Get the last token value if the the token type was STRING or NUMBER. For
+ * STRING, the text is decoded; for NUMBER, it is returned as parsed. In all
+ * other cases the result is undefined.
+ *
+ * @return the token
+ */
+ public String getToken() {
+ return lastToken;
+ }
+
+ /**
+ * Read a token which must match a given token type.
+ *
+ * @param type the token type
+ * @throws IllegalStateException if the token type doesn't match
+ */
+ public String read(int type) {
+ String token = currentToken;
+ if (matches(type)) {
+ return token;
+ }
+ throw getFormatException(jsop, pos, getTokenType(type));
+ }
+
+ /**
+ * Read a string.
+ *
+ * @throws IllegalStateException if the token type doesn't match
+ */
+ public String readString() {
+ return read(STRING);
+ }
+
+ /**
+ * Read a token which must match a given token type.
+ *
+ * @param type the token type
+ * @return true if there was a match
+ */
+ public boolean matches(int type) {
+ if (currentType == type) {
+ read();
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * Read a token and return the token type.
+ *
+ * @return the token type
+ */
+ public int read() {
+ lastType = currentType;
+ lastToken = currentToken;
+ try {
+ currentType = readToken();
+ } catch (IllegalArgumentException e) {
+ currentType = ERROR;
+ currentToken = e.getMessage();
+ } catch (StringIndexOutOfBoundsException e) {
+ currentType = ERROR;
+ currentToken = addAsterisk(jsop, pos);
+ }
+ return lastType;
+ }
+
+ private int readToken() {
+ char ch;
+ while (true) {
+ if (pos >= jsop.length()) {
+ return END;
+ }
+ ch = jsop.charAt(pos);
+ if (ch > ' ') {
+ break;
+ }
+ pos++;
+ }
+ int start = pos++;
+ switch (ch) {
+ case '\"': {
+ boolean escaped = false;
+ while (true) {
+ ch = jsop.charAt(pos++);
+ if (ch == '\"') {
+ break;
+ } else if (ch == '\\') {
+ escaped = true;
+ pos++;
+ }
+ }
+ String s = jsop.substring(start + 1, pos - 1);
+ if (escaped) {
+ currentToken = decode(s);
+ } else {
+ currentToken = s;
+ }
+ return STRING;
+ }
+ case '{':
+ case '}':
+ case '[':
+ case ']':
+ case '+':
+ case ':':
+ case ',':
+ case '>':
+ case '^':
+ return ch;
+ case '/': {
+ ch = jsop.charAt(pos);
+ if (ch != '*') {
+ return '/';
+ }
+ pos++;
+ while (true) {
+ ch = jsop.charAt(pos++);
+ if (ch == '*' && jsop.charAt(pos) == '/') {
+ break;
+ }
+ }
+ currentToken = jsop.substring(start + 2, pos - 1);
+ pos += 2;
+ return COMMENT;
+ }
+ case '-':
+ ch = jsop.charAt(pos);
+ if (ch < '0' || ch > '9') {
+ // lookahead
+ return '-';
+ }
+ // else fall though
+ default:
+ if (ch >= '0' && ch <= '9') {
+ while (pos < jsop.length()) {
+ ch = jsop.charAt(pos);
+ if (ch < '0' || ch > '9') {
+ break;
+ }
+ pos++;
+ }
+ if (ch == '.') {
+ pos++;
+ while (pos < jsop.length()) {
+ ch = jsop.charAt(pos);
+ if (ch < '0' || ch > '9') {
+ break;
+ }
+ pos++;
+ }
+ }
+ if (ch == 'e' || ch == 'E') {
+ ch = jsop.charAt(++pos);
+ if (ch == '+' || ch == '-') {
+ ch = jsop.charAt(++pos);
+ }
+ while (pos < jsop.length()) {
+ ch = jsop.charAt(pos);
+ if (ch < '0' || ch > '9') {
+ break;
+ }
+ pos++;
+ }
+ }
+ currentToken = jsop.substring(start, pos);
+ return NUMBER;
+ } else if (ch >= 'a' && ch <= 'z') {
+ while (pos < jsop.length()) {
+ ch = jsop.charAt(pos);
+ if (ch < 'a' || ch > 'z') {
+ break;
+ }
+ pos++;
+ }
+ String s = jsop.substring(start, pos);
+ if ("null".equals(s)) {
+ return NULL;
+ } else if ("true".equals(s)) {
+ return TRUE;
+ } else if ("false".equals(s)) {
+ return FALSE;
+ } else {
+ // currentToken = s;
+ // return STRING;
+ throw getFormatException(jsop, pos);
+ }
+ }
+ throw getFormatException(jsop, pos);
+ }
+ }
+
+ /**
+ * Decode a Json string.
+ *
+ * @param s the encoded string
+ * @return the string
+ */
+ public static String decode(String s) {
+ int length = s.length();
+ StringBuilder buff = new StringBuilder(length);
+ for (int i = 0; i < length; i++) {
+ char c = s.charAt(i);
+ if (c == '\\') {
+ if (i + 1 >= s.length()) {
+ throw getFormatException(s, i);
+ }
+ c = s.charAt(++i);
+ switch (c) {
+ case '"':
+ buff.append('"');
+ break;
+ case '\\':
+ buff.append('\\');
+ break;
+ case '/':
+ buff.append('/');
+ break;
+ case 'b':
+ buff.append('\b');
+ break;
+ case 'f':
+ buff.append('\f');
+ break;
+ case 'n':
+ buff.append('\n');
+ break;
+ case 'r':
+ buff.append('\r');
+ break;
+ case 't':
+ buff.append('\t');
+ break;
+ case 'u': {
+ try {
+ c = (char) (Integer.parseInt(s.substring(i + 1, i + 5), 16));
+ } catch (NumberFormatException e) {
+ throw getFormatException(s, i);
+ }
+ i += 4;
+ buff.append(c);
+ break;
+ }
+ default:
+ throw getFormatException(s, i);
+ }
+ } else {
+ buff.append(c);
+ }
+ }
+ return buff.toString();
+ }
+
+ /**
+ * Convert a string to a Json literal using the correct escape sequences.
+ * The literal is enclosed in double quotes. Characters outside the range
+ * 32..127 are encoded (backslash u xxxx). The forward slash (solidus) is
+ * not escaped.
+ *
+ * @param s the text to convert
+ * @return the Json representation (including double quotes)
+ */
+ public static String encode(String s) {
+ int length = s.length();
+ if (length == 0) {
+ return "\"\"";
+ }
+ StringBuilder buff = new StringBuilder(length + 2);
+ buff.append('\"');
+ for (int i = 0; i < length; i++) {
+ char c = s.charAt(i);
+ switch (c) {
+ case '"':
+ // quotation mark
+ buff.append("\\\"");
+ break;
+ case '\\':
+ // backslash
+ buff.append("\\\\");
+ break;
+ case '\b':
+ // backspace
+ buff.append("\\b");
+ break;
+ case '\f':
+ // formfeed
+ buff.append("\\f");
+ break;
+ case '\n':
+ // newline
+ buff.append("\\n");
+ break;
+ case '\r':
+ // carriage return
+ buff.append("\\r");
+ break;
+ case '\t':
+ // horizontal tab
+ buff.append("\\t");
+ break;
+ default:
+ int ch = c;
+ if (ch < ' ') {
+ // guaranteed to be 1 or 2 hex digits only
+ buff.append("\\u00");
+ String hex = Integer.toHexString(c);
+ if (hex.length() == 1) {
+ buff.append('0');
+ }
+ buff.append(hex);
+ } else if (ch > 127) {
+ // ascii only mode
+ buff.append("\\u");
+ String hex = Integer.toHexString(c);
+ for (int len = hex.length(); len < 4; len++) {
+ buff.append('0');
+ }
+ buff.append(hex);
+ } else {
+ buff.append(c);
+ }
+ }
+ }
+ return buff.append('\"').toString();
+ }
+
+ private static String getTokenType(int type) {
+ return type <= END ? TYPE[type] : "'" + (char) type + "'";
+ }
+
+ private static IllegalArgumentException getFormatException(String s, int i, String expected) {
+ return new IllegalArgumentException(addAsterisk(s, i) + " expected: " + expected);
+ }
+
+ private static IllegalArgumentException getFormatException(String s, int i) {
+ return new IllegalArgumentException(addAsterisk(s, i));
+ }
+
+ /**
+ * Add an asterisk ('[*]') at the given position. This format is used to
+ * show where parsing failed in a statement.
+ *
+ * @param s the text
+ * @param index the position
+ * @return the text with asterisk
+ */
+ private static String addAsterisk(String s, int index) {
+ if (s != null) {
+ index = Math.min(index, s.length());
+ s = s.substring(0, index) + "[*]" + s.substring(index);
+ }
+ return s;
+ }
+
+}