You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@olingo.apache.org by mi...@apache.org on 2015/11/30 15:15:52 UTC
olingo-odata4 git commit: [OLINGO-568] Added support for escape of
escape and quote characters
Repository: olingo-odata4
Updated Branches:
refs/heads/OLINGO-568_RewrittenGrammar 6dd0a0f3e -> e5ac59079
[OLINGO-568] Added support for escape of escape and quote characters
Project: http://git-wip-us.apache.org/repos/asf/olingo-odata4/repo
Commit: http://git-wip-us.apache.org/repos/asf/olingo-odata4/commit/e5ac5907
Tree: http://git-wip-us.apache.org/repos/asf/olingo-odata4/tree/e5ac5907
Diff: http://git-wip-us.apache.org/repos/asf/olingo-odata4/diff/e5ac5907
Branch: refs/heads/OLINGO-568_RewrittenGrammar
Commit: e5ac590794148f5195c28cb055f26b7b4b3f5027
Parents: 6dd0a0f
Author: Michael Bolz <mi...@sap.com>
Authored: Mon Nov 30 15:15:00 2015 +0100
Committer: Michael Bolz <mi...@sap.com>
Committed: Mon Nov 30 15:15:00 2015 +0100
----------------------------------------------------------------------
.../core/uri/parser/search/SearchTokenizer.java | 189 +++++++++++--------
.../uri/parser/search/SearchTokenizerTest.java | 8 +
.../core/uri/antlr/TestFullResourcePath.java | 16 +-
.../core/uri/testutil/TestUriValidator.java | 5 +
4 files changed, 137 insertions(+), 81 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/e5ac5907/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
----------------------------------------------------------------------
diff --git a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
index 5c42e6d..2146438 100644
--- a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
+++ b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
@@ -37,6 +37,13 @@ import java.util.List;
* searchWord = 1*ALPHA ; Actually: any character from the Unicode categories L or Nl,
* ; but not the words AND, OR, and NOT
* </code>
+ *
+ * <b>ATTENTION:</b> For a <code>searchPhrase</code> the percent encoding is not supported by the
+ * <code>SearchTokenizer</code>.<br/>
+ * This decision was made because the <code>org.apache.olingo.server.core.uri.parser.Parser</code>
+ * already handles each query option in its <code>parseUri</code> method as a <code>percent decoded</code> string
+ * (see line <i>177ff</i>: <code>for (RawUri.QueryOption option : uri.queryOptionListDecoded)</code>).
+ *
*/
public class SearchTokenizer {
@@ -45,6 +52,7 @@ public class SearchTokenizer {
private boolean finished = false;
protected static final char QUOTATION_MARK = '\"';
+ protected static final char PHRASE_ESCAPE_CHAR = '\\';
protected static final char CHAR_N = 'N';
protected static final char CHAR_O = 'O';
protected static final char CHAR_T = 'T';
@@ -126,45 +134,59 @@ public class SearchTokenizer {
}
/**
- * searchPhrase = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark
- *
- * qchar-no-AMP-DQUOTE = qchar-unescaped / escape ( escape / quotation-mark )
- *
- * qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
- *
- * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
- *
- * escape = "\" / "%5C" ; reverse solidus U+005C
- *
- * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
+ * <code>
+ * <b>searchPhrase</b> = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark
+ * <br/><br/>
+ * <b>qchar-no-AMP-DQUOTE</b> = qchar-unescaped / escape ( escape / quotation-mark )
+ * <br/><br/>
+ * <b>qchar-unescaped</b> = unreserved / pct-encoded-unescaped / other-delims /
+ * ":" / "@" / "/" / "?" / "$" / "'" / "="
+ * <br/><br/>
+ * <b>unreserved</b> = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ * <br/><br/>
+ * <b>escape</b> = "\" / "%5C" ; reverse solidus U+005C
+ * <br/><br/>
+ * <b>pct-encoded-unescaped</b> = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
* / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
* / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
+ * <br/><br/>
+ * <b>other-delims</b> = "!" / "(" / ")" / "*" / "+" / "," / ";"
+ * <br/><br/>
+ * <b>quotation-mark</b> = DQUOTE / "%22"
+ * <br/><br/>
+ * <b>ALPHA</b> = %x41-5A / %x61-7A
+ * <br/>
+ * <b>DIGIT</b> = %x30-39
+ * <br/>
+ * <b>DQUOTE</b> = %x22
+ * </code>
*
- * other-delims = "!" / "(" / ")" / "*" / "+" / "," / ";"
- *
- * quotation-mark = DQUOTE / "%22"
- *
- * ALPHA = %x41-5A / %x61-7A
- * DIGIT = %x30-39
- * DQUOTE = %x22
+ * Checks if given <code>character</code> is allowed for a search phrase.
+ * <b>ATTENTION:</b> Escaping and percent encoding are not validated here (and cannot be validated on
+ * a single character).<br/>
+ * Hence for the {@link #PHRASE_ESCAPE_CHAR} and the {@link #QUOTATION_MARK} characters this method will
+ * return <code>FALSE</code>.<br/>
+ * <b>Furthermore</b> percent encoded characters are also not validated (and can not be validated on
+ * a single character).<br/>
+ * Hence for the <code>%</code> character this method will return <code>FALSE</code>.<br/>
*
* @param character which is checked
* @return true if character is allowed for a phrase
*/
static boolean isAllowedPhrase(final char character) {
// FIXME mibo: check missing
- return isQCharUnescaped(character) || isEscaped(character);
+ return isQCharUnescaped(character);// || isEscaped(character);
}
- /**
- * escape = "\" / "%5C" ; reverse solidus U+005C
- * @param character which is checked
- * @return true if character is allowed
- */
- private static boolean isEscaped(char character) {
- // TODO: mibo(151117): check how to implement
- return false;
- }
+// /**
+// * escape = "\" / "%5C" ; reverse solidus U+005C
+// * @param character which is checked
+// * @return true if character is allowed
+// */
+// private static boolean isEscaped(char character) {
+// // TODO: mibo(151130): is checked in SearchPhraseState
+// return false;
+// }
/**
* qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
@@ -173,14 +195,14 @@ public class SearchTokenizer {
*/
private static boolean isQCharUnescaped(char character) {
return isUnreserved(character)
- || isPctEncodedUnescaped(character)
- || isOtherDelims(character)
- || character == ':'
- || character == '@'
- || character == '/'
- || character == '$'
- || character == '\''
- || character == '=';
+// || isPctEncodedUnescaped(character)
+ || isOtherDelims(character)
+ || character == ':'
+ || character == '@'
+ || character == '/'
+ || character == '$'
+ || character == '\''
+ || character == '=';
}
/**
@@ -190,43 +212,43 @@ public class SearchTokenizer {
*/
private static boolean isOtherDelims(char character) {
return character == '!'
- || character == '('
- || character == ')'
- || character == '*'
- || character == '+'
- || character == ','
- || character == ';';
- }
-
- /**
- * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
- * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
- * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
- *
- * HEXDIG = DIGIT / A-to-F
- *
- * @param character which is checked
- * @return true if character is allowed
- */
- private static boolean isPctEncodedUnescaped(char character) {
- String hex = Integer.toHexString(character);
- char aschar[] = hex.toCharArray();
- if(aschar[0] == '%') {
- if(aschar[1] == '2') {
- return aschar[2] != '2' && isHexDigit(aschar[2]);
- } else if(aschar[1] == '5') {
- return aschar[2] != 'C' && isHexDigit(aschar[2]);
- } else if(isHexDigit(aschar[1])) {
- return isHexDigit(aschar[2]);
- }
- }
- return false;
- }
-
- private static boolean isHexDigit(char character) {
- return 'A' <= character && character <= 'F' // case A..F
- || '0' <= character && character <= '9'; // case 0..9
- }
+ || character == '('
+ || character == ')'
+ || character == '*'
+ || character == '+'
+ || character == ','
+ || character == ';';
+ }
+
+// /**
+// * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
+// * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
+// * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
+// *
+// * HEXDIG = DIGIT / A-to-F
+// *
+// * @param character which is checked
+// * @return true if character is allowed
+// */
+// private static boolean isPctEncodedUnescaped(char character) {
+// String hex = Integer.toHexString(character);
+// char aschar[] = hex.toCharArray();
+// if(aschar[0] == '%') {
+// if(aschar[1] == '2') {
+// return aschar[2] != '2' && isHexDigit(aschar[2]);
+// } else if(aschar[1] == '5') {
+// return aschar[2] != 'C' && isHexDigit(aschar[2]);
+// } else if(isHexDigit(aschar[1])) {
+// return isHexDigit(aschar[2]);
+// }
+// }
+// return false;
+// }
+
+// private static boolean isHexDigit(char character) {
+// return 'A' <= character && character <= 'F' // case A..F
+// || '0' <= character && character <= '9'; // case 0..9
+// }
/**
* unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
@@ -235,10 +257,10 @@ public class SearchTokenizer {
*/
private static boolean isUnreserved(char character) {
return isAlphaOrDigit(character)
- || character == '-'
- || character == '.'
- || character == '_'
- || character == '~';
+ || character == '-'
+ || character == '.'
+ || character == '_'
+ || character == '~';
}
/**
@@ -256,8 +278,6 @@ public class SearchTokenizer {
// BWS = *( SP / HTAB / "%20" / "%09" ) ; "bad" whitespace
// RWS = 1*( SP / HTAB / "%20" / "%09" ) ; "required" whitespace
static boolean isWhitespace(final char character) {
- // ( SP / HTAB / "%20" / "%09" )
- // TODO mibo: add missing whitespaces
return character == ' ' || character == '\t';
}
@@ -400,6 +420,7 @@ public class SearchTokenizer {
private class SearchPhraseState extends LiteralState {
private boolean closed = false;
+ private boolean escaped = false;
public SearchPhraseState(char c) throws SearchTokenizerException {
super(Token.PHRASE, c);
if (c != QUOTATION_MARK) {
@@ -416,6 +437,16 @@ public class SearchTokenizer {
} else if (isWhitespace(c)) {
return new RwsState();
}
+ } else if(escaped) {
+ escaped = false;
+ if(c == QUOTATION_MARK || c == PHRASE_ESCAPE_CHAR) {
+ return allowed(c);
+ } else {
+ return forbidden(c);
+ }
+ } else if(c == PHRASE_ESCAPE_CHAR) {
+ escaped = true;
+ return this;
} else if (isAllowedPhrase(c)) {
return allowed(c);
} else if (isWhitespace(c)) {
http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/e5ac5907/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
----------------------------------------------------------------------
diff --git a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
index 2340f37..46c9290 100644
--- a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
+++ b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
@@ -250,6 +250,14 @@ public class SearchTokenizerTest {
@Test
public void characterInPhrase() throws Exception {
assertQuery("\"123\" OR \"ALPHA-._~\"").resultsIn(PHRASE, OR, PHRASE);
+ //escaped characters
+ assertQuery("\"\\\"123\" OR \"\\\\abc\"").resultsIn(new Validator.Tuple(PHRASE, "\"\"123\""),
+ new Validator.Tuple(OR), new Validator.Tuple(PHRASE, "\"\\abc\""));
+ assertQuery("\"\\\"1\\\\23\"").resultsIn(new Validator.Tuple(PHRASE, "\"\"1\\23\""));
+ // exceptions
+ assertQuery("\"\\\"1\\\\").resultsIn(SearchTokenizerException.MessageKeys.INVALID_TOKEN_STATE);
+ assertQuery("\"1\\\"").resultsIn(SearchTokenizerException.MessageKeys.INVALID_TOKEN_STATE);
+ assertQuery("\"1\\23\"").resultsIn(SearchTokenizerException.MessageKeys.FORBIDDEN_CHARACTER);
}
@Test
http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/e5ac5907/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java
----------------------------------------------------------------------
diff --git a/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java b/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java
index 9f66d66..3c02003 100644
--- a/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java
+++ b/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java
@@ -45,6 +45,7 @@ import org.apache.olingo.server.core.uri.parser.UriParserException;
import org.apache.olingo.server.core.uri.parser.UriParserSemanticException;
import org.apache.olingo.server.core.uri.parser.UriParserSemanticException.MessageKeys;
import org.apache.olingo.server.core.uri.parser.UriParserSyntaxException;
+import org.apache.olingo.server.core.uri.parser.search.SearchParserException;
import org.apache.olingo.server.core.uri.testutil.FilterValidator;
import org.apache.olingo.server.core.uri.testutil.TestUriValidator;
import org.apache.olingo.server.core.uri.validator.UriValidationException;
@@ -5428,9 +5429,7 @@ public class TestFullResourcePath {
}
@Test
- @Ignore("$search currently not implemented")
public void testSearch() throws Exception {
-
testUri.run("ESTwoKeyNav", "$search=abc");
testUri.run("ESTwoKeyNav", "$search=NOT abc");
@@ -5462,6 +5461,19 @@ public class TestFullResourcePath {
testUri.run("ESTwoKeyNav", "$search=(abc AND def) ghi ");
testUri.run("ESTwoKeyNav", "$search=abc AND (def OR ghi)");
testUri.run("ESTwoKeyNav", "$search=abc AND (def ghi)");
+
+ // escaped characters
+ testUri.run("ESTwoKeyNav", "$search=\"abc\"");
+ testUri.run("ESTwoKeyNav", "$search=\"a\\\"bc\"");
+ testUri.run("ESTwoKeyNav", "$search=%22abc%22");
+ testUri.run("ESTwoKeyNav", "$search=%22a%5C%22bc%22");
+ testUri.run("ESTwoKeyNav", "$search=%22a%5C%5Cbc%22");
+
+ // wrong escaped characters
+ testUri.runEx("ESTwoKeyNav", "$search=%22a%22bc%22")
+ .isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
+ testUri.runEx("ESTwoKeyNav", "$search=%22a%5Cbc%22")
+ .isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
}
@Test
http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/e5ac5907/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/testutil/TestUriValidator.java
----------------------------------------------------------------------
diff --git a/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/testutil/TestUriValidator.java b/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/testutil/TestUriValidator.java
index 6a2e5b4..0d5fb4a 100644
--- a/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/testutil/TestUriValidator.java
+++ b/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/testutil/TestUriValidator.java
@@ -176,6 +176,11 @@ public class TestUriValidator implements TestValidator {
}
}
+ public TestUriValidator isExceptionMessage(final ODataLibraryException.MessageKey messageKey) {
+ assertEquals(messageKey, exception.getMessageKey());
+ return this;
+ }
+
public TestUriValidator isExSyntax(final UriParserSyntaxException.MessageKeys messageKey) {
assertEquals(UriParserSyntaxException.class, exception.getClass());
assertEquals(messageKey, exception.getMessageKey());