You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@olingo.apache.org by mi...@apache.org on 2015/11/17 16:04:21 UTC

[11/23] olingo-odata4 git commit: [OLINGO-568] Removed implicit AND and added Unicode support for words

[OLINGO-568] Removed implicit AND and added Unicode support for words


Project: http://git-wip-us.apache.org/repos/asf/olingo-odata4/repo
Commit: http://git-wip-us.apache.org/repos/asf/olingo-odata4/commit/ba5220ab
Tree: http://git-wip-us.apache.org/repos/asf/olingo-odata4/tree/ba5220ab
Diff: http://git-wip-us.apache.org/repos/asf/olingo-odata4/diff/ba5220ab

Branch: refs/heads/master
Commit: ba5220ab4a74d0c5aaff9fe1b72632e2a8bc8778
Parents: 37c5827
Author: mibo <mi...@apache.org>
Authored: Wed Nov 11 20:36:51 2015 +0100
Committer: mibo <mi...@apache.org>
Committed: Wed Nov 11 20:58:11 2015 +0100

----------------------------------------------------------------------
 .../core/uri/parser/search/SearchTokenizer.java | 169 +++++++++++++------
 .../uri/parser/search/SearchTokenizerTest.java  | 168 ++++++++++--------
 2 files changed, 212 insertions(+), 125 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/ba5220ab/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
----------------------------------------------------------------------
diff --git a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
index 1ec4df1..9288981 100644
--- a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
+++ b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
@@ -73,11 +73,6 @@ public class SearchTokenizer {
       return this;
     }
 
-    public State finish(Token token) {
-      this.token = token;
-      return finish();
-    }
-
     public boolean isFinished() {
       return finished;
     }
@@ -90,23 +85,40 @@ public class SearchTokenizer {
       return this;
     }
 
-    static boolean isAllowedChar(final char character) {
+    static boolean isAllowedWord(final char character) {
       // TODO mibo: add missing allowed characters
-      return CHAR_A <= character && character <= 'Z' // case A..Z
-          || 'a' <= character && character <= 'z' // case a..z
-          || '0' <= character && character <= '9'; // case 0..9
+      int type = Character.getType(character);
+      return (type == Character.LETTER_NUMBER
+          || type == Character.LOWERCASE_LETTER
+          || type == Character.MODIFIER_LETTER
+          || type == Character.OTHER_LETTER
+          || type == Character.TITLECASE_LETTER
+          || type == Character.UPPERCASE_LETTER);
     }
 
     /**
-     * qchar-no-AMP-DQUOTE   = qchar-unescaped / escape ( escape / quotation-mark )
-     * qchar-unescaped  = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
      * unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
+     * other-delims   = "!" /                   "(" / ")" / "*" / "+" / "," / ";"
+     * qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
+     * pct-encoded-unescaped = "%" ( "0" / "1" /   "3" / "4" /   "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
+     *   / "%" "2" ( "0" / "1" /   "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
+     *   / "%" "5" ( DIGIT / "A" / "B" /   "D" / "E" / "F" )
+     *
+     * qchar-no-AMP-DQUOTE   = qchar-unescaped  / escape ( escape / quotation-mark )
+     *
+     * escape = "\" / "%5C"     ; reverse solidus U+005C
+     * quotation-mark  = DQUOTE / "%22"
+     *
+     * ALPHA  = %x41-5A / %x61-7A
+     * DIGIT  = %x30-39
+     * DQUOTE = %x22
+     *
      * @param character which is checked
      * @return true if character is allowed for a phrase
      */
     static boolean isAllowedPhrase(final char character) {
       // FIXME mibo: check missing
-      return isAllowedChar(character)
+      return isAlphaOrDigit(character)
           || character == '-'
           || character == '.'
           || character == '_'
@@ -119,6 +131,12 @@ public class SearchTokenizer {
           || character == '=';
     }
 
+    private static boolean isAlphaOrDigit(char character) {
+      return 'A' <= character && character <= 'Z' // case A..Z
+          || 'a' <= character && character <= 'z' // case a..z
+          || '0' <= character && character <= '9'; // case 0..9
+    }
+
     //BWS =  *( SP / HTAB / "%20" / "%09" )  ; "bad" whitespace
     //RWS = 1*( SP / HTAB / "%20" / "%09" )  ; "required" whitespace
     static boolean isWhitespace(final char character) {
@@ -173,15 +191,12 @@ public class SearchTokenizer {
     public SearchExpressionState() {
       super(null);
     }
-    public SearchExpressionState(String initLiteral) {
-      super(null, initLiteral);
-    }
     @Override
     public State nextChar(char c) throws SearchTokenizerException {
       if (c == CHAR_OPEN) {
         return new OpenState();
       } else if (isWhitespace(c)) {
-        return new RwsImplicitAndOrState();
+        return new RwsState();
       } else if(c == CHAR_CLOSE) {
         return new CloseState();
       } else {
@@ -205,7 +220,7 @@ public class SearchTokenizer {
         return new NotState(c);
       } else if (c == QUOTATION_MARK) {
         return new SearchPhraseState(c);
-      } else if (isAllowedChar(c)) {
+      } else if (isAllowedWord(c)) {
         return new SearchWordState(c);
       }
       return forbidden(c);
@@ -219,21 +234,30 @@ public class SearchTokenizer {
   private class SearchWordState extends LiteralState {
     public SearchWordState(char c) throws SearchTokenizerException {
       super(Token.WORD, c);
+      if(!isAllowedWord(c)) {
+        forbidden(c);
+      }
     }
-    public SearchWordState(State toConsume) {
+    public SearchWordState(State toConsume) throws SearchTokenizerException {
       super(Token.WORD, toConsume.getLiteral());
+      char[] chars = literal.toString().toCharArray();
+      for (char aChar : chars) {
+        if(!isAllowedWord(aChar)) {
+          forbidden(aChar);
+        }
+      }
     }
 
     @Override
     public State nextChar(char c) throws SearchTokenizerException {
-      if (isAllowedChar(c)) {
+      if (isAllowedWord(c)) {
         return allowed(c);
       } else if (c == CHAR_CLOSE) {
         finish();
         return new CloseState();
       } else if (isWhitespace(c)) {
         finish();
-        return new RwsImplicitAndOrState();
+        return new RwsState();
       }
       return forbidden(c);
     }
@@ -304,13 +328,52 @@ public class SearchTokenizer {
       }
     }
     @Override
-    public State nextChar(char c) {
+    public State nextChar(char c) throws SearchTokenizerException {
       if (literal.length() == 1 && c == CHAR_O) {
         return allowed(c);
       } else if (literal.length() == 2 && c == CHAR_T) {
         return allowed(c);
       } else if(literal.length() == 3 && isWhitespace(c)) {
         finish();
+        return new BeforePhraseOrWordRwsState();
+      }
+      return forbidden(c);
+    }
+  }
+  private class AndState extends LiteralState {
+    public AndState(char c) throws SearchTokenizerException {
+      super(Token.AND, c);
+      if(c != CHAR_A) {
+        forbidden(c);
+      }
+    }
+    @Override
+    public State nextChar(char c) throws SearchTokenizerException {
+      if (literal.length() == 1 && c == CHAR_N) {
+        return allowed(c);
+      } else if (literal.length() == 2 && c == CHAR_D) {
+        return allowed(c);
+      } else if(literal.length() == 3 && isWhitespace(c)) {
+        finish();
+        return new BeforeSearchExpressionRwsState();
+      } else {
+        return new SearchWordState(this);
+      }
+    }
+  }
+  private class OrState extends LiteralState {
+    public OrState(char c) throws SearchTokenizerException {
+      super(Token.OR, c);
+      if(c != CHAR_O) {
+        forbidden(c);
+      }
+    }
+    @Override
+    public State nextChar(char c) throws SearchTokenizerException {
+      if (literal.length() == 1 && (c == CHAR_R)) {
+        return allowed(c);
+      } else if(literal.length() == 2 && isWhitespace(c)) {
+        finish();
         return new BeforeSearchExpressionRwsState();
       } else {
         return new SearchWordState(this);
@@ -334,47 +397,53 @@ public class SearchTokenizer {
     }
   }
 
-  // implicit and
-  private class RwsImplicitAndOrState extends LiteralState {
-    private boolean noneRws = false;
-    public RwsImplicitAndOrState() {
+  private class BeforePhraseOrWordRwsState extends State {
+    public BeforePhraseOrWordRwsState() {
       super(null);
     }
     @Override
     public State nextChar(char c) throws SearchTokenizerException {
-      if (!noneRws && isWhitespace(c)) {
-        return allowed(c);
-      } else if (c == CHAR_O) {
-        noneRws = true;
+      if (isWhitespace(c)) {
         return allowed(c);
-      } else if (literal.length() == 1 && c == CHAR_R) {
+      } else if(c == '"') {
+        return new SearchPhraseState(c);
+      } else {
+        return new SearchWordState(c);
+      }
+    }
+  }
+
+  private class RwsState extends State {
+    public RwsState() {
+      super(null);
+    }
+    @Override
+    public State nextChar(char c) throws SearchTokenizerException {
+      if (isWhitespace(c)) {
         return allowed(c);
-      } else if (literal.length() == 2 && isWhitespace(c)) {
-        finish(Token.OR);
-        return new BeforeSearchExpressionRwsState();
+      } else if (c == CHAR_O) {
+        return new OrState(c);
       } else if (c == CHAR_A) {
-        noneRws = true;
-        return allowed(c);
-      } else if (literal.length() == 1 && c == CHAR_N) {
-        return allowed(c);
-      } else if (literal.length() == 2 && c == CHAR_D) {
-        return allowed(c);
-      } else if(literal.length() == 3 && isWhitespace(c)) {
-        finish(Token.AND);
-        return new BeforeSearchExpressionRwsState();
-      } else if(noneRws) {
-        finish(Token.AND);
-        return new SearchWordState(this);
+        return new AndState(c);
       } else {
-        finish(Token.AND);
-        return new SearchExpressionState(literal.toString()).init(c);
+        return new SearchExpressionState().init(c);
       }
     }
   }
 
-  // TODO (mibo): add (new) parse exception
-  public List<SearchQueryToken> tokenize(String searchQuery) throws SearchTokenizerException {
-    char[] chars = searchQuery.toCharArray();
+  /**
+   * Take the search query and split into according SearchQueryToken.
+   * Before split into tokens the given search query is 'trimmed'.
+   *
+   * @param searchQuery search query to be tokenized
+   * @return list of tokens
+   * @throws SearchTokenizerException if something in query is not valid
+   *                                  (based on OData search query ABNF)
+   */
+  public List<SearchQueryToken> tokenize(final String searchQuery)
+        throws SearchTokenizerException {
+    
+    char[] chars = searchQuery.trim().toCharArray();
 
     State state = new SearchExpressionState();
     List<SearchQueryToken> states = new ArrayList<SearchQueryToken>();

http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/ba5220ab/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
----------------------------------------------------------------------
diff --git a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
index ea3cab9..828b4c4 100644
--- a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
+++ b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
@@ -29,8 +29,6 @@ import static org.apache.olingo.server.core.uri.parser.search.SearchQueryToken.T
 
 public class SearchTokenizerTest {
 
-  private boolean logEnabled = false;
-
   @Test
   public void parseBasics() throws Exception {
     SearchTokenizer tokenizer = new SearchTokenizer();
@@ -39,25 +37,25 @@ public class SearchTokenizerTest {
     //
     result = tokenizer.tokenize("abc");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(WORD, result.get(0).getToken());
 
     result = tokenizer.tokenize("NOT abc");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(NOT, result.get(0).getToken());
     Assert.assertEquals(WORD, result.get(1).getToken());
 
     result = tokenizer.tokenize("(abc)");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(OPEN, result.get(0).getToken());
     Assert.assertEquals(WORD, result.get(1).getToken());
     Assert.assertEquals(CLOSE, result.get(2).getToken());
 
     result = tokenizer.tokenize("((abc))");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(OPEN, result.get(0).getToken());
     Assert.assertEquals(WORD, result.get(2).getToken());
     Assert.assertEquals(CLOSE, result.get(4).getToken());
@@ -71,13 +69,13 @@ public class SearchTokenizerTest {
     //
     result = tokenizer.tokenize("abc");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(WORD, result.get(0).getToken());
 
     //
-    result = tokenizer.tokenize("9988abs");
+    result = tokenizer.tokenize("anotherWord\u1234");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(WORD, result.get(0).getToken());
   }
 
@@ -86,29 +84,29 @@ public class SearchTokenizerTest {
     SearchTokenizer tokenizer = new SearchTokenizer();
     List<SearchQueryToken> result;
 
-    SearchValidator.init("abc AND \"x-y_z\" AND 123").validate();
+    SearchValidator.init("abc AND \"x-y_z\" AND olingo").validate();
 
     //
     result = tokenizer.tokenize("\"abc\"");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(PHRASE, result.get(0).getToken());
 
     //
     result = tokenizer.tokenize("\"9988  abs\"");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(PHRASE, result.get(0).getToken());
     Assert.assertEquals("\"9988  abs\"", result.get(0).getLiteral());
 
     //
     result = tokenizer.tokenize("\"99_88.\"");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(PHRASE, result.get(0).getToken());
     Assert.assertEquals("\"99_88.\"", result.get(0).getLiteral());
 
-    SearchValidator.init("abc or \"xyz\"").addExpected(WORD, AND, WORD, AND, PHRASE).validate();
+    SearchValidator.init("abc or \"xyz\"").addExpected(WORD, WORD, PHRASE).validate();
   }
 
   @Test
@@ -118,11 +116,14 @@ public class SearchTokenizerTest {
 
     result = tokenizer.tokenize("NOT abc");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(NOT, result.get(0).getToken());
     Assert.assertEquals(WORD, result.get(1).getToken());
 
-    SearchValidator.init("not abc").addExpected(WORD, AND, WORD).validate();
+    SearchValidator.init("not abc").addExpected(WORD, WORD).validate();
+    SearchValidator.init("NOT    abc").addExpected(NOT, WORD).validate();
+    SearchValidator.init("NOT    \"abc\"").addExpected(NOT, PHRASE).validate();
+    SearchValidator.init("NOT (sdf)").validate(SearchTokenizerException.class);
   }
 
   @Test
@@ -132,30 +133,30 @@ public class SearchTokenizerTest {
 
     result = tokenizer.tokenize("abc OR xyz");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(WORD, result.get(0).getToken());
     Assert.assertEquals(OR, result.get(1).getToken());
     Assert.assertEquals(WORD, result.get(2).getToken());
 
-    result = tokenizer.tokenize("abc OR xyz OR 123");
+    result = tokenizer.tokenize("abc OR xyz OR olingo");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(WORD, result.get(0).getToken());
     Assert.assertEquals(OR, result.get(1).getToken());
     Assert.assertEquals(WORD, result.get(2).getToken());
     Assert.assertEquals(OR, result.get(3).getToken());
     Assert.assertEquals(WORD, result.get(4).getToken());
 
-    SearchValidator.init("abc or xyz").addExpected(WORD, AND, WORD, AND, WORD).validate();
+    SearchValidator.init("abc or xyz").addExpected(WORD, WORD, WORD).validate();
   }
 
   @Test
   public void parseImplicitAnd() throws SearchTokenizerException {
-    SearchValidator.init("a b").addExpected(WORD, AND, WORD).validate();
-    SearchValidator.init("a b OR c").addExpected(WORD, AND, WORD, OR, WORD).validate();
-    SearchValidator.init("a bc OR c").addExpected(WORD, AND, WORD, OR, WORD).validate();
-    SearchValidator.init("a bc c").addExpected(WORD, AND, WORD, AND, WORD).validate();
-    SearchValidator.init("(a OR x) bc c").addExpected(OPEN, WORD, OR, WORD, CLOSE, AND, WORD, AND, WORD).validate();
+    SearchValidator.init("a b").addExpected(WORD, WORD).validate();
+    SearchValidator.init("a b OR c").addExpected(WORD, WORD, OR, WORD).validate();
+    SearchValidator.init("a bc OR c").addExpected(WORD, WORD, OR, WORD).validate();
+    SearchValidator.init("a bc c").addExpected(WORD, WORD, WORD).validate();
+    SearchValidator.init("(a OR x) bc c").addExpected(OPEN, WORD, OR, WORD, CLOSE, WORD, WORD).validate();
   }
 
   @Test
@@ -165,7 +166,7 @@ public class SearchTokenizerTest {
 
     result = tokenizer.tokenize("abc AND xyz");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(WORD, result.get(0).getToken());
     Assert.assertEquals(AND, result.get(1).getToken());
     Assert.assertEquals(WORD, result.get(2).getToken());
@@ -173,34 +174,31 @@ public class SearchTokenizerTest {
     // no lower case allowed for AND
     result = tokenizer.tokenize("abc and xyz");
     Assert.assertNotNull(result);
-    Assert.assertEquals(5, result.size());
-    log(result.toString());
+    Assert.assertEquals(3, result.size());
+    
     Assert.assertEquals(WORD, result.get(0).getToken());
-    Assert.assertEquals(AND, result.get(1).getToken());
+    Assert.assertEquals(WORD, result.get(1).getToken());
     Assert.assertEquals(WORD, result.get(2).getToken());
-    Assert.assertEquals(AND, result.get(3).getToken());
-    Assert.assertEquals(WORD, result.get(4).getToken());
 
     // implicit AND
     result = tokenizer.tokenize("abc xyz");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(WORD, result.get(0).getToken());
-    Assert.assertEquals(AND, result.get(1).getToken());
-    Assert.assertEquals(WORD, result.get(2).getToken());
+    Assert.assertEquals(WORD, result.get(1).getToken());
 
-    result = tokenizer.tokenize("abc AND xyz AND 123");
+    result = tokenizer.tokenize("abc AND xyz AND olingo");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(WORD, result.get(0).getToken());
     Assert.assertEquals(AND, result.get(1).getToken());
     Assert.assertEquals(WORD, result.get(2).getToken());
     Assert.assertEquals(AND, result.get(3).getToken());
     Assert.assertEquals(WORD, result.get(4).getToken());
 
-    result = tokenizer.tokenize("abc AND \"x-y_z\" AND 123");
+    result = tokenizer.tokenize("abc AND \"x-y_z\" AND olingo");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(WORD, result.get(0).getToken());
     Assert.assertEquals(AND, result.get(1).getToken());
     Assert.assertEquals(PHRASE, result.get(2).getToken());
@@ -214,9 +212,9 @@ public class SearchTokenizerTest {
     SearchTokenizer tokenizer = new SearchTokenizer();
     List<SearchQueryToken> result;
 
-    result = tokenizer.tokenize("abc AND xyz OR 123");
+    result = tokenizer.tokenize("abc AND xyz OR olingo");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Assert.assertEquals(WORD, result.get(0).getToken());
     Assert.assertEquals(AND, result.get(1).getToken());
     Assert.assertEquals(WORD, result.get(2).getToken());
@@ -233,9 +231,9 @@ public class SearchTokenizerTest {
     SearchTokenizer tokenizer = new SearchTokenizer();
     List<SearchQueryToken> result;
 
-    result = tokenizer.tokenize("abc AND NOT xyz OR 123");
+    result = tokenizer.tokenize("abc AND NOT xyz OR olingo");
     Assert.assertNotNull(result);
-    log(result.toString());
+    
     Iterator<SearchQueryToken> it = result.iterator();
     Assert.assertEquals(WORD, it.next().getToken());
     Assert.assertEquals(AND, it.next().getToken());
@@ -273,7 +271,7 @@ public class SearchTokenizerTest {
     Iterator<SearchQueryToken> it;
 
     result = tokenizer.tokenize("NOT abc AND nothing");
-    log(result.toString());
+    
     it = result.iterator();
     Assert.assertEquals(NOT, it.next().getToken());
     Assert.assertEquals(WORD, it.next().getToken());
@@ -281,7 +279,7 @@ public class SearchTokenizerTest {
     Assert.assertEquals(WORD, it.next().getToken());
 
     result = tokenizer.tokenize("abc AND andsomething");
-    log(result.toString());
+    
     it = result.iterator();
     Assert.assertEquals(WORD, it.next().getToken());
     Assert.assertEquals(AND, it.next().getToken());
@@ -291,17 +289,47 @@ public class SearchTokenizerTest {
         .addExpected(WORD, AND, WORD).validate();
 
     SearchValidator.init("abc ANDsomething")
-        .addExpected(WORD, AND, WORD).validate();
+        .addExpected(WORD, WORD).validate();
 
     SearchValidator.init("abc ORsomething")
-        .addExpected(WORD, AND, WORD).validate();
+        .addExpected(WORD, WORD).validate();
 
     SearchValidator.init("abc OR orsomething")
         .addExpected(WORD, OR, WORD).validate();
 
     SearchValidator.init("abc OR ORsomething")
         .addExpected(WORD, OR, WORD).validate();
+  }
+
+  @Test
+  public void unicodeInWords() throws Exception {
+    // Ll, Lm, Lo, Lt, Lu, Nl
+    SearchValidator.init("abc OR Ll\u01E3Lm\u02B5Lo\u1BE4Lt\u01F2Lu\u03D3Nl\u216F")
+        .addExpected(WORD, OR, WORD).validate();
+  }
 
+  /**
+   * unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
+   * other-delims   = "!" /                   "(" / ")" / "*" / "+" / "," / ";"
+   * qchar-unescaped       = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
+   * pct-encoded-unescaped = "%" ( "0" / "1" /   "3" / "4" /   "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
+   *   / "%" "2" ( "0" / "1" /   "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
+   *   / "%" "5" ( DIGIT / "A" / "B" /   "D" / "E" / "F" )
+   *
+   * qchar-no-AMP-DQUOTE   = qchar-unescaped  / escape ( escape / quotation-mark )
+   *
+   * escape = "\" / "%5C"     ; reverse solidus U+005C
+   * quotation-mark  = DQUOTE / "%22"
+   * ALPHA  = %x41-5A / %x61-7A
+   * DIGIT  = %x30-39
+   * DQUOTE = %x22
+   *
+   * @throws Exception
+   */
+  @Test
+  public void characterInPhrase() throws Exception {
+    SearchValidator.init("\"123\" OR \"ALPHA-._~\"")
+        .addExpected(PHRASE, OR, PHRASE).validate();
   }
 
   @Test
@@ -311,32 +339,32 @@ public class SearchTokenizerTest {
 
     validate("abc AND def");
     validate("abc  OR def");
-    validate("abc     def");
+    validate("abc     def", WORD, WORD);
 
     validate("abc AND def AND ghi", WORD, AND, WORD, AND, WORD);
     validate("abc AND def  OR ghi");
     validate("abc AND def     ghi");
 
-    validate("abc  OR def AND ghi");
-    validate("abc  OR def  OR ghi");
-    validate("abc  OR def     ghi");
+    validate("abc  OR def AND ghi", WORD, OR, WORD, AND, WORD);
+    validate("abc  OR def  OR ghi", WORD, OR, WORD, OR, WORD);
+    validate("abc  OR def     ghi", WORD, OR, WORD, WORD);
 
     validate("abc     def AND ghi");
     validate("abc     def  OR ghi");
     validate("abc     def     ghi");
 
     // mixed not
-    validate("    abc         def AND     ghi");
-    validate("NOT abc  NOT    def  OR NOT ghi");
-    validate("    abc         def     NOT ghi");
+    SearchValidator.init("    abc         def AND     ghi").validate(WORD, WORD, AND, WORD);
+    validate("NOT abc  NOT    def  OR NOT ghi", NOT, WORD, NOT, WORD, OR, NOT, WORD);
+    validate("    abc         def     NOT ghi", WORD, WORD, NOT, WORD);
 
     // parenthesis
-    validate("(abc)");
-    validate("(abc AND  def)");
-    validate("(abc AND  def)   OR  ghi");
-    validate("(abc AND  def)       ghi");
-    validate("abc AND (def    OR  ghi)");
-    validate("abc AND (def        ghi)");
+    validate("(abc)", OPEN, WORD, CLOSE);
+    validate("(abc AND  def)", OPEN, WORD, AND, WORD, CLOSE);
+    validate("(abc AND  def)   OR  ghi", OPEN, WORD, AND, WORD, CLOSE, OR, WORD);
+    validate("(abc AND  def)       ghi", OPEN, WORD, AND, WORD, CLOSE, WORD);
+    validate("abc AND (def    OR  ghi)", WORD, AND, OPEN, WORD, OR, WORD, CLOSE);
+    validate("abc AND (def        ghi)", WORD, AND, OPEN, WORD, WORD, CLOSE);
   }
 
   @Test
@@ -363,6 +391,12 @@ public class SearchTokenizerTest {
     private List<Tuple> validations = new ArrayList<Tuple>();
     private boolean log;
     private final String searchQuery;
+
+    public void validate(SearchQueryToken.Token... tokens) throws SearchTokenizerException {
+      addExpected(tokens);
+      validate();
+    }
+
     private class Tuple {
       final SearchQueryToken.Token token;
       final String literal;
@@ -427,20 +461,4 @@ public class SearchTokenizerTest {
       }
     }
   }
-
-
-
-  private void log(Object ... toString) {
-    if(logEnabled) {
-      System.out.println("------------");
-      if(toString == null || toString.length <= 1) {
-        System.out.println(toString == null? "NULL": (toString.length == 0? "EMPTY ARRAY": toString[0]));
-      } else {
-        int count = 1;
-        for (Object o : toString) {
-          System.out.println(count++ + ": " + o);
-        }
-      }
-    }
-  }
 }
\ No newline at end of file