You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/07/07 23:41:51 UTC

svn commit: r1358649 - /lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/

Author: uschindler
Date: Sat Jul  7 21:41:51 2012
New Revision: 1358649

URL: http://svn.apache.org/viewvc?rev=1358649&view=rev
Log:
LUCENE-4199: fix flexible standard query parser by cloning the user CharStream implementation without unicode bugs from classic

Added:
    lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/CharStream.java   (with props)
    lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java
      - copied, changed from r1358644, lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java
Removed:
    lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/JavaCharStream.java
Modified:
    lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/ParseException.java
    lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java
    lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj
    lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java
    lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/Token.java
    lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/TokenMgrError.java

Added: lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/CharStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/CharStream.java?rev=1358649&view=auto
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/CharStream.java (added)
+++ lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/CharStream.java Sat Jul  7 21:41:51 2012
@@ -0,0 +1,112 @@
+/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */
+/* JavaCCOptions:STATIC=false */
+package org.apache.lucene.queryparser.flexible.standard.parser;
+
+/**
+ * This interface describes a character stream that maintains line and
+ * column number positions of the characters.  It also has the capability
+ * to backup the stream to some extent.  An implementation of this
+ * interface is used in the TokenManager implementation generated by
+ * JavaCCParser.
+ *
+ * All the methods except backup can be implemented in any fashion. backup
+ * needs to be implemented correctly for the correct operation of the lexer.
+ * Rest of the methods are all used to get information like line number,
+ * column number and the String that constitutes a token and are not used
+ * by the lexer. Hence their implementation won't affect the generated lexer's
+ * operation.
+ */
+
+public interface CharStream {
+
+  /**
+   * Returns the next character from the selected input.  The method
+   * of selecting the input is the responsibility of the class
+   * implementing this interface.  Can throw any java.io.IOException.
+   */
+  char readChar() throws java.io.IOException;
+
+  /**
+   * Returns the column position of the character last read.
+   * @deprecated
+   * @see #getEndColumn
+   */
+  int getColumn();
+
+  /**
+   * Returns the line number of the character last read.
+   * @deprecated
+   * @see #getEndLine
+   */
+  int getLine();
+
+  /**
+   * Returns the column number of the last character for current token (being
+   * matched after the last call to BeginToken).
+   */
+  int getEndColumn();
+
+  /**
+   * Returns the line number of the last character for current token (being
+   * matched after the last call to BeginToken).
+   */
+  int getEndLine();
+
+  /**
+   * Returns the column number of the first character for current token (being
+   * matched after the last call to BeginToken).
+   */
+  int getBeginColumn();
+
+  /**
+   * Returns the line number of the first character for current token (being
+   * matched after the last call to BeginToken).
+   */
+  int getBeginLine();
+
+  /**
+   * Backs up the input stream by amount steps. Lexer calls this method if it
+   * had already read some characters, but could not use them to match a
+   * (longer) token. So, they will be used again as the prefix of the next
+   * token and it is the implementation's responsibility to do this right.
+   */
+  void backup(int amount);
+
+  /**
+   * Returns the next character that marks the beginning of the next token.
+   * All characters must remain in the buffer between two successive calls
+   * to this method to implement backup correctly.
+   */
+  char BeginToken() throws java.io.IOException;
+
+  /**
+   * Returns a string made up of characters from the marked token beginning
+   * to the current buffer position. Implementations have the choice of returning
+   * anything that they want to. For example, for efficiency, one might decide
+   * to just return null, which is a valid implementation.
+   */
+  String GetImage();
+
+  /**
+   * Returns an array of characters that make up the suffix of length 'len' for
+   * the currently matched token. This is used to build up the matched string
+   * for use in actions in the case of MORE. A simple and inefficient
+   * implementation of this is as follows :
+   *
+   *   {
+   *      String t = GetImage();
+   *      return t.substring(t.length() - len, t.length()).toCharArray();
+   *   }
+   */
+  char[] GetSuffix(int len);
+
+  /**
+   * The lexer calls this function to indicate that it is done with the stream
+   * and hence implementations can free any resources held by this class.
+   * Again, the body of this function can be just empty and it will not
+   * affect the lexer's operation.
+   */
+  void Done();
+
+}
+/* JavaCC - OriginalChecksum=298ffb3c7c64c6de9b7812e011e58d99 (do not edit this line) */

Copied: lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java (from r1358644, lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java?p2=lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java&p1=lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java&r1=1358644&r2=1358649&rev=1358649&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java (original)
+++ lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java Sat Jul  7 21:41:51 2012
@@ -1,5 +1,5 @@
 // FastCharStream.java
-package org.apache.lucene.queryparser.classic;
+package org.apache.lucene.queryparser.flexible.standard.parser;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more

Modified: lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/ParseException.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/ParseException.java?rev=1358649&r1=1358648&r2=1358649&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/ParseException.java (original)
+++ lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/ParseException.java Sat Jul  7 21:41:51 2012
@@ -193,4 +193,4 @@ public class ParseException extends Quer
    }
 
 }
-/* JavaCC - OriginalChecksum=0f25f4245374bbf9920c9a82efecadd2 (do not edit this line) */
+/* JavaCC - OriginalChecksum=7601d49d11bc059457ae5850628ebc8a (do not edit this line) */

Modified: lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java?rev=1358649&r1=1358648&r2=1358649&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java (original)
+++ lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java Sat Jul  7 21:41:51 2012
@@ -1,7 +1,7 @@
 /* Generated By:JavaCC: Do not edit this line. StandardSyntaxParser.java */
 package org.apache.lucene.queryparser.flexible.standard.parser;
 
-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -49,14 +49,14 @@ public class StandardSyntaxParser implem
 
    // syntax parser constructor
    public StandardSyntaxParser() {
-        this(new StringReader(""));
+        this(new FastCharStream(new StringReader("")));
   }
      /** Parses a query string, returning a {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode}.
      *  @param query  the query string to be parsed.
      *  @throws ParseException if the parsing fails
      */
     public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException {
-      ReInit(new StringReader(query.toString()));
+      ReInit(new FastCharStream(new StringReader(query.toString())));
       try {
         // TopLevelQuery is a Query followed by the end-of-input (EOF)
         QueryNode querynode = TopLevelQuery(field);
@@ -844,7 +844,6 @@ public class StandardSyntaxParser implem
 
   /** Generated Token Manager. */
   public StandardSyntaxParserTokenManager token_source;
-  JavaCharStream jj_input_stream;
   /** Current token. */
   public Token token;
   /** Next token. */
@@ -870,14 +869,9 @@ public class StandardSyntaxParser implem
   private boolean jj_rescan = false;
   private int jj_gc = 0;
 
-  /** Constructor with InputStream. */
-  public StandardSyntaxParser(java.io.InputStream stream) {
-     this(stream, null);
-  }
-  /** Constructor with InputStream and supplied encoding */
-  public StandardSyntaxParser(java.io.InputStream stream, String encoding) {
-    try { jj_input_stream = new JavaCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
-    token_source = new StandardSyntaxParserTokenManager(jj_input_stream);
+  /** Constructor with user supplied CharStream. */
+  public StandardSyntaxParser(CharStream stream) {
+    token_source = new StandardSyntaxParserTokenManager(stream);
     token = new Token();
     jj_ntk = -1;
     jj_gen = 0;
@@ -886,35 +880,8 @@ public class StandardSyntaxParser implem
   }
 
   /** Reinitialise. */
-  public void ReInit(java.io.InputStream stream) {
-     ReInit(stream, null);
-  }
-  /** Reinitialise. */
-  public void ReInit(java.io.InputStream stream, String encoding) {
-    try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
-    token_source.ReInit(jj_input_stream);
-    token = new Token();
-    jj_ntk = -1;
-    jj_gen = 0;
-    for (int i = 0; i < 28; i++) jj_la1[i] = -1;
-    for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
-  }
-
-  /** Constructor. */
-  public StandardSyntaxParser(java.io.Reader stream) {
-    jj_input_stream = new JavaCharStream(stream, 1, 1);
-    token_source = new StandardSyntaxParserTokenManager(jj_input_stream);
-    token = new Token();
-    jj_ntk = -1;
-    jj_gen = 0;
-    for (int i = 0; i < 28; i++) jj_la1[i] = -1;
-    for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
-  }
-
-  /** Reinitialise. */
-  public void ReInit(java.io.Reader stream) {
-    jj_input_stream.ReInit(stream, 1, 1);
-    token_source.ReInit(jj_input_stream);
+  public void ReInit(CharStream stream) {
+    token_source.ReInit(stream);
     token = new Token();
     jj_ntk = -1;
     jj_gen = 0;

Modified: lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj?rev=1358649&r1=1358648&r2=1358649&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj (original)
+++ lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj Sat Jul  7 21:41:51 2012
@@ -5,7 +5,7 @@
 options {
   STATIC=false;
   JAVA_UNICODE_ESCAPE=true;
-  USER_CHAR_STREAM=false;
+  USER_CHAR_STREAM=true;
   IGNORE_CASE=false;
   JDK_VERSION="1.5";
 }
@@ -61,14 +61,14 @@ public class StandardSyntaxParser implem
  
    // syntax parser constructor
    public StandardSyntaxParser() {
-   	this(new StringReader(""));
+   	this(new FastCharStream(new StringReader("")));
   }
      /** Parses a query string, returning a {@link org.apache.lucene.queryparser.flexible.core.nodes.QueryNode}.
      *  @param query  the query string to be parsed.
      *  @throws ParseException if the parsing fails
      */
     public QueryNode parse(CharSequence query, CharSequence field) throws QueryNodeParseException {
-      ReInit(new StringReader(query.toString()));
+      ReInit(new FastCharStream(new StringReader(query.toString())));
       try {
         // TopLevelQuery is a Query followed by the end-of-input (EOF)
         QueryNode querynode = TopLevelQuery(field);

Modified: lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java?rev=1358649&r1=1358648&r2=1358649&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java (original)
+++ lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java Sat Jul  7 21:41:51 2012
@@ -1,6 +1,6 @@
 /* Generated By:JavaCC: Do not edit this line. StandardSyntaxParserTokenManager.java */
 package org.apache.lucene.queryparser.flexible.standard.parser;
-/*
+/**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -781,25 +781,23 @@ static final long[] jjtoToken = {
 static final long[] jjtoSkip = {
    0x80L, 
 };
-protected JavaCharStream input_stream;
+protected CharStream input_stream;
 private final int[] jjrounds = new int[33];
 private final int[] jjstateSet = new int[66];
 protected char curChar;
 /** Constructor. */
-public StandardSyntaxParserTokenManager(JavaCharStream stream){
-   if (JavaCharStream.staticFlag)
-      throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer.");
+public StandardSyntaxParserTokenManager(CharStream stream){
    input_stream = stream;
 }
 
 /** Constructor. */
-public StandardSyntaxParserTokenManager(JavaCharStream stream, int lexState){
+public StandardSyntaxParserTokenManager(CharStream stream, int lexState){
    this(stream);
    SwitchTo(lexState);
 }
 
 /** Reinitialise parser. */
-public void ReInit(JavaCharStream stream)
+public void ReInit(CharStream stream)
 {
    jjmatchedPos = jjnewStateCnt = 0;
    curLexState = defaultLexState;
@@ -815,7 +813,7 @@ private void ReInitRounds()
 }
 
 /** Reinitialise parser. */
-public void ReInit(JavaCharStream stream, int lexState)
+public void ReInit(CharStream stream, int lexState)
 {
    ReInit(stream);
    SwitchTo(lexState);

Modified: lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/Token.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/Token.java?rev=1358649&r1=1358648&r2=1358649&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/Token.java (original)
+++ lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/Token.java Sat Jul  7 21:41:51 2012
@@ -121,4 +121,4 @@ public class Token {
   }
 
 }
-/* JavaCC - OriginalChecksum=e9c55091ec11152bcd3a300ddff5c73a (do not edit this line) */
+/* JavaCC - OriginalChecksum=3b4fe6dcfcfa24a81f1c6ceffae5f73a (do not edit this line) */

Modified: lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/TokenMgrError.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/TokenMgrError.java?rev=1358649&r1=1358648&r2=1358649&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/TokenMgrError.java (original)
+++ lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/parser/TokenMgrError.java Sat Jul  7 21:41:51 2012
@@ -138,4 +138,4 @@ public class TokenMgrError extends Error
       this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
    }
 }
-/* JavaCC - OriginalChecksum=76b513fd9c50f65248056bbeeff49277 (do not edit this line) */
+/* JavaCC - OriginalChecksum=1efb3d906925f2478637c66473b79bae (do not edit this line) */