You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/07/08 17:33:29 UTC

svn commit: r1358760 - in /lucene/dev/branches/lucene4199/lucene/benchmark: ./ src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/

Author: uschindler
Date: Sun Jul  8 15:33:29 2012
New Revision: 1358760

URL: http://svn.apache.org/viewvc?rev=1358760&view=rev
Log:
LUCENE-4199: Next usecase of FastCharStream for HTMLParser

Added:
    lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java   (with props)
    lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java
      - copied, changed from r1358756, lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java
Removed:
    lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/SimpleCharStream.java
Modified:
    lucene/dev/branches/lucene4199/lucene/benchmark/build.xml
    lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
    lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj
    lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParserTokenManager.java
    lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/ParseException.java
    lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Token.java
    lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/TokenMgrError.java

Modified: lucene/dev/branches/lucene4199/lucene/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/build.xml?rev=1358760&r1=1358759&r2=1358760&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/benchmark/build.xml (original)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/build.xml Sun Jul  8 15:33:29 2012
@@ -262,9 +262,11 @@
     <target name="init" depends="module-build.init,resolve-icu,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet"/>
   
     <target name="clean-javacc">
-      <fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
-	<containsregexp expression="Generated.*By.*JavaCC"/>
-      </fileset>
+      <delete>
+        <fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
+    <containsregexp expression="Generated.*By.*JavaCC"/>
+        </fileset>
+      </delete>
     </target>
     
     <target name="javacc" depends="init,javacc-check" if="javacc.present">

Added: lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java?rev=1358760&view=auto
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java (added)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java Sun Jul  8 15:33:29 2012
@@ -0,0 +1,112 @@
+/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */
+/* JavaCCOptions:STATIC=false */
+package org.apache.lucene.benchmark.byTask.feeds.demohtml;
+
+/**
+ * This interface describes a character stream that maintains line and
+ * column number positions of the characters.  It also has the capability
+ * to backup the stream to some extent.  An implementation of this
+ * interface is used in the TokenManager implementation generated by
+ * JavaCCParser.
+ *
+ * All the methods except backup can be implemented in any fashion. backup
+ * needs to be implemented correctly for the correct operation of the lexer.
+ * The rest of the methods are all used to get information like line number,
+ * column number and the String that constitutes a token and are not used
+ * by the lexer. Hence their implementation won't affect the generated lexer's
+ * operation.
+ */
+
+public interface CharStream {
+
+  /**
+   * Returns the next character from the selected input.  The method
+   * of selecting the input is the responsibility of the class
+   * implementing this interface.  Can throw any java.io.IOException.
+   */
+  char readChar() throws java.io.IOException;
+
+  /**
+   * Returns the column position of the character last read.
+   * @deprecated
+   * @see #getEndColumn
+   */
+  int getColumn();
+
+  /**
+   * Returns the line number of the character last read.
+   * @deprecated
+   * @see #getEndLine
+   */
+  int getLine();
+
+  /**
+   * Returns the column number of the last character for current token (being
+   * matched after the last call to BeginToken).
+   */
+  int getEndColumn();
+
+  /**
+   * Returns the line number of the last character for current token (being
+   * matched after the last call to BeginToken).
+   */
+  int getEndLine();
+
+  /**
+   * Returns the column number of the first character for current token (being
+   * matched after the last call to BeginToken).
+   */
+  int getBeginColumn();
+
+  /**
+   * Returns the line number of the first character for current token (being
+   * matched after the last call to BeginToken).
+   */
+  int getBeginLine();
+
+  /**
+   * Backs up the input stream by amount steps. Lexer calls this method if it
+   * had already read some characters, but could not use them to match a
+   * (longer) token. So, they will be used again as the prefix of the next
+   * token and it is the implementation's responsibility to do this right.
+   */
+  void backup(int amount);
+
+  /**
+   * Returns the next character that marks the beginning of the next token.
+   * All characters must remain in the buffer between two successive calls
+   * to this method to implement backup correctly.
+   */
+  char BeginToken() throws java.io.IOException;
+
+  /**
+   * Returns a string made up of characters from the marked token beginning
+   * to the current buffer position. Implementations have the choice of returning
+   * anything that they want to. For example, for efficiency, one might decide
+   * to just return null, which is a valid implementation.
+   */
+  String GetImage();
+
+  /**
+   * Returns an array of characters that make up the suffix of length 'len' for
+   * the currently matched token. This is used to build up the matched string
+   * for use in actions in the case of MORE. A simple and inefficient
+   * implementation of this is as follows :
+   *
+   *   {
+   *      String t = GetImage();
+   *      return t.substring(t.length() - len, t.length()).toCharArray();
+   *   }
+   */
+  char[] GetSuffix(int len);
+
+  /**
+   * The lexer calls this function to indicate that it is done with the stream
+   * and hence implementations can free any resources held by this class.
+   * Again, the body of this function can be just empty and it will not
+   * affect the lexer's operation.
+   */
+  void Done();
+
+}
+/* JavaCC - OriginalChecksum=e26d9399cd34335f985e19c1fa86c11b (do not edit this line) */

Copied: lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java (from r1358756, lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java?p2=lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java&p1=lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java&r1=1358756&r2=1358760&rev=1358760&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java (original)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java Sun Jul  8 15:33:29 2012
@@ -1,5 +1,5 @@
 // FastCharStream.java
-package org.apache.lucene.queryparser.classic;
+package org.apache.lucene.benchmark.byTask.feeds.demohtml;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more

Modified: lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java?rev=1358760&r1=1358759&r2=1358760&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java (original)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java Sun Jul  8 15:33:29 2012
@@ -29,6 +29,10 @@ public class HTMLParser implements HTMLP
   private MyPipedInputStream pipeInStream = null;
   private PipedOutputStream pipeOutStream = null;
 
+  public HTMLParser(Reader reader) {
+    this(new FastCharStream(reader));
+  }
+
   private class MyPipedInputStream extends PipedInputStream{
 
     public MyPipedInputStream(){
@@ -464,7 +468,6 @@ null)
 
   /** Generated Token Manager. */
   public HTMLParserTokenManager token_source;
-  SimpleCharStream jj_input_stream;
   /** Current token. */
   public Token token;
   /** Next token. */
@@ -485,40 +488,9 @@ null)
   private boolean jj_rescan = false;
   private int jj_gc = 0;
 
-  /** Constructor with InputStream. */
-  public HTMLParser(java.io.InputStream stream) {
-     this(stream, null);
-  }
-  /** Constructor with InputStream and supplied encoding */
-  public HTMLParser(java.io.InputStream stream, String encoding) {
-    try { jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
-    token_source = new HTMLParserTokenManager(jj_input_stream);
-    token = new Token();
-    jj_ntk = -1;
-    jj_gen = 0;
-    for (int i = 0; i < 14; i++) jj_la1[i] = -1;
-    for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
-  }
-
-  /** Reinitialise. */
-  public void ReInit(java.io.InputStream stream) {
-     ReInit(stream, null);
-  }
-  /** Reinitialise. */
-  public void ReInit(java.io.InputStream stream, String encoding) {
-    try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
-    token_source.ReInit(jj_input_stream);
-    token = new Token();
-    jj_ntk = -1;
-    jj_gen = 0;
-    for (int i = 0; i < 14; i++) jj_la1[i] = -1;
-    for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
-  }
-
-  /** Constructor. */
-  public HTMLParser(java.io.Reader stream) {
-    jj_input_stream = new SimpleCharStream(stream, 1, 1);
-    token_source = new HTMLParserTokenManager(jj_input_stream);
+  /** Constructor with user supplied CharStream. */
+  public HTMLParser(CharStream stream) {
+    token_source = new HTMLParserTokenManager(stream);
     token = new Token();
     jj_ntk = -1;
     jj_gen = 0;
@@ -527,9 +499,8 @@ null)
   }
 
   /** Reinitialise. */
-  public void ReInit(java.io.Reader stream) {
-    jj_input_stream.ReInit(stream, 1, 1);
-    token_source.ReInit(jj_input_stream);
+  public void ReInit(CharStream stream) {
+    token_source.ReInit(stream);
     token = new Token();
     jj_ntk = -1;
     jj_gen = 0;
@@ -631,7 +602,7 @@ null)
       return (jj_ntk = jj_nt.kind);
   }
 
-  private java.util.List<int[]> jj_expentries = new java.util.ArrayList<int[]>();
+  private java.util.List jj_expentries = new java.util.ArrayList();
   private int[] jj_expentry;
   private int jj_kind = -1;
   private int[] jj_lasttokens = new int[100];
@@ -691,7 +662,7 @@ null)
     jj_add_error_token(0, 0);
     int[][] exptokseq = new int[jj_expentries.size()][];
     for (int i = 0; i < jj_expentries.size(); i++) {
-      exptokseq[i] = jj_expentries.get(i);
+      exptokseq[i] = (int[])jj_expentries.get(i);
     }
     return new ParseException(token, exptokseq, tokenImage);
   }

Modified: lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj?rev=1358760&r1=1358759&r2=1358760&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj (original)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj Sun Jul  8 15:33:29 2012
@@ -22,6 +22,7 @@ options {
   //DEBUG_LOOKAHEAD = true;
   //DEBUG_TOKEN_MANAGER = true;
   UNICODE_INPUT = true;
+  USER_CHAR_STREAM=true;
 }
 
 PARSER_BEGIN(HTMLParser)
@@ -56,6 +57,10 @@ public class HTMLParser {
   private MyPipedInputStream pipeInStream = null;
   private PipedOutputStream pipeOutStream = null;
   
+  public HTMLParser(Reader reader) {
+    this(new FastCharStream(reader));
+  }
+
   private class MyPipedInputStream extends PipedInputStream{
     
     public MyPipedInputStream(){

Modified: lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParserTokenManager.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParserTokenManager.java?rev=1358760&r1=1358759&r2=1358760&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParserTokenManager.java (original)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParserTokenManager.java Sun Jul  8 15:33:29 2012
@@ -464,7 +464,7 @@ private int jjMoveNfa_0(int startState, 
       }
       else
       {
-         int hiByte = (curChar >> 8);
+         int hiByte = (int)(curChar >> 8);
          int i1 = hiByte >> 6;
          long l1 = 1L << (hiByte & 077);
          int i2 = (curChar & 0xff) >> 6;
@@ -569,7 +569,7 @@ private int jjMoveNfa_5(int startState, 
       }
       else
       {
-         int hiByte = (curChar >> 8);
+         int hiByte = (int)(curChar >> 8);
          int i1 = hiByte >> 6;
          long l1 = 1L << (hiByte & 077);
          int i2 = (curChar & 0xff) >> 6;
@@ -670,7 +670,7 @@ private int jjMoveNfa_7(int startState, 
       }
       else
       {
-         int hiByte = (curChar >> 8);
+         int hiByte = (int)(curChar >> 8);
          int i1 = hiByte >> 6;
          long l1 = 1L << (hiByte & 077);
          int i2 = (curChar & 0xff) >> 6;
@@ -766,7 +766,7 @@ private int jjMoveNfa_4(int startState, 
       }
       else
       {
-         int hiByte = (curChar >> 8);
+         int hiByte = (int)(curChar >> 8);
          int i1 = hiByte >> 6;
          long l1 = 1L << (hiByte & 077);
          int i2 = (curChar & 0xff) >> 6;
@@ -892,7 +892,7 @@ private int jjMoveNfa_3(int startState, 
       }
       else
       {
-         int hiByte = (curChar >> 8);
+         int hiByte = (int)(curChar >> 8);
          int i1 = hiByte >> 6;
          long l1 = 1L << (hiByte & 077);
          int i2 = (curChar & 0xff) >> 6;
@@ -1061,7 +1061,7 @@ private int jjMoveNfa_6(int startState, 
       }
       else
       {
-         int hiByte = (curChar >> 8);
+         int hiByte = (int)(curChar >> 8);
          int i1 = hiByte >> 6;
          long l1 = 1L << (hiByte & 077);
          int i2 = (curChar & 0xff) >> 6;
@@ -1205,7 +1205,7 @@ private int jjMoveNfa_1(int startState, 
       }
       else
       {
-         int hiByte = (curChar >> 8);
+         int hiByte = (int)(curChar >> 8);
          int i1 = hiByte >> 6;
          long l1 = 1L << (hiByte & 077);
          int i2 = (curChar & 0xff) >> 6;
@@ -1361,7 +1361,7 @@ private int jjMoveNfa_2(int startState, 
       }
       else
       {
-         int hiByte = (curChar >> 8);
+         int hiByte = (int)(curChar >> 8);
          int i1 = hiByte >> 6;
          long l1 = 1L << (hiByte & 077);
          int i2 = (curChar & 0xff) >> 6;
@@ -1441,25 +1441,23 @@ static final long[] jjtoToken = {
 static final long[] jjtoSkip = {
    0x400000L, 
 };
-protected SimpleCharStream input_stream;
+protected CharStream input_stream;
 private final int[] jjrounds = new int[28];
 private final int[] jjstateSet = new int[56];
 protected char curChar;
 /** Constructor. */
-public HTMLParserTokenManager(SimpleCharStream stream){
-   if (SimpleCharStream.staticFlag)
-      throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer.");
+public HTMLParserTokenManager(CharStream stream){
    input_stream = stream;
 }
 
 /** Constructor. */
-public HTMLParserTokenManager(SimpleCharStream stream, int lexState){
+public HTMLParserTokenManager(CharStream stream, int lexState){
    this(stream);
    SwitchTo(lexState);
 }
 
 /** Reinitialise parser. */
-public void ReInit(SimpleCharStream stream)
+public void ReInit(CharStream stream)
 {
    jjmatchedPos = jjnewStateCnt = 0;
    curLexState = defaultLexState;
@@ -1475,7 +1473,7 @@ private void ReInitRounds()
 }
 
 /** Reinitialise parser. */
-public void ReInit(SimpleCharStream stream, int lexState)
+public void ReInit(CharStream stream, int lexState)
 {
    ReInit(stream);
    SwitchTo(lexState);

Modified: lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/ParseException.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/ParseException.java?rev=1358760&r1=1358759&r2=1358760&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/ParseException.java (original)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/ParseException.java Sun Jul  8 15:33:29 2012
@@ -195,4 +195,4 @@ public class ParseException extends Exce
    }
 
 }
-/* JavaCC - OriginalChecksum=e5376178619291bc9d2c0c6647dc3cef (do not edit this line) */
+/* JavaCC - OriginalChecksum=e449d0e43f3d85deb1260a88b7e90fcd (do not edit this line) */

Modified: lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Token.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Token.java?rev=1358760&r1=1358759&r2=1358760&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Token.java (original)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Token.java Sun Jul  8 15:33:29 2012
@@ -121,4 +121,4 @@ public class Token {
   }
 
 }
-/* JavaCC - OriginalChecksum=e49c2a0c10d50ff2ebd0639552330ce7 (do not edit this line) */
+/* JavaCC - OriginalChecksum=24643dc85fd6daeec42ceba20b46ee61 (do not edit this line) */

Modified: lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/TokenMgrError.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/TokenMgrError.java?rev=1358760&r1=1358759&r2=1358760&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/TokenMgrError.java (original)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/TokenMgrError.java Sun Jul  8 15:33:29 2012
@@ -138,4 +138,4 @@ public class TokenMgrError extends Error
       this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
    }
 }
-/* JavaCC - OriginalChecksum=3aee554f696e5d7a18b1ad330c1de53f (do not edit this line) */
+/* JavaCC - OriginalChecksum=538f0da130356fcc0bc7db621ab0389d (do not edit this line) */