You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/07/08 17:33:29 UTC
svn commit: r1358760 - in /lucene/dev/branches/lucene4199/lucene/benchmark:
./ src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/
Author: uschindler
Date: Sun Jul 8 15:33:29 2012
New Revision: 1358760
URL: http://svn.apache.org/viewvc?rev=1358760&view=rev
Log:
LUCENE-4199: Next usecase of FastCharStream for HTMLParser
Added:
lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java (with props)
lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java
- copied, changed from r1358756, lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java
Removed:
lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/SimpleCharStream.java
Modified:
lucene/dev/branches/lucene4199/lucene/benchmark/build.xml
lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj
lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParserTokenManager.java
lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/ParseException.java
lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Token.java
lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/TokenMgrError.java
Modified: lucene/dev/branches/lucene4199/lucene/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/build.xml?rev=1358760&r1=1358759&r2=1358760&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/benchmark/build.xml (original)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/build.xml Sun Jul 8 15:33:29 2012
@@ -262,9 +262,11 @@
<target name="init" depends="module-build.init,resolve-icu,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet"/>
<target name="clean-javacc">
- <fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
- <containsregexp expression="Generated.*By.*JavaCC"/>
- </fileset>
+ <delete>
+ <fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
+ <containsregexp expression="Generated.*By.*JavaCC"/>
+ </fileset>
+ </delete>
</target>
<target name="javacc" depends="init,javacc-check" if="javacc.present">
Added: lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java?rev=1358760&view=auto
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java (added)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/CharStream.java Sun Jul 8 15:33:29 2012
@@ -0,0 +1,112 @@
+/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */
+/* JavaCCOptions:STATIC=false */
+package org.apache.lucene.benchmark.byTask.feeds.demohtml;
+
+/**
+ * This interface describes a character stream that maintains line and
+ * column number positions of the characters. It also has the capability
+ * to backup the stream to some extent. An implementation of this
+ * interface is used in the TokenManager implementation generated by
+ * JavaCCParser.
+ *
+ * All the methods except backup can be implemented in any fashion. backup
+ * needs to be implemented correctly for the correct operation of the lexer.
+ * Rest of the methods are all used to get information like line number,
+ * column number and the String that constitutes a token and are not used
+ * by the lexer. Hence their implementation won't affect the generated lexer's
+ * operation.
+ */
+
+public interface CharStream {
+
+ /**
+ * Returns the next character from the selected input. The method
+ * of selecting the input is the responsibility of the class
+ * implementing this interface. Can throw any java.io.IOException.
+ */
+ char readChar() throws java.io.IOException;
+
+ /**
+ * Returns the column position of the character last read.
+ * @deprecated
+ * @see #getEndColumn
+ */
+ int getColumn();
+
+ /**
+ * Returns the line number of the character last read.
+ * @deprecated
+ * @see #getEndLine
+ */
+ int getLine();
+
+ /**
+ * Returns the column number of the last character for current token (being
+ * matched after the last call to BeginToken).
+ */
+ int getEndColumn();
+
+ /**
+ * Returns the line number of the last character for current token (being
+ * matched after the last call to BeginToken).
+ */
+ int getEndLine();
+
+ /**
+ * Returns the column number of the first character for current token (being
+ * matched after the last call to BeginToken).
+ */
+ int getBeginColumn();
+
+ /**
+ * Returns the line number of the first character for current token (being
+ * matched after the last call to BeginToken).
+ */
+ int getBeginLine();
+
+ /**
+ * Backs up the input stream by amount steps. Lexer calls this method if it
+ * had already read some characters, but could not use them to match a
+ * (longer) token. So, they will be used again as the prefix of the next
+ * token and it is the implementation's responsibility to do this right.
+ */
+ void backup(int amount);
+
+ /**
+ * Returns the next character that marks the beginning of the next token.
+ * All characters must remain in the buffer between two successive calls
+ * to this method to implement backup correctly.
+ */
+ char BeginToken() throws java.io.IOException;
+
+ /**
+ * Returns a string made up of characters from the marked token beginning
+ * to the current buffer position. Implementations have the choice of returning
+ * anything that they want to. For example, for efficiency, one might decide
+ * to just return null, which is a valid implementation.
+ */
+ String GetImage();
+
+ /**
+ * Returns an array of characters that make up the suffix of length 'len' for
+ * the currently matched token. This is used to build up the matched string
+ * for use in actions in the case of MORE. A simple and inefficient
+ * implementation of this is as follows :
+ *
+ * {
+ * String t = GetImage();
+ * return t.substring(t.length() - len, t.length()).toCharArray();
+ * }
+ */
+ char[] GetSuffix(int len);
+
+ /**
+ * The lexer calls this function to indicate that it is done with the stream
+ * and hence implementations can free any resources held by this class.
+ * Again, the body of this function can be just empty and it will not
+ * affect the lexer's operation.
+ */
+ void Done();
+
+}
+/* JavaCC - OriginalChecksum=e26d9399cd34335f985e19c1fa86c11b (do not edit this line) */
Copied: lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java (from r1358756, lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java?p2=lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java&p1=lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java&r1=1358756&r2=1358760&rev=1358760&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/FastCharStream.java (original)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/FastCharStream.java Sun Jul 8 15:33:29 2012
@@ -1,5 +1,5 @@
// FastCharStream.java
-package org.apache.lucene.queryparser.classic;
+package org.apache.lucene.benchmark.byTask.feeds.demohtml;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
Modified: lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java?rev=1358760&r1=1358759&r2=1358760&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java (original)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.java Sun Jul 8 15:33:29 2012
@@ -29,6 +29,10 @@ public class HTMLParser implements HTMLP
private MyPipedInputStream pipeInStream = null;
private PipedOutputStream pipeOutStream = null;
+ public HTMLParser(Reader reader) {
+ this(new FastCharStream(reader));
+ }
+
private class MyPipedInputStream extends PipedInputStream{
public MyPipedInputStream(){
@@ -464,7 +468,6 @@ null)
/** Generated Token Manager. */
public HTMLParserTokenManager token_source;
- SimpleCharStream jj_input_stream;
/** Current token. */
public Token token;
/** Next token. */
@@ -485,40 +488,9 @@ null)
private boolean jj_rescan = false;
private int jj_gc = 0;
- /** Constructor with InputStream. */
- public HTMLParser(java.io.InputStream stream) {
- this(stream, null);
- }
- /** Constructor with InputStream and supplied encoding */
- public HTMLParser(java.io.InputStream stream, String encoding) {
- try { jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
- token_source = new HTMLParserTokenManager(jj_input_stream);
- token = new Token();
- jj_ntk = -1;
- jj_gen = 0;
- for (int i = 0; i < 14; i++) jj_la1[i] = -1;
- for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
- }
-
- /** Reinitialise. */
- public void ReInit(java.io.InputStream stream) {
- ReInit(stream, null);
- }
- /** Reinitialise. */
- public void ReInit(java.io.InputStream stream, String encoding) {
- try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
- token_source.ReInit(jj_input_stream);
- token = new Token();
- jj_ntk = -1;
- jj_gen = 0;
- for (int i = 0; i < 14; i++) jj_la1[i] = -1;
- for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
- }
-
- /** Constructor. */
- public HTMLParser(java.io.Reader stream) {
- jj_input_stream = new SimpleCharStream(stream, 1, 1);
- token_source = new HTMLParserTokenManager(jj_input_stream);
+ /** Constructor with user supplied CharStream. */
+ public HTMLParser(CharStream stream) {
+ token_source = new HTMLParserTokenManager(stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
@@ -527,9 +499,8 @@ null)
}
/** Reinitialise. */
- public void ReInit(java.io.Reader stream) {
- jj_input_stream.ReInit(stream, 1, 1);
- token_source.ReInit(jj_input_stream);
+ public void ReInit(CharStream stream) {
+ token_source.ReInit(stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
@@ -631,7 +602,7 @@ null)
return (jj_ntk = jj_nt.kind);
}
- private java.util.List<int[]> jj_expentries = new java.util.ArrayList<int[]>();
+ private java.util.List jj_expentries = new java.util.ArrayList();
private int[] jj_expentry;
private int jj_kind = -1;
private int[] jj_lasttokens = new int[100];
@@ -691,7 +662,7 @@ null)
jj_add_error_token(0, 0);
int[][] exptokseq = new int[jj_expentries.size()][];
for (int i = 0; i < jj_expentries.size(); i++) {
- exptokseq[i] = jj_expentries.get(i);
+ exptokseq[i] = (int[])jj_expentries.get(i);
}
return new ParseException(token, exptokseq, tokenImage);
}
Modified: lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj?rev=1358760&r1=1358759&r2=1358760&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj (original)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParser.jj Sun Jul 8 15:33:29 2012
@@ -22,6 +22,7 @@ options {
//DEBUG_LOOKAHEAD = true;
//DEBUG_TOKEN_MANAGER = true;
UNICODE_INPUT = true;
+ USER_CHAR_STREAM=true;
}
PARSER_BEGIN(HTMLParser)
@@ -56,6 +57,10 @@ public class HTMLParser {
private MyPipedInputStream pipeInStream = null;
private PipedOutputStream pipeOutStream = null;
+ public HTMLParser(Reader reader) {
+ this(new FastCharStream(reader));
+ }
+
private class MyPipedInputStream extends PipedInputStream{
public MyPipedInputStream(){
Modified: lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParserTokenManager.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParserTokenManager.java?rev=1358760&r1=1358759&r2=1358760&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParserTokenManager.java (original)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/HTMLParserTokenManager.java Sun Jul 8 15:33:29 2012
@@ -464,7 +464,7 @@ private int jjMoveNfa_0(int startState,
}
else
{
- int hiByte = (curChar >> 8);
+ int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
@@ -569,7 +569,7 @@ private int jjMoveNfa_5(int startState,
}
else
{
- int hiByte = (curChar >> 8);
+ int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
@@ -670,7 +670,7 @@ private int jjMoveNfa_7(int startState,
}
else
{
- int hiByte = (curChar >> 8);
+ int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
@@ -766,7 +766,7 @@ private int jjMoveNfa_4(int startState,
}
else
{
- int hiByte = (curChar >> 8);
+ int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
@@ -892,7 +892,7 @@ private int jjMoveNfa_3(int startState,
}
else
{
- int hiByte = (curChar >> 8);
+ int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
@@ -1061,7 +1061,7 @@ private int jjMoveNfa_6(int startState,
}
else
{
- int hiByte = (curChar >> 8);
+ int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
@@ -1205,7 +1205,7 @@ private int jjMoveNfa_1(int startState,
}
else
{
- int hiByte = (curChar >> 8);
+ int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
@@ -1361,7 +1361,7 @@ private int jjMoveNfa_2(int startState,
}
else
{
- int hiByte = (curChar >> 8);
+ int hiByte = (int)(curChar >> 8);
int i1 = hiByte >> 6;
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
@@ -1441,25 +1441,23 @@ static final long[] jjtoToken = {
static final long[] jjtoSkip = {
0x400000L,
};
-protected SimpleCharStream input_stream;
+protected CharStream input_stream;
private final int[] jjrounds = new int[28];
private final int[] jjstateSet = new int[56];
protected char curChar;
/** Constructor. */
-public HTMLParserTokenManager(SimpleCharStream stream){
- if (SimpleCharStream.staticFlag)
- throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer.");
+public HTMLParserTokenManager(CharStream stream){
input_stream = stream;
}
/** Constructor. */
-public HTMLParserTokenManager(SimpleCharStream stream, int lexState){
+public HTMLParserTokenManager(CharStream stream, int lexState){
this(stream);
SwitchTo(lexState);
}
/** Reinitialise parser. */
-public void ReInit(SimpleCharStream stream)
+public void ReInit(CharStream stream)
{
jjmatchedPos = jjnewStateCnt = 0;
curLexState = defaultLexState;
@@ -1475,7 +1473,7 @@ private void ReInitRounds()
}
/** Reinitialise parser. */
-public void ReInit(SimpleCharStream stream, int lexState)
+public void ReInit(CharStream stream, int lexState)
{
ReInit(stream);
SwitchTo(lexState);
Modified: lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/ParseException.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/ParseException.java?rev=1358760&r1=1358759&r2=1358760&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/ParseException.java (original)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/ParseException.java Sun Jul 8 15:33:29 2012
@@ -195,4 +195,4 @@ public class ParseException extends Exce
}
}
-/* JavaCC - OriginalChecksum=e5376178619291bc9d2c0c6647dc3cef (do not edit this line) */
+/* JavaCC - OriginalChecksum=e449d0e43f3d85deb1260a88b7e90fcd (do not edit this line) */
Modified: lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Token.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Token.java?rev=1358760&r1=1358759&r2=1358760&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Token.java (original)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/Token.java Sun Jul 8 15:33:29 2012
@@ -121,4 +121,4 @@ public class Token {
}
}
-/* JavaCC - OriginalChecksum=e49c2a0c10d50ff2ebd0639552330ce7 (do not edit this line) */
+/* JavaCC - OriginalChecksum=24643dc85fd6daeec42ceba20b46ee61 (do not edit this line) */
Modified: lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/TokenMgrError.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/TokenMgrError.java?rev=1358760&r1=1358759&r2=1358760&view=diff
==============================================================================
--- lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/TokenMgrError.java (original)
+++ lucene/dev/branches/lucene4199/lucene/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml/TokenMgrError.java Sun Jul 8 15:33:29 2012
@@ -138,4 +138,4 @@ public class TokenMgrError extends Error
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
-/* JavaCC - OriginalChecksum=3aee554f696e5d7a18b1ad330c1de53f (do not edit this line) */
+/* JavaCC - OriginalChecksum=538f0da130356fcc0bc7db621ab0389d (do not edit this line) */