You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by gs...@apache.org on 2008/01/24 15:58:35 UTC
svn commit: r614891 -
/lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java
Author: gsingers
Date: Thu Jan 24 06:58:34 2008
New Revision: 614891
URL: http://svn.apache.org/viewvc?rev=614891&view=rev
Log:
LUCENE-1137: Added Token.get/setFlags() Release the hounds
Modified:
lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java?rev=614891&r1=614890&r2=614891&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/Token.java Thu Jan 24 06:58:34 2008
@@ -20,6 +20,8 @@
import org.apache.lucene.index.Payload;
import org.apache.lucene.index.TermPositions;
+import java.util.BitSet;
+
/** A Token is an occurence of a term from the text of a field. It consists of
a term's text, the start and end offset of the term in the text of the field,
and a type string.
@@ -76,6 +78,7 @@
public class Token implements Cloneable {
public static final String DEFAULT_TYPE = "word";
+
private static int MIN_BUFFER_SIZE = 10;
/** @deprecated: we will remove this when we remove the
@@ -88,6 +91,7 @@
int startOffset; // start in source text
int endOffset; // end in source text
String type = DEFAULT_TYPE; // lexical type
+ private int flags;
Payload payload;
@@ -116,6 +120,19 @@
type = typ;
}
+ /**
+ * Constructs a Token with null text and start & end
+ * offsets plus the Token type.
+ * @param start start offset
+ * @param end end offset
+ * @param flags The bits to set for this token
+ */
+ public Token(int start, int end, int flags){
+ startOffset = start;
+ endOffset = end;
+ this.flags = flags;
+ }
+
/** Constructs a Token with the given term text, and start
* & end offsets. The type defaults to "word."
* <b>NOTE:</b> for better indexing speed you should
@@ -145,6 +162,23 @@
type = typ;
}
+ /**
+ * Constructs a Token with the given text, start and end
+ * offsets, & type. <b>NOTE:</b> for better indexing
+ * speed you should instead use the char[] termBuffer
+ * methods to set the term text.
+ * @param text
+ * @param start
+ * @param end
+ * @param typ token type bits
+ */
+ public Token(String text, int start, int end, int flags) {
+ termText = text;
+ startOffset = start;
+ endOffset = end;
+ this.flags = flags;
+ }
+
/** Set the position increment. This determines the position of this token
* relative to the previous Token in a {@link TokenStream}, used in phrase
* searching.
@@ -317,7 +351,30 @@
this.type = type;
}
- /**
+ /**
+ * EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long.
+ * <p/>
+ *
+ * Get the bitset for any bits that have been set. This is completely distinct from {@link #type()}, although they do share similar purposes.
+ * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
+ *
+ *
+ * @return The bits
+ */
+ public int getFlags() {
+ return flags;
+ }
+
+ /**
+ * @see #getFlags()
+ */
+ public void setFlags(int flags) {
+ this.flags = flags;
+ }
+
+
+
+ /**
* Returns this Token's payload.
*/
public Payload getPayload() {
@@ -348,7 +405,7 @@
return sb.toString();
}
- /** Resets the term text, payload, and positionIncrement to default.
+ /** Resets the term text, payload, flags, and positionIncrement to default.
* Other fields such as startOffset, endOffset and the token type are
* not reset since they are normally overwritten by the tokenizer. */
public void clear() {
@@ -357,6 +414,7 @@
termLength = 0;
termText = null;
positionIncrement = 1;
+ flags = 0;
// startOffset = endOffset = 0;
// type = DEFAULT_TYPE;
}