You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/08/30 17:05:14 UTC
svn commit: r1163251 - in /lucene/dev/trunk: lucene/
lucene/contrib/misc/src/java/org/apache/lucene/document/ lucene/src/java/
lucene/src/java/org/apache/lucene/document/
modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/
Author: mikemccand
Date: Tue Aug 30 15:05:14 2011
New Revision: 1163251
URL: http://svn.apache.org/viewvc?rev=1163251&view=rev
Log:
LUCENE-2308: more cleanup
Modified:
lucene/dev/trunk/lucene/MIGRATE.txt
lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelectorVisitor.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/BinaryField.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/Field.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/FieldType.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/StringField.java
lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/TextField.java
lucene/dev/trunk/lucene/src/java/overview.html
lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
Modified: lucene/dev/trunk/lucene/MIGRATE.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/MIGRATE.txt?rev=1163251&r1=1163250&r2=1163251&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/MIGRATE.txt (original)
+++ lucene/dev/trunk/lucene/MIGRATE.txt Tue Aug 30 15:05:14 2011
@@ -407,3 +407,115 @@ LUCENE-1458, LUCENE-2111: Flexible Index
- o.a.l.queryParser.QueryParserTokenManager -> o.a.l.queryparser.classic.QueryParserTokenManager
- o.a.l.queryParser.QueryParserToken -> o.a.l.queryparser.classic.Token
- o.a.l.queryParser.QueryParserTokenMgrError -> o.a.l.queryparser.classic.TokenMgrError
+
+
+
+* LUCENE-2308: Separate FieldType from Field instances
+
+With this change, the indexing details (indexed, tokenized, norms,
+indexOptions, stored, etc.) are moved into a separate FieldType
+instance (rather than being stored directly on the Field).
+
+This means you can create the FieldType instance once, up front, for a
+given field, and then re-use that instance whenever you instantiate
+the Field.
+
+Certain field types are pre-defined since they are common cases:
+
+ * StringField: indexes a String value as a single token (i.e., does
+ not tokenize). This field turns off norms and indexes only doc
+ IDs (it indexes neither term frequencies nor positions). This field
+ does not store its value, but exposes TYPE_STORED as well.
+
+ * BinaryField: a byte[] value that's only stored.
+
+ * TextField: indexes and tokenizes a String, Reader or TokenStream
+ value, without term vectors. This field does not store its value,
+ but exposes TYPE_STORED as well.
+
+If your usage fits one of those common cases you can simply
+instantiate the corresponding class. To use the TYPE_STORED variant,
+do this instead:
+
+ Field f = new Field("field", StringField.TYPE_STORED, "value");
+
+Alternatively, if an existing type is close to what you want but you
+need to make a few changes, you can copy that type and make changes:
+
+ FieldType bodyType = new FieldType(TextField.TYPE_STORED);
+ bodyType.setStoreTermVectors(true);
+
+
+You can of course also create your own FieldType from scratch:
+
+ FieldType t = new FieldType();
+ t.setIndexed(true);
+ t.setStored(true);
+ t.setOmitNorms(true);
+ t.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
+
+FieldType has a freeze() method to prevent further changes. Note that
+once a FieldType is bound to a Field, it's frozen, to help prevent
+confusing bugs.
+
+When migrating from the 3.x API, if you did this before:
+
+ new Field("field", "value", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)
+
+you can now do this:
+
+ new StringField("field", "value")
+
+(though note that StringField indexes DOCS_ONLY).
+
+If instead the value was stored:
+
+ new Field("field", "value", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)
+
+you can now do this:
+
+ new Field("field", StringField.TYPE_STORED, "value")
+
+If you didn't omit norms:
+
+ new Field("field", "value", Field.Store.YES, Field.Index.NOT_ANALYZED)
+
+you can now do this:
+
+ FieldType ft = new FieldType(StringField.TYPE_STORED);
+ ft.setOmitNorms(false);
+ new Field("field", ft, "value")
+
+If you did this before (value can be String or Reader):
+
+ new Field("field", value, Field.Store.NO, Field.Index.ANALYZED)
+
+you can now do this:
+
+ new TextField("field", value)
+
+If instead the value was stored:
+
+ new Field("field", value, Field.Store.YES, Field.Index.ANALYZED)
+
+you can now do this:
+
+ new Field("field", TextField.TYPE_STORED, value)
+
+If in addition you omit norms:
+
+ new Field("field", value, Field.Store.YES, Field.Index.ANALYZED_NO_NORMS)
+
+you can now do this:
+
+ FieldType ft = new FieldType(TextField.TYPE_STORED);
+ ft.setOmitNorms(true);
+ new Field("field", ft, value)
+
+If you did this before (bytes is a byte[]):
+
+ new Field("field", bytes)
+
+you can now do this:
+
+ new BinaryField("field", bytes)
Modified: lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelectorVisitor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelectorVisitor.java?rev=1163251&r1=1163250&r2=1163251&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelectorVisitor.java (original)
+++ lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelectorVisitor.java Tue Aug 30 15:05:14 2011
@@ -174,7 +174,6 @@ public class FieldSelectorVisitor extend
ft.setStored(true);
ft.setOmitNorms(fi.omitNorms);
ft.setIndexOptions(fi.indexOptions);
- ft.setLazy(true);
if (binary) {
f = new LazyField(in, fi.name, ft, numBytes, pointer, binary, cacheResult);
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/BinaryField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/BinaryField.java?rev=1163251&r1=1163250&r2=1163251&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/BinaryField.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/BinaryField.java Tue Aug 30 15:05:14 2011
@@ -19,6 +19,8 @@ import org.apache.lucene.util.BytesRef;
* limitations under the License.
*/
+/** A field with byte[] value that is only stored. */
+
public final class BinaryField extends Field {
public static final FieldType TYPE_STORED = new FieldType();
@@ -27,23 +29,18 @@ public final class BinaryField extends F
TYPE_STORED.freeze();
}
+ /** Creates a new BinaryField */
public BinaryField(String name, byte[] value) {
super(name, BinaryField.TYPE_STORED, value);
}
+ /** Creates a new BinaryField */
public BinaryField(String name, byte[] value, int offset, int length) {
super(name, BinaryField.TYPE_STORED, value, offset, length);
}
+ /** Creates a new BinaryField */
public BinaryField(String name, BytesRef bytes) {
super(name, BinaryField.TYPE_STORED, bytes);
}
-
- public BinaryField(String name, FieldType custom, byte[] value) {
- super(name, custom, value);
- }
-
- public boolean isNumeric() {
- return false;
- }
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/Field.java?rev=1163251&r1=1163250&r2=1163251&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/Field.java Tue Aug 30 15:05:14 2011
@@ -50,6 +50,7 @@ public class Field implements IndexableF
public Field(String name, FieldType type) {
this.name = name;
this.type = type;
+ type.freeze();
}
public Field(String name, FieldType type, Reader reader) {
@@ -63,6 +64,7 @@ public class Field implements IndexableF
this.name = name;
this.fieldsData = reader;
this.type = type;
+ type.freeze();
}
public Field(String name, FieldType type, TokenStream tokenStream) {
@@ -77,6 +79,7 @@ public class Field implements IndexableF
this.fieldsData = null;
this.tokenStream = tokenStream;
this.type = type;
+ type.freeze();
}
public Field(String name, FieldType type, byte[] value) {
@@ -87,12 +90,14 @@ public class Field implements IndexableF
this.fieldsData = new BytesRef(value, offset, length);
this.type = type;
this.name = name;
+ type.freeze();
}
public Field(String name, FieldType type, BytesRef bytes) {
this.fieldsData = bytes;
this.type = type;
this.name = name;
+ type.freeze();
}
public Field(String name, FieldType type, String value) {
@@ -114,6 +119,7 @@ public class Field implements IndexableF
this.type = type;
this.name = name;
this.fieldsData = value;
+ type.freeze();
}
/**
@@ -195,22 +201,6 @@ public class Field implements IndexableF
}
/**
- * Expert: change the value of this field. See <a
- * href="#setValue(java.lang.String)">setValue(String)</a>.
- */
- /*
- public void setValue(byte[] value, int offset, int length) {
- if (!isBinary) {
- throw new IllegalArgumentException(
- "cannot set a byte[] value on a non-binary field");
- }
- fieldsData = value;
- binaryLength = length;
- binaryOffset = offset;
- }
- */
-
- /**
* Expert: sets the token stream to be used for indexing and causes
* isIndexed() and isTokenized() to return true. May be combined with stored
* values from stringValue() or getBinaryValue()
@@ -316,7 +306,7 @@ public class Field implements IndexableF
result.append(name);
result.append(':');
- if (fieldsData != null && type.lazy() == false) {
+ if (fieldsData != null) {
result.append(fieldsData);
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/FieldType.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/FieldType.java?rev=1163251&r1=1163250&r2=1163251&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/FieldType.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/FieldType.java Tue Aug 30 15:05:14 2011
@@ -29,7 +29,6 @@ public class FieldType {
private boolean storeTermVectorPositions;
private boolean omitNorms;
private IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
- private boolean lazy;
private boolean frozen;
public FieldType(FieldType ref) {
@@ -41,7 +40,7 @@ public class FieldType {
this.storeTermVectorPositions = ref.storeTermVectorPositions();
this.omitNorms = ref.omitNorms();
this.indexOptions = ref.indexOptions();
- this.lazy = ref.lazy();
+ // Do not copy frozen!
}
public FieldType() {
@@ -52,7 +51,9 @@ public class FieldType {
throw new IllegalStateException();
}
}
-
+
+ /** Prevents future changes. Note that when a FieldType
+ * is first bound to a Field instance, it is frozen. */
public void freeze() {
this.frozen = true;
}
@@ -129,15 +130,6 @@ public class FieldType {
this.indexOptions = value;
}
- public boolean lazy() {
- return this.lazy;
- }
-
- public void setLazy(boolean value) {
- checkIfFrozen();
- this.lazy = value;
- }
-
/** Prints a Field for human consumption. */
@Override
public final String toString() {
@@ -177,9 +169,6 @@ public class FieldType {
result.append(",indexOptions=");
result.append(indexOptions);
}
- if (lazy()){
- result.append(",lazy");
- }
return result.toString();
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/StringField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/StringField.java?rev=1163251&r1=1163250&r2=1163251&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/StringField.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/StringField.java Tue Aug 30 15:05:14 2011
@@ -19,10 +19,26 @@ import org.apache.lucene.index.FieldInfo
* limitations under the License.
*/
+/** A field that is indexed but not tokenized: the entire
+ * String value is indexed as a single token. For example
+ * this might be used for a 'country' field or an 'id'
+ * field, or any field that you intend to use for sorting
+ * or access through the field cache.
+ *
+ * <p/>This field's value is not stored by default; use the
+ * {@link #TYPE_STORED} type (pass it to <code>new
+ * Field</code>) to store the value. */
+
public final class StringField extends Field {
+ /** Indexed, not tokenized, omits norms, indexes
+ * DOCS_ONLY, not stored. */
public static final FieldType TYPE_UNSTORED = new FieldType();
+
+ /** Indexed, not tokenized, omits norms, indexes
+ * DOCS_ONLY, stored */
public static final FieldType TYPE_STORED = new FieldType();
+
static {
TYPE_UNSTORED.setIndexed(true);
TYPE_UNSTORED.setOmitNorms(true);
@@ -36,6 +52,7 @@ public final class StringField extends F
TYPE_STORED.freeze();
}
+ /** Creates a new un-stored StringField */
public StringField(String name, String value) {
super(name, TYPE_UNSTORED, value);
}
@@ -44,8 +61,4 @@ public final class StringField extends F
public String stringValue() {
return (fieldsData == null) ? null : fieldsData.toString();
}
-
- public boolean isNumeric() {
- return false;
- }
}
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/TextField.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/TextField.java?rev=1163251&r1=1163250&r2=1163251&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/TextField.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/document/TextField.java Tue Aug 30 15:05:14 2011
@@ -21,10 +21,22 @@ import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
+/** A field that is indexed and tokenized, without term
+ * vectors. For example this would be used on a 'body'
+ * field, that contains the bulk of a document's text.
+ *
+ * This field's value is not stored by default; use the
+ * {@link #TYPE_STORED} type (pass it to <code>new
+ * Field</code>) to store the value. */
+
public final class TextField extends Field {
+ /** Indexed, tokenized, not stored. */
public static final FieldType TYPE_UNSTORED = new FieldType();
+
+ /** Indexed, tokenized, stored. */
public static final FieldType TYPE_STORED = new FieldType();
+
static {
TYPE_UNSTORED.setIndexed(true);
TYPE_UNSTORED.setTokenized(true);
@@ -35,20 +47,19 @@ public final class TextField extends Fie
TYPE_STORED.setTokenized(true);
TYPE_STORED.freeze();
}
-
+
+ /** Creates a new un-stored TextField */
public TextField(String name, Reader reader) {
super(name, TextField.TYPE_UNSTORED, reader);
}
+ /** Creates a new un-stored TextField */
public TextField(String name, String value) {
super(name, TextField.TYPE_UNSTORED, value);
}
+ /** Creates a new un-stored TextField */
public TextField(String name, TokenStream stream) {
super(name, TextField.TYPE_UNSTORED, stream);
}
-
- public boolean isNumeric() {
- return false;
- }
}
Modified: lucene/dev/trunk/lucene/src/java/overview.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/overview.html?rev=1163251&r1=1163250&r2=1163251&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/overview.html (original)
+++ lucene/dev/trunk/lucene/src/java/overview.html Tue Aug 30 15:05:14 2011
@@ -56,8 +56,7 @@ to check if the results are what we expe
new IndexWriter.MaxFieldLength(25000));
Document doc = new Document();
String text = "This is the text to be indexed.";
- doc.add(new Field("fieldname", text, Field.Store.YES,
- Field.Index.ANALYZED));
+ doc.add(new Field("fieldname", TextField.TYPE_STORED, text));
iwriter.addDocument(doc);
iwriter.close();
Modified: lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java?rev=1163251&r1=1163250&r2=1163251&view=diff
==============================================================================
--- lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java (original)
+++ lucene/dev/trunk/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/DocMaker.java Tue Aug 30 15:05:14 2011
@@ -179,8 +179,8 @@ public class DocMaker {
protected Config config;
- protected final FieldType valType;
- protected final FieldType bodyValType;
+ protected FieldType valType;
+ protected FieldType bodyValType;
protected ContentSource source;
protected boolean reuseFields;
@@ -194,10 +194,6 @@ public class DocMaker {
private int printNum = 0;
public DocMaker() {
- valType = new FieldType(TextField.TYPE_UNSTORED);
- valType.setOmitNorms(true);
-
- bodyValType = new FieldType(TextField.TYPE_UNSTORED);
}
// create a doc
@@ -465,20 +461,24 @@ public class DocMaker {
boolean termVecPositions = config.get("doc.term.vector.positions", false);
boolean termVecOffsets = config.get("doc.term.vector.offsets", false);
+ valType = new FieldType(TextField.TYPE_UNSTORED);
valType.setStored(stored);
- bodyValType.setStored(bodyStored);
valType.setTokenized(tokenized);
valType.setOmitNorms(!norms);
- bodyValType.setTokenized(bodyTokenized);
- bodyValType.setOmitNorms(!bodyNorms);
-
valType.setStoreTermVectors(termVec);
valType.setStoreTermVectorPositions(termVecPositions);
valType.setStoreTermVectorOffsets(termVecOffsets);
+ valType.freeze();
+
+ bodyValType = new FieldType(TextField.TYPE_UNSTORED);
+ bodyValType.setStored(bodyStored);
+ bodyValType.setTokenized(bodyTokenized);
+ bodyValType.setOmitNorms(!bodyNorms);
bodyValType.setStoreTermVectors(termVec);
bodyValType.setStoreTermVectorPositions(termVecPositions);
bodyValType.setStoreTermVectorOffsets(termVecOffsets);
-
+ bodyValType.freeze();
+
storeBytes = config.get("doc.store.body.bytes", false);
reuseFields = config.get("doc.reuse.fields", true);