You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/08/04 18:55:57 UTC
svn commit: r1153925 [3/8] - in /lucene/dev/branches/fieldtype:
lucene/contrib/demo/src/java/org/apache/lucene/demo/
lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/
lucene/contrib/highlighter/src/java/org/apache/lucene/search/ve...
Modified: lucene/dev/branches/fieldtype/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/fieldtype/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java?rev=1153925&r1=1153924&r2=1153925&view=diff
==============================================================================
--- lucene/dev/branches/fieldtype/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java (original)
+++ lucene/dev/branches/fieldtype/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java Thu Aug 4 16:54:58 2011
@@ -10,9 +10,9 @@ import javax.xml.transform.TransformerEx
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document2.Field;
-import org.apache.lucene.document2.FieldType;
-import org.apache.lucene.document2.TextField;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -126,9 +126,9 @@ public class TestQueryTemplateManager ex
}
//Helper method to construct Lucene documents used in our tests
- org.apache.lucene.document2.Document getDocumentFromString(String nameValuePairs)
+ org.apache.lucene.document.Document getDocumentFromString(String nameValuePairs)
{
- org.apache.lucene.document2.Document result=new org.apache.lucene.document2.Document();
+ org.apache.lucene.document.Document result=new org.apache.lucene.document.Document();
StringTokenizer st=new StringTokenizer(nameValuePairs,"\t=");
FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
customType.setStored(true);
Modified: lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java?rev=1153925&r1=1153924&r2=1153925&view=diff
==============================================================================
--- lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java (original)
+++ lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/analysis/NumericTokenStream.java Thu Aug 4 16:54:58 2011
@@ -22,7 +22,7 @@ import org.apache.lucene.util.AttributeI
import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
-import org.apache.lucene.document2.NumericField; // for javadocs
+import org.apache.lucene.document.NumericField;
import org.apache.lucene.search.NumericRangeQuery; // for javadocs
import org.apache.lucene.search.NumericRangeFilter; // for javadocs
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
Modified: lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/analysis/TokenStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/analysis/TokenStream.java?rev=1153925&r1=1153924&r2=1153925&view=diff
==============================================================================
--- lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/analysis/TokenStream.java (original)
+++ lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/analysis/TokenStream.java Thu Aug 4 16:54:58 2011
@@ -21,8 +21,8 @@ import java.io.IOException;
import java.io.Closeable;
import java.lang.reflect.Modifier;
-import org.apache.lucene.document2.Document;
-import org.apache.lucene.document2.Field;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
Modified: lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/document/AbstractField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/document/AbstractField.java?rev=1153925&r1=1153924&r2=1153925&view=diff
==============================================================================
--- lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/document/AbstractField.java (original)
+++ lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/document/AbstractField.java Thu Aug 4 16:54:58 2011
@@ -1,308 +0,0 @@
-package org.apache.lucene.document;
-/**
- * Copyright 2006 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.index.FieldInvertState; // for javadocs
-import org.apache.lucene.search.PhraseQuery; // for javadocs
-import org.apache.lucene.search.spans.SpanQuery; // for javadocs
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.StringHelper; // for javadocs
-
-
-/**
- *
- *
- **/
-public abstract class AbstractField implements Fieldable {
-
- protected String name = "body";
- protected boolean storeTermVector = false;
- protected boolean storeOffsetWithTermVector = false;
- protected boolean storePositionWithTermVector = false;
- protected boolean omitNorms = false;
- protected boolean isStored = false;
- protected boolean isIndexed = true;
- protected boolean isTokenized = true;
- protected boolean isBinary = false;
- protected boolean lazy = false;
- protected boolean omitTermFreqAndPositions = false;
- protected float boost = 1.0f;
- // the data object for all different kind of field values
- protected Object fieldsData = null;
- // pre-analyzed tokenStream for indexed fields
- protected TokenStream tokenStream;
- // length/offset for all primitive types
- protected int binaryLength;
- protected int binaryOffset;
-
- protected AbstractField()
- {
- }
-
- protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
- if (name == null)
- throw new NullPointerException("name cannot be null");
- this.name = StringHelper.intern(name); // field names are interned
-
- this.isStored = store.isStored();
- this.isIndexed = index.isIndexed();
- this.isTokenized = index.isAnalyzed();
- this.omitNorms = index.omitNorms();
-
- this.isBinary = false;
-
- setStoreTermVector(termVector);
- }
-
- /** Sets the boost factor hits on this field. This value will be
- * multiplied into the score of all hits on this this field of this
- * document.
- *
- * <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
- * containing this field. If a document has multiple fields with the same
- * name, all such values are multiplied together. This product is then
- * used to compute the norm factor for the field. By
- * default, in the {@link
- * org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)} method, the boost value is multiplied
- * by the length normalization factor and then
- * rounded by {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before it is stored in the
- * index. One should attempt to ensure that this product does not overflow
- * the range of that encoding.
- *
- * @see org.apache.lucene.document.Document#setBoost(float)
- * @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
- * @see org.apache.lucene.search.Similarity#encodeNormValue(float)
- */
- public void setBoost(float boost) {
- this.boost = boost;
- }
-
- /** Returns the boost factor for hits for this field.
- *
- * <p>The default value is 1.0.
- *
- * <p>Note: this value is not stored directly with the document in the index.
- * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
- * {@link org.apache.lucene.search.IndexSearcher#doc(int)} may thus not have the same value present as when
- * this field was indexed.
- *
- * @see #setBoost(float)
- */
- public float getBoost() {
- return boost;
- }
-
- /** Returns the name of the field as an interned string.
- * For example "date", "title", "body", ...
- */
- public String name() { return name; }
-
- protected void setStoreTermVector(Field.TermVector termVector) {
- this.storeTermVector = termVector.isStored();
- this.storePositionWithTermVector = termVector.withPositions();
- this.storeOffsetWithTermVector = termVector.withOffsets();
- }
-
- /** True iff the value of the field is to be stored in the index for return
- with search hits. It is an error for this to be true if a field is
- Reader-valued. */
- public final boolean isStored() { return isStored; }
-
- /** True iff the value of the field is to be indexed, so that it may be
- searched on. */
- public final boolean isIndexed() { return isIndexed; }
-
- /** True iff the value of the field should be tokenized as text prior to
- indexing. Un-tokenized fields are indexed as a single word and may not be
- Reader-valued. */
- public final boolean isTokenized() { return isTokenized; }
-
- /** True iff the term or terms used to index this field are stored as a term
- * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
- * These methods do not provide access to the original content of the field,
- * only to terms used to index it. If the original content must be
- * preserved, use the <code>stored</code> attribute instead.
- *
- * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
- */
- public final boolean isTermVectorStored() { return storeTermVector; }
-
- /**
- * True iff terms are stored as term vector together with their offsets
- * (start and end position in source text).
- */
- public boolean isStoreOffsetWithTermVector(){
- return storeOffsetWithTermVector;
- }
-
- /**
- * True iff terms are stored as term vector together with their token positions.
- */
- public boolean isStorePositionWithTermVector(){
- return storePositionWithTermVector;
- }
-
- /** True iff the value of the filed is stored as binary */
- public final boolean isBinary() {
- return isBinary;
- }
-
-
- private byte[] getBinaryValue() {
- return getBinaryValue(null);
- }
-
- private byte[] getBinaryValue(byte[] result /* unused */){
- if (isBinary || fieldsData instanceof byte[])
- return (byte[]) fieldsData;
- else
- return null;
- }
-
- public boolean isNumeric() {
- return false;
- }
-
- public BytesRef binaryValue(BytesRef reuse) {
- final byte[] bytes = getBinaryValue();
- if (bytes != null) {
- if (reuse == null) {
- return new BytesRef(bytes,
- getBinaryOffset(),
- getBinaryLength());
- } else {
- reuse.bytes = bytes;
- reuse.offset = getBinaryOffset();
- reuse.length = getBinaryLength();
- return reuse;
- }
- } else {
- return null;
- }
- }
-
- /**
- * Returns length of byte[] segment that is used as value, if Field is not binary
- * returned value is undefined
- * @return length of byte[] segment that represents this Field value
- */
- private int getBinaryLength() {
- if (isBinary) {
- return binaryLength;
- } else if (fieldsData instanceof byte[])
- return ((byte[]) fieldsData).length;
- else
- return 0;
- }
-
- /**
- * Returns offset into byte[] segment that is used as value, if Field is not binary
- * returned value is undefined
- * @return index of the first character in byte[] segment that represents this Field value
- */
- public int getBinaryOffset() {
- return binaryOffset;
- }
-
- /** True if norms are omitted for this indexed field */
- public boolean getOmitNorms() { return omitNorms; }
-
- /** @see #setOmitTermFreqAndPositions */
- public boolean getOmitTermFreqAndPositions() { return omitTermFreqAndPositions; }
-
- /** Expert:
- *
- * If set, omit normalization factors associated with this indexed field.
- * This effectively disables indexing boosts and length normalization for this field.
- */
- public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; }
-
- /** Expert:
- *
- * If set, omit term freq, positions and payloads from
- * postings for this field.
- *
- * <p><b>NOTE</b>: While this option reduces storage space
- * required in the index, it also means any query
- * requiring positional information, such as {@link
- * PhraseQuery} or {@link SpanQuery} subclasses will
- * silently fail to find results.
- */
- public void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions) { this.omitTermFreqAndPositions=omitTermFreqAndPositions; }
-
- public boolean isLazy() {
- return lazy;
- }
-
- /** Prints a Field for human consumption. */
- @Override
- public final String toString() {
- StringBuilder result = new StringBuilder();
- if (isStored) {
- result.append("stored");
- }
- if (isIndexed) {
- if (result.length() > 0)
- result.append(",");
- result.append("indexed");
- }
- if (isTokenized) {
- if (result.length() > 0)
- result.append(",");
- result.append("tokenized");
- }
- if (storeTermVector) {
- if (result.length() > 0)
- result.append(",");
- result.append("termVector");
- }
- if (storeOffsetWithTermVector) {
- if (result.length() > 0)
- result.append(",");
- result.append("termVectorOffsets");
- }
- if (storePositionWithTermVector) {
- if (result.length() > 0)
- result.append(",");
- result.append("termVectorPosition");
- }
- if (isBinary) {
- if (result.length() > 0)
- result.append(",");
- result.append("binary");
- }
- if (omitNorms) {
- result.append(",omitNorms");
- }
- if (omitTermFreqAndPositions) {
- result.append(",omitTermFreqAndPositions");
- }
- if (lazy){
- result.append(",lazy");
- }
- result.append('<');
- result.append(name);
- result.append(':');
-
- if (fieldsData != null && lazy == false) {
- result.append(fieldsData);
- }
-
- result.append('>');
- return result.toString();
- }
-}
Modified: lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/document/Document.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/document/Document.java?rev=1153925&r1=1153924&r2=1153925&view=diff
==============================================================================
--- lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/document/Document.java (original)
+++ lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/document/Document.java Thu Aug 4 16:54:58 2011
@@ -17,44 +17,35 @@ package org.apache.lucene.document;
* limitations under the License.
*/
-import java.io.Reader;
import java.util.*;
-import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexReader; // for javadoc
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.IndexSearcher; // for javadoc
import org.apache.lucene.search.ScoreDoc; // for javadoc
-import org.apache.lucene.util.BytesRef;
/** Documents are the unit of indexing and search.
*
* A Document is a set of fields. Each field has a name and a textual value.
- * A field may be {@link Fieldable#isStored() stored} with the document, in which
+ * A field may be {@link IndexableField#stored() stored} with the document, in which
* case it is returned with search hits on the document. Thus each document
* should typically contain one or more stored fields which uniquely identify
* it.
*
- * <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
+ * <p>Note that fields which are <i>not</i> {@link IndexableField#stored() stored} are
* <i>not</i> available in documents retrieved from the index, e.g. with {@link
* ScoreDoc#doc} or {@link IndexReader#document(int)}.
*/
public final class Document implements Iterable<IndexableField> {
- List<Fieldable> fields = new ArrayList<Fieldable>();
- private float boost = 1.0f;
+ List<IndexableField> fields = new ArrayList<IndexableField>();
/** Constructs a new document with no fields. */
public Document() {}
// @Override not until Java 1.6
public Iterator<IndexableField> iterator() {
- // nocommit this shim code is temporary!! only here as an
- // example... we will fix it "properly" for LUCENE-2308
-
- // nocommit -- must multiply in docBoost to each
- // provided field
return new Iterator<IndexableField>() {
private int fieldUpto = 0;
@@ -68,112 +59,11 @@ public final class Document implements I
}
public IndexableField next() {
- final Fieldable field = fields.get(fieldUpto++);
- return new IndexableField() {
- public String name() {
- return field.name();
- }
-
- public float boost() {
- return boost * field.getBoost();
- }
-
- public boolean stored() {
- return field.isStored();
- }
-
- public BytesRef binaryValue(BytesRef reuse) {
- return field.binaryValue(reuse);
- }
-
- public String stringValue() {
- return field.stringValue();
- }
-
- public Reader readerValue() {
- return field.readerValue();
- }
-
- public TokenStream tokenStreamValue() {
- return field.tokenStreamValue();
- }
-
- public boolean numeric() {
- return field instanceof NumericField;
- }
-
- public org.apache.lucene.document2.NumericField.DataType numericDataType() {
- return field.getDataType();
- }
-
- public Number numericValue() {
- return field.getNumericValue();
- }
-
- public boolean indexed() {
- return field.isIndexed();
- }
-
- public boolean tokenized() {
- return field.isTokenized();
- }
-
- public boolean omitNorms() {
- return field.getOmitNorms();
- }
-
- public boolean omitTermFreqAndPositions() {
- return field.getOmitTermFreqAndPositions();
- }
-
- public boolean storeTermVectors() {
- return field.isTermVectorStored();
- }
-
- public boolean storeTermVectorOffsets() {
- return field.isStoreOffsetWithTermVector();
- }
-
- public boolean storeTermVectorPositions() {
- return field.isStorePositionWithTermVector();
- }
- };
+ return fields.get(fieldUpto++);
}
};
}
- /** Sets a boost factor for hits on any field of this document. This value
- * will be multiplied into the score of all hits on this document.
- *
- * <p>The default value is 1.0.
- *
- * <p>Values are multiplied into the value of {@link Fieldable#getBoost()} of
- * each field in this document. Thus, this method in effect sets a default
- * boost for the fields of this document.
- *
- * @see Fieldable#setBoost(float)
- */
- public void setBoost(float boost) {
- this.boost = boost;
- }
-
- /** Returns, at indexing time, the boost factor as set by {@link #setBoost(float)}.
- *
- * <p>Note that once a document is indexed this value is no longer available
- * from the index. At search time, for retrieved documents, this method always
- * returns 1. This however does not mean that the boost value set at indexing
- * time was ignored - it was just combined with other indexing time factors and
- * stored elsewhere, for better indexing and search performance. (For more
- * information see the "norm(t,d)" part of the scoring formula in
- * {@link org.apache.lucene.search.Similarity Similarity}.)
- *
- * @see #setBoost(float)
- */
- // @Override not until Java 1.6
- public float getBoost() {
- return boost;
- }
-
/**
* <p>Adds a field to a document. Several fields may be added with
* the same name. In this case, if the fields are indexed, their text is
@@ -184,7 +74,7 @@ public final class Document implements I
* a document has to be deleted from an index and a new changed version of that
* document has to be added.</p>
*/
- public final void add(Fieldable field) {
+ public final void add(IndexableField field) {
fields.add(field);
}
@@ -199,9 +89,9 @@ public final class Document implements I
* document has to be added.</p>
*/
public final void removeField(String name) {
- Iterator<Fieldable> it = fields.iterator();
+ Iterator<IndexableField> it = fields.iterator();
while (it.hasNext()) {
- Fieldable field = it.next();
+ IndexableField field = it.next();
if (field.name().equals(name)) {
it.remove();
return;
@@ -219,147 +109,15 @@ public final class Document implements I
* document has to be added.</p>
*/
public final void removeFields(String name) {
- Iterator<Fieldable> it = fields.iterator();
+ Iterator<IndexableField> it = fields.iterator();
while (it.hasNext()) {
- Fieldable field = it.next();
+ IndexableField field = it.next();
if (field.name().equals(name)) {
it.remove();
}
}
}
- /** Returns a field with the given name if any exist in this document, or
- * null. If multiple fields exists with this name, this method returns the
- * first value added.
- * Do not use this method with lazy loaded fields or {@link NumericField}.
- * @deprecated use {@link #getFieldable} instead and cast depending on
- * data type.
- * @throws ClassCastException if you try to retrieve a numerical or
- * lazy loaded field.
- */
- @Deprecated
- public final Field getField(String name) {
- return (Field) getFieldable(name);
- }
-
-
- /** Returns a field with the given name if any exist in this document, or
- * null. If multiple fields exists with this name, this method returns the
- * first value added.
- */
- public Fieldable getFieldable(String name) {
- for (Fieldable field : fields) {
- if (field.name().equals(name))
- return field;
- }
- return null;
- }
-
- /** Returns the string value of the field with the given name if any exist in
- * this document, or null. If multiple fields exist with this name, this
- * method returns the first value added. If only binary fields with this name
- * exist, returns null.
- * For {@link NumericField} it returns the string value of the number. If you want
- * the actual {@code NumericField} instance back, use {@link #getFieldable}.
- */
- public final String get(String name) {
- for (Fieldable field : fields) {
- if (field.name().equals(name) && (!field.isBinary()))
- return field.stringValue();
- }
- return null;
- }
-
- /** Returns a List of all the fields in a document.
- * <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
- * <i>not</i> available in documents retrieved from the
- * index, e.g. {@link IndexSearcher#doc(int)} or {@link
- * IndexReader#document(int)}.
- */
- public final List<Fieldable> getFields() {
- return fields;
- }
-
- private final static Field[] NO_FIELDS = new Field[0];
-
- /**
- * Returns an array of {@link Field}s with the given name.
- * This method returns an empty array when there are no
- * matching fields. It never returns null.
- * Do not use this method with lazy loaded fields or {@link NumericField}.
- *
- * @param name the name of the field
- * @return a <code>Field[]</code> array
- * @deprecated use {@link #getFieldable} instead and cast depending on
- * data type.
- * @throws ClassCastException if you try to retrieve a numerical or
- * lazy loaded field.
- */
- @Deprecated
- public final Field[] getFields(String name) {
- List<Field> result = new ArrayList<Field>();
- for (Fieldable field : fields) {
- if (field.name().equals(name)) {
- result.add((Field) field);
- }
- }
-
- if (result.size() == 0)
- return NO_FIELDS;
-
- return result.toArray(new Field[result.size()]);
- }
-
-
- private final static Fieldable[] NO_FIELDABLES = new Fieldable[0];
-
- /**
- * Returns an array of {@link Fieldable}s with the given name.
- * This method returns an empty array when there are no
- * matching fields. It never returns null.
- *
- * @param name the name of the field
- * @return a <code>Fieldable[]</code> array
- */
- public Fieldable[] getFieldables(String name) {
- List<Fieldable> result = new ArrayList<Fieldable>();
- for (Fieldable field : fields) {
- if (field.name().equals(name)) {
- result.add(field);
- }
- }
-
- if (result.size() == 0)
- return NO_FIELDABLES;
-
- return result.toArray(new Fieldable[result.size()]);
- }
-
-
- private final static String[] NO_STRINGS = new String[0];
-
- /**
- * Returns an array of values of the field specified as the method parameter.
- * This method returns an empty array when there are no
- * matching fields. It never returns null.
- * For {@link NumericField}s it returns the string value of the number. If you want
- * the actual {@code NumericField} instances back, use {@link #getFieldables}.
- * @param name the name of the field
- * @return a <code>String[]</code> of field values
- */
- public final String[] getValues(String name) {
- List<String> result = new ArrayList<String>();
- for (Fieldable field : fields) {
- if (field.name().equals(name) && (!field.isBinary()))
- result.add(field.stringValue());
- }
-
- if (result.size() == 0)
- return NO_STRINGS;
-
- return result.toArray(new String[result.size()]);
- }
-
private final static byte[][] NO_BYTES = new byte[0][];
/**
@@ -373,8 +131,8 @@ public final class Document implements I
*/
public final byte[][] getBinaryValues(String name) {
List<byte[]> result = new ArrayList<byte[]>();
- for (Fieldable field : fields) {
- if (field.name().equals(name) && (field.isBinary()))
+ for (IndexableField field : fields) {
+ if (field.name().equals(name) && ((Field) field).isBinary())
result.add(field.binaryValue(null).bytes);
}
@@ -394,12 +152,52 @@ public final class Document implements I
* @return a <code>byte[]</code> containing the binary field value or <code>null</code>
*/
public final byte[] getBinaryValue(String name) {
- for (Fieldable field : fields) {
- if (field.name().equals(name) && (field.isBinary()))
+ for (IndexableField field : fields) {
+ if (field.name().equals(name) && ((Field) field).isBinary())
return field.binaryValue(null).bytes;
}
return null;
}
+
+ public final IndexableField getField(String name) {
+ for (IndexableField field : fields) {
+ if (field.name().equals(name))
+ return field;
+ }
+ return null;
+ }
+
+ private final static IndexableField[] NO_FIELDS = new IndexableField[0];
+
+ public IndexableField[] getFields(String name) {
+ List<IndexableField> result = new ArrayList<IndexableField>();
+ for (IndexableField field : fields) {
+ if (field.name().equals(name)) {
+ result.add(field);
+ }
+ }
+
+ if (result.size() == 0)
+ return NO_FIELDS;
+
+ return result.toArray(new IndexableField[result.size()]);
+ }
+
+ public Integer size() {
+ return fields.size();
+ }
+
+ public final List<IndexableField> getFields() {
+ return fields;
+ }
+
+ public final String get(String name) {
+ for (IndexableField field : fields) {
+ if (field.name().equals(name) && (field.binaryValue(null) == null))
+ return field.stringValue();
+ }
+ return null;
+ }
/** Prints the fields of a document for human consumption. */
@Override
@@ -407,7 +205,7 @@ public final class Document implements I
StringBuilder buffer = new StringBuilder();
buffer.append("Document<");
for (int i = 0; i < fields.size(); i++) {
- Fieldable field = fields.get(i);
+ IndexableField field = fields.get(i);
buffer.append(field.toString());
if (i != fields.size()-1)
buffer.append(" ");
Modified: lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/document/Field.java?rev=1153925&r1=1153924&r2=1153925&view=diff
==============================================================================
--- lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/document/Field.java Thu Aug 4 16:54:58 2011
@@ -20,547 +20,364 @@ package org.apache.lucene.document;
import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.document2.NumericField.DataType;
-import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.document.NumericField;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;
/**
- A field is a section of a Document. Each field has two parts, a name and a
- value. Values may be free text, provided as a String or as a Reader, or they
- may be atomic keywords, which are not further processed. Such keywords may
- be used to represent dates, urls, etc. Fields are optionally stored in the
- index, so that they may be returned with hits on the document.
- */
+ * A field is a section of a Document. Each field has two parts, a name and a
+ * value. Values may be free text, provided as a String or as a Reader, or they
+ * may be atomic keywords, which are not further processed. Such keywords may be
+ * used to represent dates, urls, etc. Fields are optionally stored in the
+ * index, so that they may be returned with hits on the document.
+ */
-public final class Field extends AbstractField implements Fieldable {
+public class Field implements IndexableField {
- /** Specifies whether and how a field should be stored. */
- public static enum Store {
-
- /** Store the original field value in the index. This is useful for short texts
- * like a document's title which should be displayed with the results. The
- * value is stored in its original form, i.e. no analyzer is used before it is
- * stored.
- */
- YES {
- @Override
- public boolean isStored() { return true; }
- },
-
- /** Do not store the field value in the index. */
- NO {
- @Override
- public boolean isStored() { return false; }
- };
-
- public abstract boolean isStored();
- }
-
- /** Specifies whether and how a field should be indexed. */
- public static enum Index {
-
- /** Do not index the field value. This field can thus not be searched,
- * but one can still access its contents provided it is
- * {@link Field.Store stored}. */
- NO {
- @Override
- public boolean isIndexed() { return false; }
- @Override
- public boolean isAnalyzed() { return false; }
- @Override
- public boolean omitNorms() { return true; }
- },
-
- /** Index the tokens produced by running the field's
- * value through an Analyzer. This is useful for
- * common text. */
- ANALYZED {
- @Override
- public boolean isIndexed() { return true; }
- @Override
- public boolean isAnalyzed() { return true; }
- @Override
- public boolean omitNorms() { return false; }
- },
-
- /** Index the field's value without using an Analyzer, so it can be searched.
- * As no analyzer is used the value will be stored as a single term. This is
- * useful for unique Ids like product numbers.
- */
- NOT_ANALYZED {
- @Override
- public boolean isIndexed() { return true; }
- @Override
- public boolean isAnalyzed() { return false; }
- @Override
- public boolean omitNorms() { return false; }
- },
-
- /** Expert: Index the field's value without an Analyzer,
- * and also disable the indexing of norms. Note that you
- * can also separately enable/disable norms by calling
- * {@link Field#setOmitNorms}. No norms means that
- * index-time field and document boosting and field
- * length normalization are disabled. The benefit is
- * less memory usage as norms take up one byte of RAM
- * per indexed field for every document in the index,
- * during searching. Note that once you index a given
- * field <i>with</i> norms disabled, enabling norms will
- * have no effect. In other words, for this to have the
- * above described effect on a field, one instance of
- * that field must be indexed with NOT_ANALYZED_NO_NORMS
- * at some point. */
- NOT_ANALYZED_NO_NORMS {
- @Override
- public boolean isIndexed() { return true; }
- @Override
- public boolean isAnalyzed() { return false; }
- @Override
- public boolean omitNorms() { return true; }
- },
-
- /** Expert: Index the tokens produced by running the
- * field's value through an Analyzer, and also
- * separately disable the storing of norms. See
- * {@link #NOT_ANALYZED_NO_NORMS} for what norms are
- * and why you may want to disable them. */
- ANALYZED_NO_NORMS {
- @Override
- public boolean isIndexed() { return true; }
- @Override
- public boolean isAnalyzed() { return true; }
- @Override
- public boolean omitNorms() { return true; }
- };
-
- /** Get the best representation of the index given the flags. */
- public static Index toIndex(boolean indexed, boolean analyzed) {
- return toIndex(indexed, analyzed, false);
- }
-
- /** Expert: Get the best representation of the index given the flags. */
- public static Index toIndex(boolean indexed, boolean analyzed, boolean omitNorms) {
-
- // If it is not indexed nothing else matters
- if (!indexed) {
- return Index.NO;
- }
-
- // typical, non-expert
- if (!omitNorms) {
- if (analyzed) {
- return Index.ANALYZED;
- }
- return Index.NOT_ANALYZED;
- }
-
- // Expert: Norms omitted
- if (analyzed) {
- return Index.ANALYZED_NO_NORMS;
- }
- return Index.NOT_ANALYZED_NO_NORMS;
- }
+ protected FieldType type;
+ protected String name = "body";
+ // the data object for all different kind of field values
+ protected Object fieldsData = null;
+ // pre-analyzed tokenStream for indexed fields
+ protected TokenStream tokenStream;
+ protected boolean isBinary = false;
+ // length/offset for all primitive types
+ protected int binaryLength;
+ protected int binaryOffset;
+
+ protected float boost = 1.0f;
- public abstract boolean isIndexed();
- public abstract boolean isAnalyzed();
- public abstract boolean omitNorms();
+ /**
+  * Creates a field with the given name and type and no value.
+  *
+  * NOTE(review): unlike the other constructors of this class, this one stores
+  * {@code name} without interning it and performs no null checks - confirm
+  * this is intended.
+  *
+  * @param name the field name, stored as given
+  * @param type the field type, stored as given
+  */
+ public Field(String name, FieldType type) {
+ this.name = name;
+ this.type = type;
}
-
- /** Specifies whether and how a field should have term vectors. */
- public static enum TermVector {
-
- /** Do not store term vectors.
- */
- NO {
- @Override
- public boolean isStored() { return false; }
- @Override
- public boolean withPositions() { return false; }
- @Override
- public boolean withOffsets() { return false; }
- },
-
- /** Store the term vectors of each document. A term vector is a list
- * of the document's terms and their number of occurrences in that document. */
- YES {
- @Override
- public boolean isStored() { return true; }
- @Override
- public boolean withPositions() { return false; }
- @Override
- public boolean withOffsets() { return false; }
- },
-
- /**
- * Store the term vector + token position information
- *
- * @see #YES
- */
- WITH_POSITIONS {
- @Override
- public boolean isStored() { return true; }
- @Override
- public boolean withPositions() { return true; }
- @Override
- public boolean withOffsets() { return false; }
- },
+
+ public Field(String name, FieldType type, Reader reader) {
+ if (name == null)
+ throw new NullPointerException("name cannot be null");
+ if (reader == null)
+ throw new NullPointerException("reader cannot be null");
- /**
- * Store the term vector + Token offset information
- *
- * @see #YES
- */
- WITH_OFFSETS {
- @Override
- public boolean isStored() { return true; }
- @Override
- public boolean withPositions() { return false; }
- @Override
- public boolean withOffsets() { return true; }
- },
+ this.name = StringHelper.intern(name); // field names are interned
+ this.fieldsData = reader;
+ this.type = type;
+ }
+
+ public Field(String name, FieldType type, TokenStream tokenStream) {
+ if (name == null)
+ throw new NullPointerException("name cannot be null");
+ if (tokenStream == null)
+ throw new NullPointerException("tokenStream cannot be null");
- /**
- * Store the term vector + Token position and offset information
- *
- * @see #YES
- * @see #WITH_POSITIONS
- * @see #WITH_OFFSETS
- */
- WITH_POSITIONS_OFFSETS {
- @Override
- public boolean isStored() { return true; }
- @Override
- public boolean withPositions() { return true; }
- @Override
- public boolean withOffsets() { return true; }
- };
-
- /** Get the best representation of a TermVector given the flags. */
- public static TermVector toTermVector(boolean stored, boolean withOffsets, boolean withPositions) {
-
- // If it is not stored, nothing else matters.
- if (!stored) {
- return TermVector.NO;
- }
-
- if (withOffsets) {
- if (withPositions) {
- return Field.TermVector.WITH_POSITIONS_OFFSETS;
- }
- return Field.TermVector.WITH_OFFSETS;
- }
-
- if (withPositions) {
- return Field.TermVector.WITH_POSITIONS;
- }
- return Field.TermVector.YES;
- }
-
- public abstract boolean isStored();
- public abstract boolean withPositions();
- public abstract boolean withOffsets();
+ this.name = StringHelper.intern(name); // field names are interned
+ this.fieldsData = null;
+ this.tokenStream = tokenStream;
+ this.type = type;
}
+ /**
+  * Creates a binary field covering the whole of {@code value}; delegates to
+  * the offset/length constructor with offset 0 and length
+  * {@code value.length}.
+  *
+  * @throws NullPointerException if {@code value} is null (its length is read)
+  */
+ public Field(String name, FieldType type, byte[] value) {
+ this(name, type, value, 0, value.length);
+ }
- /** The value of the field as a String, or null. If null, the Reader value or
- * binary value is used. Exactly one of stringValue(),
- * readerValue(), and getBinaryValue() must be set. */
- public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; }
+ /**
+  * Creates a binary field over a slice of {@code value}.
+  *
+  * @param name   the field name; it is interned via StringHelper.intern
+  * @param type   the field type, stored as given
+  * @param value  the backing byte array (stored by reference, not copied)
+  * @param offset start of the slice within {@code value}
+  * @param length number of bytes in the slice
+  */
+ public Field(String name, FieldType type, byte[] value, int offset, int length) {
+   this.name = StringHelper.intern(name);
+   this.type = type;
+   this.fieldsData = value;
+   this.isBinary = true;
+   this.binaryOffset = offset;
+   this.binaryLength = length;
+ }
+
+ /**
+  * Creates a field with a String value; delegates to the four-argument
+  * constructor with {@code internName} set to true.
+  */
+ public Field(String name, FieldType type, String value) {
+ this(name, true, type, value);
+ }
- /** The value of the field as a Reader, or null. If null, the String value or
- * binary value is used. Exactly one of stringValue(),
- * readerValue(), and getBinaryValue() must be set. */
- public Reader readerValue() { return fieldsData instanceof Reader ? (Reader)fieldsData : null; }
+ /**
+  * Creates a field with a String value.
+  *
+  * @param name       the field name
+  * @param internName whether to intern {@code name} via StringHelper.intern
+  *                   before storing it
+  * @param type       the field type; it is dereferenced, so null causes a
+  *                   NullPointerException
+  * @param value      the String value
+  * @throws IllegalArgumentException if {@code name} or {@code value} is null,
+  *         if the type is neither indexed nor stored, or if the type stores
+  *         term vectors but is not indexed
+  */
+ public Field(String name, boolean internName, FieldType type, String value) {
+ if (name == null) {
+ throw new IllegalArgumentException("name cannot be null");
+ }
+ if (value == null) {
+ throw new IllegalArgumentException("value cannot be null");
+ }
+ if (!type.stored() && !type.indexed()) {
+ throw new IllegalArgumentException("it doesn't make sense to have a field that "
+ + "is neither indexed nor stored");
+ }
+ // Term vectors need an indexed field, whether or not the type is tokenized.
+ if (!type.indexed() && (type.storeTermVectors())) {
+ throw new IllegalArgumentException("cannot store term vector information "
+ + "for a field that is not indexed");
+ }
- /** The TokesStream for this field to be used when indexing, or null. If null, the Reader value
- * or String value is analyzed to produce the indexed tokens. */
- public TokenStream tokenStreamValue() { return tokenStream; }
-
- public Number getNumericValue() {
- return null;
+ // Intern before assigning, so the stored name is the interned instance.
+ if (internName) // field names are optionally interned
+ name = StringHelper.intern(name);
+
+ this.type = type;
+ this.name = name;
+ this.fieldsData = value;
}
- public DataType getDataType() {
- return null;
+ public boolean isNumeric() {
+ return false;
}
-
- /** <p>Expert: change the value of this field. This can
- * be used during indexing to re-use a single Field
- * instance to improve indexing speed by avoiding GC cost
- * of new'ing and reclaiming Field instances. Typically
- * a single {@link Document} instance is re-used as
- * well. This helps most on small documents.</p>
+
+ /**
+  * Returns the value of this field as a String.
+  *
+  * @return the value when the field currently holds a String, otherwise null
+  *         (for example when it holds a Reader or a byte array)
+  */
+ public String stringValue() {
+   if (fieldsData instanceof String) {
+     return (String) fieldsData;
+   }
+   return null;
+ }
+
+ /**
+  * Returns the value of this field as a Reader.
+  *
+  * @return the value when the field currently holds a Reader, otherwise null
+  *         (for example when it holds a String or a byte array)
+  */
+ public Reader readerValue() {
+   if (fieldsData instanceof Reader) {
+     return (Reader) fieldsData;
+   }
+   return null;
+ }
+
+ /**
+  * Returns the TokenStream set on this field, or null when none has been
+  * set.
+  */
+ public TokenStream tokenStreamValue() {
+   return this.tokenStream;
+ }
+
+ /**
+ * <p>
+ * Expert: change the value of this field. This can be used during indexing to
+ * re-use a single Field instance to improve indexing speed by avoiding GC
+ * cost of new'ing and reclaiming Field instances. Typically a single
+ * {@link Document} instance is re-used as well. This helps most on small
+ * documents.
+ * </p>
*
- * <p>Each Field instance should only be used once
- * within a single {@link Document} instance. See <a
- * href="http://wiki.apache.org/lucene-java/ImproveIndexingSpeed">ImproveIndexingSpeed</a>
- * for details.</p> */
+ * <p>
+ * Each Field instance should only be used once within a single
+ * {@link Document} instance. See <a
+ * href="http://wiki.apache.org/lucene-java/ImproveIndexingSpeed"
+ * >ImproveIndexingSpeed</a> for details.
+ * </p>
+ */
public void setValue(String value) {
if (isBinary) {
- throw new IllegalArgumentException("cannot set a String value on a binary field");
+ throw new IllegalArgumentException(
+ "cannot set a String value on a binary field");
}
fieldsData = value;
}
-
- /** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
+
+ /**
+ * Expert: change the value of this field. See <a
+ * href="#setValue(java.lang.String)">setValue(String)</a>.
+ */
public void setValue(Reader value) {
if (isBinary) {
- throw new IllegalArgumentException("cannot set a Reader value on a binary field");
+ throw new IllegalArgumentException(
+ "cannot set a Reader value on a binary field");
}
- if (isStored) {
- throw new IllegalArgumentException("cannot set a Reader value on a stored field");
+ if (stored()) {
+ throw new IllegalArgumentException(
+ "cannot set a Reader value on a stored field");
}
fieldsData = value;
}
-
- /** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
+
+ /**
+ * Expert: change the value of this field. See <a
+ * href="#setValue(java.lang.String)">setValue(String)</a>.
+ */
public void setValue(byte[] value) {
if (!isBinary) {
- throw new IllegalArgumentException("cannot set a byte[] value on a non-binary field");
+ throw new IllegalArgumentException(
+ "cannot set a byte[] value on a non-binary field");
}
fieldsData = value;
binaryLength = value.length;
binaryOffset = 0;
}
-
- /** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
+
+ /**
+ * Expert: change the value of this field. See <a
+ * href="#setValue(java.lang.String)">setValue(String)</a>.
+ */
public void setValue(byte[] value, int offset, int length) {
if (!isBinary) {
- throw new IllegalArgumentException("cannot set a byte[] value on a non-binary field");
+ throw new IllegalArgumentException(
+ "cannot set a byte[] value on a non-binary field");
}
fieldsData = value;
binaryLength = length;
binaryOffset = offset;
}
- /** Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true.
- * May be combined with stored values from stringValue() or getBinaryValue() */
+ /**
+ * Expert: sets the token stream to be used for indexing. The field must be
+ * both indexed and tokenized, otherwise an IllegalArgumentException is
+ * thrown. May be combined with stored values from stringValue() or binaryValue().
+ */
public void setTokenStream(TokenStream tokenStream) {
- this.isIndexed = true;
- this.isTokenized = true;
+ if (!indexed() || !tokenized()) {
+ throw new IllegalArgumentException(
+ "cannot set token stream on non indexed and tokenized field");
+ }
this.tokenStream = tokenStream;
}
-
- /**
- * Create a field by specifying its name, value and how it will
- * be saved in the index. Term vectors will not be stored in the index.
- *
- * @param name The name of the field
- * @param value The string to process
- * @param store Whether <code>value</code> should be stored in the index
- * @param index Whether the field should be indexed, and if so, if it should
- * be tokenized before indexing
- * @throws NullPointerException if name or value is <code>null</code>
- * @throws IllegalArgumentException if the field is neither stored nor indexed
- */
- public Field(String name, String value, Store store, Index index) {
- this(name, value, store, index, TermVector.NO);
+
+ public String name() {
+ return name;
}
- /**
- * Create a field by specifying its name, value and how it will
- * be saved in the index.
- *
- * @param name The name of the field
- * @param value The string to process
- * @param store Whether <code>value</code> should be stored in the index
- * @param index Whether the field should be indexed, and if so, if it should
- * be tokenized before indexing
- * @param termVector Whether term vector should be stored
- * @throws NullPointerException if name or value is <code>null</code>
- * @throws IllegalArgumentException in any of the following situations:
- * <ul>
- * <li>the field is neither stored nor indexed</li>
- * <li>the field is not indexed but termVector is <code>TermVector.YES</code></li>
- * </ul>
- */
- public Field(String name, String value, Store store, Index index, TermVector termVector) {
- this(name, true, value, store, index, termVector);
+ public float boost() {
+ return boost;
}
/**
- * Create a field by specifying its name, value and how it will
- * be saved in the index.
+ * Sets the boost factor for hits on this field. This value will be multiplied
+ * into the score of all hits on this field of this document.
*
- * @param name The name of the field
- * @param internName Whether to .intern() name or not
- * @param value The string to process
- * @param store Whether <code>value</code> should be stored in the index
- * @param index Whether the field should be indexed, and if so, if it should
- * be tokenized before indexing
- * @param termVector Whether term vector should be stored
- * @throws NullPointerException if name or value is <code>null</code>
- * @throws IllegalArgumentException in any of the following situations:
- * <ul>
- * <li>the field is neither stored nor indexed</li>
- * <li>the field is not indexed but termVector is <code>TermVector.YES</code></li>
- * </ul>
- */
- public Field(String name, boolean internName, String value, Store store, Index index, TermVector termVector) {
- if (name == null)
- throw new NullPointerException("name cannot be null");
- if (value == null)
- throw new NullPointerException("value cannot be null");
- if (name.length() == 0 && value.length() == 0)
- throw new IllegalArgumentException("name and value cannot both be empty");
- if (index == Index.NO && store == Store.NO)
- throw new IllegalArgumentException("it doesn't make sense to have a field that "
- + "is neither indexed nor stored");
- if (index == Index.NO && termVector != TermVector.NO)
- throw new IllegalArgumentException("cannot store term vector information "
- + "for a field that is not indexed");
-
- if (internName) // field names are optionally interned
- name = StringHelper.intern(name);
-
- this.name = name;
-
- this.fieldsData = value;
-
- this.isStored = store.isStored();
-
- this.isIndexed = index.isIndexed();
- this.isTokenized = index.isAnalyzed();
- this.omitNorms = index.omitNorms();
- if (index == Index.NO) {
- this.omitTermFreqAndPositions = false;
- }
-
- this.isBinary = false;
-
- setStoreTermVector(termVector);
- }
-
- /**
- * Create a tokenized and indexed field that is not stored. Term vectors will
- * not be stored. The Reader is read only when the Document is added to the index,
- * i.e. you may not close the Reader until {@link IndexWriter#addDocument(Iterable)}
- * has been called.
+ * <p>
+ * The boost is multiplied by
+ * {@link org.apache.lucene.document.Document#getBoost()} of the document
+ * containing this field. If a document has multiple fields with the same
+ * name, all such values are multiplied together. This product is then used to
+ * compute the norm factor for the field. By default, in the
+ * {@link org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)}
+ * method, the boost value is multiplied by the length normalization factor
+ * and then rounded by
+ * {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before
+ * it is stored in the index. One should attempt to ensure that this product
+ * does not overflow the range of that encoding.
*
- * @param name The name of the field
- * @param reader The reader with the content
- * @throws NullPointerException if name or reader is <code>null</code>
+ * @see org.apache.lucene.document.Document#setBoost(float)
+ * @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
+ * @see org.apache.lucene.search.Similarity#encodeNormValue(float)
*/
- public Field(String name, Reader reader) {
- this(name, reader, TermVector.NO);
+ public void setBoost(float boost) {
+ this.boost = boost;
+ }
+
+ public boolean numeric() {
+ return false;
}
- /**
- * Create a tokenized and indexed field that is not stored, optionally with
- * storing term vectors. The Reader is read only when the Document is added to the index,
- * i.e. you may not close the Reader until {@link IndexWriter#addDocument(Iterable)}
- * has been called.
- *
- * @param name The name of the field
- * @param reader The reader with the content
- * @param termVector Whether term vector should be stored
- * @throws NullPointerException if name or reader is <code>null</code>
- */
- public Field(String name, Reader reader, TermVector termVector) {
- if (name == null)
- throw new NullPointerException("name cannot be null");
- if (reader == null)
- throw new NullPointerException("reader cannot be null");
-
- this.name = StringHelper.intern(name); // field names are interned
- this.fieldsData = reader;
-
- this.isStored = false;
-
- this.isIndexed = true;
- this.isTokenized = true;
-
- this.isBinary = false;
-
- setStoreTermVector(termVector);
+ public Number numericValue() {
+ return null;
}
- /**
- * Create a tokenized and indexed field that is not stored. Term vectors will
- * not be stored. This is useful for pre-analyzed fields.
- * The TokenStream is read only when the Document is added to the index,
- * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Iterable)}
- * has been called.
- *
- * @param name The name of the field
- * @param tokenStream The TokenStream with the content
- * @throws NullPointerException if name or tokenStream is <code>null</code>
- */
- public Field(String name, TokenStream tokenStream) {
- this(name, tokenStream, TermVector.NO);
+ public NumericField.DataType numericDataType() {
+ return null;
}
- /**
- * Create a tokenized and indexed field that is not stored, optionally with
- * storing term vectors. This is useful for pre-analyzed fields.
- * The TokenStream is read only when the Document is added to the index,
- * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Iterable)}
- * has been called.
- *
- * @param name The name of the field
- * @param tokenStream The TokenStream with the content
- * @param termVector Whether term vector should be stored
- * @throws NullPointerException if name or tokenStream is <code>null</code>
- */
- public Field(String name, TokenStream tokenStream, TermVector termVector) {
- if (name == null)
- throw new NullPointerException("name cannot be null");
- if (tokenStream == null)
- throw new NullPointerException("tokenStream cannot be null");
-
- this.name = StringHelper.intern(name); // field names are interned
- this.fieldsData = null;
- this.tokenStream = tokenStream;
-
- this.isStored = false;
-
- this.isIndexed = true;
- this.isTokenized = true;
-
- this.isBinary = false;
-
- setStoreTermVector(termVector);
+ /**
+  * Returns the field data as a byte array when the field is flagged binary or
+  * the data is a byte array; otherwise null.
+  *
+  * @param result unused
+  */
+ private byte[] getBinaryValue(byte[] result /* unused */) {
+   final boolean holdsBytes = isBinary || fieldsData instanceof byte[];
+   return holdsBytes ? (byte[]) fieldsData : null;
+ }
+
+ /** Convenience overload: calls {@code getBinaryValue(null)}. */
+ private byte[] getBinaryValue() {
+ return getBinaryValue(null);
+ }
+
+ /**
+  * Returns the binary value of this field as a BytesRef over the field's
+  * byte array, binary offset and binary length.
+  *
+  * @param reuse an instance to fill and return, or null to allocate a new one
+  * @return {@code reuse} (or a new BytesRef) pointing at the value, or null
+  *         when the field has no binary value
+  */
+ public BytesRef binaryValue(BytesRef reuse) {
+   final byte[] bytes = getBinaryValue();
+   if (bytes == null) {
+     return null;
+   }
+   if (reuse == null) {
+     return new BytesRef(bytes, getBinaryOffset(), getBinaryLength());
+   }
+   // Repoint the caller's instance; the bytes are shared, not copied.
+   reuse.bytes = bytes;
+   reuse.offset = getBinaryOffset();
+   reuse.length = getBinaryLength();
+   return reuse;
}
-
/**
- * Create a stored field with binary value. Optionally the value may be compressed.
+ * Returns length of byte[] segment that is used as value, if Field is not
+ * binary returned value is undefined
*
- * @param name The name of the field
- * @param value The binary value
+ * @return length of byte[] segment that represents this Field value
*/
- public Field(String name, byte[] value) {
- this(name, value, 0, value.length);
+ private int getBinaryLength() {
+   if (isBinary) {
+     return binaryLength;
+   }
+   // Not flagged binary: fall back to the whole array, or 0 without one.
+   if (fieldsData instanceof byte[]) {
+     return ((byte[]) fieldsData).length;
+   }
+   return 0;
}
-
+
/**
- * Create a stored field with binary value. Optionally the value may be compressed.
+ * Returns offset into byte[] segment that is used as value, if Field is not
+ * binary returned value is undefined
*
- * @param name The name of the field
- * @param value The binary value
- * @param offset Starting offset in value where this Field's bytes are
- * @param length Number of bytes to use for this Field, starting at offset
+ * @return index of the first byte in byte[] segment that represents this
+ * Field value
*/
- public Field(String name, byte[] value, int offset, int length) {
+ public int getBinaryOffset() {
+   return this.binaryOffset;
+ }
+
+ /** Returns the binary flag of this field. */
+ public boolean isBinary() {
+   return this.isBinary;
+ }
+
+ /** methods from inner FieldType */
+
+ /** Returns {@code type.stored()} of this field's FieldType. */
+ public boolean stored() {
+ return type.stored();
+ }
+
+ /** Returns {@code type.indexed()} of this field's FieldType. */
+ public boolean indexed() {
+ return type.indexed();
+ }
+
+ /** Returns {@code type.tokenized()} of this field's FieldType. */
+ public boolean tokenized() {
+ return type.tokenized();
+ }
+
+ /** Returns {@code type.omitNorms()} of this field's FieldType. */
+ public boolean omitNorms() {
+ return type.omitNorms();
+ }
+
+ /** Returns {@code type.omitTermFreqAndPositions()} of this field's FieldType. */
+ public boolean omitTermFreqAndPositions() {
+ return type.omitTermFreqAndPositions();
+ }
+
+ /** Returns {@code type.storeTermVectors()} of this field's FieldType. */
+ public boolean storeTermVectors() {
+ return type.storeTermVectors();
+ }
+
+ /** Returns {@code type.storeTermVectorOffsets()} of this field's FieldType. */
+ public boolean storeTermVectorOffsets() {
+ return type.storeTermVectorOffsets();
+ }
+
+ /** Returns {@code type.storeTermVectorPositions()} of this field's FieldType. */
+ public boolean storeTermVectorPositions() {
+ return type.storeTermVectorPositions();
+ }
+
+ /** Returns {@code type.lazy()} of this field's FieldType. */
+ public boolean lazy() {
+ return type.lazy();
+ }
+
+ /** Prints a Field for human consumption. */
+ @Override
+ public final String toString() {
+ StringBuilder result = new StringBuilder();
+ result.append(type.toString());
+ result.append('<');
+ result.append(name);
+ result.append(':');
- if (name == null)
- throw new IllegalArgumentException("name cannot be null");
- if (value == null)
- throw new IllegalArgumentException("value cannot be null");
-
- this.name = StringHelper.intern(name); // field names are interned
- fieldsData = value;
-
- isStored = true;
- isIndexed = false;
- isTokenized = false;
- omitTermFreqAndPositions = false;
- omitNorms = true;
-
- isBinary = true;
- binaryLength = length;
- binaryOffset = offset;
-
- setStoreTermVector(TermVector.NO);
+ if (fieldsData != null && type.lazy() == false) {
+ result.append(fieldsData);
+ }
+
+ result.append('>');
+ return result.toString();
}
}
Modified: lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/document/Fieldable.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/document/Fieldable.java?rev=1153925&r1=1153924&r2=1153925&view=diff
==============================================================================
--- lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/document/Fieldable.java (original)
+++ lucene/dev/branches/fieldtype/lucene/src/java/org/apache/lucene/document/Fieldable.java Thu Aug 4 16:54:58 2011
@@ -1,213 +0,0 @@
-package org.apache.lucene.document;
-
-/**
- * Copyright 2004 The Apache Software Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Reader;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.document2.NumericField.DataType;
-import org.apache.lucene.index.FieldInvertState; // for javadocs
-import org.apache.lucene.search.PhraseQuery; // for javadocs
-import org.apache.lucene.search.spans.SpanQuery; // for javadocs
-import org.apache.lucene.util.BytesRef; // for javadocs
-
-/**
- * Synonymous with {@link Field}.
- *
- * <p><b>WARNING</b>: This interface may change within minor versions, despite Lucene's backward compatibility requirements.
- * This means new methods may be added from version to version. This change only affects the Fieldable API; other backwards
- * compatibility promises remain intact. For example, Lucene can still
- * read and write indices created within the same major version.
- * </p>
- *
- **/
-public interface Fieldable {
-
- /** Sets the boost factor for hits on this field. This value will be
- * multiplied into the score of all hits on this field of this
- * document.
- *
- * <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
- * containing this field. If a document has multiple fields with the same
- * name, all such values are multiplied together. This product is then
- * used to compute the norm factor for the field. By
- * default, in the {@link
- * org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)} method, the boost value is multiplied
- * by the length normalization factor
- * and then rounded by {@link org.apache.lucene.search.Similarity#encodeNormValue(float)} before it is stored in the
- * index. One should attempt to ensure that this product does not overflow
- * the range of that encoding.
- *
- * @see org.apache.lucene.document.Document#setBoost(float)
- * @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
- * @see org.apache.lucene.search.Similarity#encodeNormValue(float)
- */
- void setBoost(float boost);
-
- /** Returns the boost factor for hits for this field.
- *
- * <p>The default value is 1.0.
- *
- * <p>Note: this value is not stored directly with the document in the index.
- * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
- * {@link org.apache.lucene.search.IndexSearcher#doc(int)} may thus not have the same value present as when
- * this field was indexed.
- *
- * @see #setBoost(float)
- */
- float getBoost();
-
- /** Returns the name of the field as an interned string.
- * For example "date", "title", "body", ...
- */
- String name();
-
- /** The value of the field as a String, or null.
- * <p>
- * For indexing, if isStored()==true, the stringValue() will be used as the stored field value
- * unless isBinary()==true, in which case binaryValue() will be used.
- *
- * If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token.
- * If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate indexed tokens if not null,
- * else readerValue() will be used to generate indexed tokens if not null, else stringValue() will be used to generate tokens.
- */
- public String stringValue();
-
- /** The value of the field as a Reader, which can be used at index time to generate indexed tokens.
- * @see #stringValue()
- */
- public Reader readerValue();
-
- /** The TokenStream for this field to be used when indexing, or null.
- * @see #stringValue()
- */
- public TokenStream tokenStreamValue();
-
- /** True if the value of the field is to be stored in the index for return
- with search hits. */
- boolean isStored();
-
- /** True if the value of the field is to be indexed, so that it may be
- searched on. */
- boolean isIndexed();
-
- /** True if the value of the field should be tokenized as text prior to
- indexing. Un-tokenized fields are indexed as a single word and may not be
- Reader-valued. */
- boolean isTokenized();
-
- /** True if the term or terms used to index this field are stored as a term
- * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
- * These methods do not provide access to the original content of the field,
- * only to terms used to index it. If the original content must be
- * preserved, use the <code>stored</code> attribute instead.
- *
- * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
- */
- boolean isTermVectorStored();
-
- /**
- * True if terms are stored as term vector together with their offsets
- * (start and end position in source text).
- */
- boolean isStoreOffsetWithTermVector();
-
- /**
- * True if terms are stored as term vector together with their token positions.
- */
- boolean isStorePositionWithTermVector();
-
- /** True if the value of the field is stored as binary */
- boolean isBinary();
-
- /** True if norms are omitted for this indexed field */
- boolean getOmitNorms();
-
- /** Expert:
- *
- * If set, omit normalization factors associated with this indexed field.
- * This effectively disables indexing boosts and length normalization for this field.
- */
- void setOmitNorms(boolean omitNorms);
-
- /**
- * Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving
- * its values via {@link #stringValue()} or {@link #binaryValue(BytesRef)} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that
- * retrieved the {@link Document} is still open.
- *
- * @return true if this field can be loaded lazily
- */
- boolean isLazy();
-
- /**
- * Returns offset into byte[] segment that is used as value, if Field is not binary
- * returned value is undefined
- * @return index of the first character in byte[] segment that represents this Field value
- */
- //abstract int getBinaryOffset();
-
- /**
- * Returns length of byte[] segment that is used as value, if Field is not binary
- * returned value is undefined
- * @return length of byte[] segment that represents this Field value
- */
- //abstract int getBinaryLength();
-
- //abstract byte[] getBinaryValue();
-
- // nocommit api break
- abstract BytesRef binaryValue(BytesRef reuse);
-
- abstract DataType getDataType();
-
- abstract Number getNumericValue();
-
- /**
- * Return the raw byte[] for the binary field. Note that
- * you must also call {@link #binaryValue}
- * to know which range of bytes in this
- * returned array belong to the field.<p>
- * About reuse: if you pass in the result byte[] and it is
- * used, likely the underlying implementation will hold
- * onto this byte[] and return it in future calls to
- * {@link #binaryValue(BytesRef)}.
- * So if you subsequently re-use the same byte[] elsewhere
- * it will alter this Fieldable's value.
- * @param result User defined buffer that will be used if
- * possible. If this is null or not large enough, a new
- * buffer is allocated
- * @return reference to the Field value as byte[].
- */
- // nocommit -- remove this too; add reuse param to binaryValue
- //abstract byte[] getBinaryValue(byte[] result);
-
- /** @see #setOmitTermFreqAndPositions */
- boolean getOmitTermFreqAndPositions();
-
- /** Expert:
- *
- * If set, omit term freq, positions and payloads from
- * postings for this field.
- *
- * <p><b>NOTE</b>: While this option reduces storage space
- * required in the index, it also means any query
- * requiring positional information, such as {@link
- * PhraseQuery} or {@link SpanQuery} subclasses will
- * fail with an exception.
- */
- void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions);
-}