You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/01/12 19:25:09 UTC
svn commit: r1230683 - in /lucene/dev/branches/lucene3453/lucene/src:
java/org/apache/lucene/document/ test/org/apache/lucene/document/
Author: mikemccand
Date: Thu Jan 12 18:25:09 2012
New Revision: 1230683
URL: http://svn.apache.org/viewvc?rev=1230683&view=rev
Log:
LUCENE-3682: add deprecated transition API to Field
Modified:
lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/Field.java
lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/StringField.java
lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/TextField.java
lucene/dev/branches/lucene3453/lucene/src/test/org/apache/lucene/document/TestDocument.java
Modified: lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/Field.java?rev=1230683&r1=1230682&r2=1230683&view=diff
==============================================================================
--- lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/Field.java Thu Jan 12 18:25:09 2012
@@ -486,4 +486,447 @@ public class Field implements IndexableF
throw new IllegalArgumentException("Field must have either TokenStream, String, Reader or Number value");
}
+
+
+ //
+ // Deprecated transition API below:
+ //
+
+ /** Specifies whether and how a field should be stored.
+ *
+ * @deprecated This is here only to ease transition from
+ * the pre-4.0 APIs. */
+ @Deprecated
+ public static enum Store {
+
+ /** Store the original field value in the index. This is useful for short texts
+ * like a document's title which should be displayed with the results. The
+ * value is stored in its original form, i.e. no analyzer is used before it is
+ * stored.
+ */
+ YES {
+ @Override
+ public boolean isStored() { return true; }
+ },
+
+ /** Do not store the field value in the index. */
+ NO {
+ @Override
+ public boolean isStored() { return false; }
+ };
+
+ public abstract boolean isStored();
+ }
+
+ /** Specifies whether and how a field should be indexed.
+ *
+ * @deprecated This is here only to ease transition from
+ * the pre-4.0 APIs. */
+ @Deprecated
+ public static enum Index {
+
+ /** Do not index the field value. This field can thus not be searched,
+ * but one can still access its contents provided it is
+ * {@link Field.Store stored}. */
+ NO {
+ @Override
+ public boolean isIndexed() { return false; }
+ @Override
+ public boolean isAnalyzed() { return false; }
+ @Override
+ public boolean omitNorms() { return true; }
+ },
+
+ /** Index the tokens produced by running the field's
+ * value through an Analyzer. This is useful for
+ * common text. */
+ ANALYZED {
+ @Override
+ public boolean isIndexed() { return true; }
+ @Override
+ public boolean isAnalyzed() { return true; }
+ @Override
+ public boolean omitNorms() { return false; }
+ },
+
+ /** Index the field's value without using an Analyzer, so it can be searched.
+ * As no analyzer is used the value will be indexed as a single term. This is
+ * useful for unique Ids like product numbers.
+ */
+ NOT_ANALYZED {
+ @Override
+ public boolean isIndexed() { return true; }
+ @Override
+ public boolean isAnalyzed() { return false; }
+ @Override
+ public boolean omitNorms() { return false; }
+ },
+
+ /** Expert: Index the field's value without an Analyzer,
+ * and also disable the indexing of norms. Note that you
+ * can also separately enable/disable norms by calling
+ * {@link Field#setOmitNorms}. No norms means that
+ * index-time field and document boosting and field
+ * length normalization are disabled. The benefit is
+ * less memory usage as norms take up one byte of RAM
+ * per indexed field for every document in the index,
+ * during searching. Note that once you index a given
+ * field <i>with</i> norms enabled, disabling norms will
+ * have no effect. In other words, for this to have the
+ * above described effect on a field, all instances of
+ * that field must be indexed with NOT_ANALYZED_NO_NORMS
+ * from the beginning. */
+ NOT_ANALYZED_NO_NORMS {
+ @Override
+ public boolean isIndexed() { return true; }
+ @Override
+ public boolean isAnalyzed() { return false; }
+ @Override
+ public boolean omitNorms() { return true; }
+ },
+
+ /** Expert: Index the tokens produced by running the
+ * field's value through an Analyzer, and also
+ * separately disable the storing of norms. See
+ * {@link #NOT_ANALYZED_NO_NORMS} for what norms are
+ * and why you may want to disable them. */
+ ANALYZED_NO_NORMS {
+ @Override
+ public boolean isIndexed() { return true; }
+ @Override
+ public boolean isAnalyzed() { return true; }
+ @Override
+ public boolean omitNorms() { return true; }
+ };
+
+ /** Get the best representation of the index given the flags. */
+ public static Index toIndex(boolean indexed, boolean analyzed) {
+ return toIndex(indexed, analyzed, false);
+ }
+
+ /** Expert: Get the best representation of the index given the flags. */
+ public static Index toIndex(boolean indexed, boolean analyzed, boolean omitNorms) {
+
+ // If it is not indexed nothing else matters
+ if (!indexed) {
+ return Index.NO;
+ }
+
+ // typical, non-expert
+ if (!omitNorms) {
+ if (analyzed) {
+ return Index.ANALYZED;
+ }
+ return Index.NOT_ANALYZED;
+ }
+
+ // Expert: Norms omitted
+ if (analyzed) {
+ return Index.ANALYZED_NO_NORMS;
+ }
+ return Index.NOT_ANALYZED_NO_NORMS;
+ }
+
+ public abstract boolean isIndexed();
+ public abstract boolean isAnalyzed();
+ public abstract boolean omitNorms();
+ }
+
+ /** Specifies whether and how a field should have term vectors.
+ *
+ * @deprecated This is here only to ease transition from
+ * the pre-4.0 APIs. */
+ @Deprecated
+ public static enum TermVector {
+
+ /** Do not store term vectors.
+ */
+ NO {
+ @Override
+ public boolean isStored() { return false; }
+ @Override
+ public boolean withPositions() { return false; }
+ @Override
+ public boolean withOffsets() { return false; }
+ },
+
+ /** Store the term vectors of each document. A term vector is a list
+ * of the document's terms and their number of occurrences in that document. */
+ YES {
+ @Override
+ public boolean isStored() { return true; }
+ @Override
+ public boolean withPositions() { return false; }
+ @Override
+ public boolean withOffsets() { return false; }
+ },
+
+ /**
+ * Store the term vector + token position information
+ *
+ * @see #YES
+ */
+ WITH_POSITIONS {
+ @Override
+ public boolean isStored() { return true; }
+ @Override
+ public boolean withPositions() { return true; }
+ @Override
+ public boolean withOffsets() { return false; }
+ },
+
+ /**
+ * Store the term vector + Token offset information
+ *
+ * @see #YES
+ */
+ WITH_OFFSETS {
+ @Override
+ public boolean isStored() { return true; }
+ @Override
+ public boolean withPositions() { return false; }
+ @Override
+ public boolean withOffsets() { return true; }
+ },
+
+ /**
+ * Store the term vector + Token position and offset information
+ *
+ * @see #YES
+ * @see #WITH_POSITIONS
+ * @see #WITH_OFFSETS
+ */
+ WITH_POSITIONS_OFFSETS {
+ @Override
+ public boolean isStored() { return true; }
+ @Override
+ public boolean withPositions() { return true; }
+ @Override
+ public boolean withOffsets() { return true; }
+ };
+
+ /** Get the best representation of a TermVector given the flags. */
+ public static TermVector toTermVector(boolean stored, boolean withOffsets, boolean withPositions) {
+
+ // If it is not stored, nothing else matters.
+ if (!stored) {
+ return TermVector.NO;
+ }
+
+ if (withOffsets) {
+ if (withPositions) {
+ return Field.TermVector.WITH_POSITIONS_OFFSETS;
+ }
+ return Field.TermVector.WITH_OFFSETS;
+ }
+
+ if (withPositions) {
+ return Field.TermVector.WITH_POSITIONS;
+ }
+ return Field.TermVector.YES;
+ }
+
+ public abstract boolean isStored();
+ public abstract boolean withPositions();
+ public abstract boolean withOffsets();
+ }
+
+ /** Translates the pre-4.0 enums for specifying how a
+ * field should be indexed into the 4.0 {@link FieldType}
+ * approach.
+ *
+ * @deprecated This is here only to ease transition from
+ * the pre-4.0 APIs.
+ */
+ @Deprecated
+ public static final FieldType translateFieldType(Store store, Index index, TermVector termVector) {
+ final FieldType ft = new FieldType();
+
+ ft.setStored(store == Store.YES);
+
+ switch(index) {
+ case ANALYZED:
+ ft.setIndexed(true);
+ ft.setTokenized(true);
+ break;
+ case ANALYZED_NO_NORMS:
+ ft.setIndexed(true);
+ ft.setTokenized(true);
+ ft.setOmitNorms(true);
+ break;
+ case NOT_ANALYZED:
+ ft.setIndexed(true);
+ break;
+ case NOT_ANALYZED_NO_NORMS:
+ ft.setIndexed(true);
+ ft.setOmitNorms(true);
+ break;
+ case NO:
+ break;
+ }
+
+ switch(termVector) {
+ case NO:
+ break;
+ case YES:
+ ft.setStoreTermVectors(true);
+ break;
+ case WITH_POSITIONS:
+ ft.setStoreTermVectors(true);
+ ft.setStoreTermVectorPositions(true);
+ break;
+ case WITH_OFFSETS:
+ ft.setStoreTermVectors(true);
+ ft.setStoreTermVectorOffsets(true);
+ break;
+ case WITH_POSITIONS_OFFSETS:
+ ft.setStoreTermVectors(true);
+ ft.setStoreTermVectorPositions(true);
+ ft.setStoreTermVectorOffsets(true);
+ break;
+ }
+ ft.freeze();
+ return ft;
+ }
+
+ /**
+ * Create a field by specifying its name, value and how it will
+ * be saved in the index. Term vectors will not be stored in the index.
+ *
+ * @param name The name of the field
+ * @param value The string to process
+ * @param store Whether <code>value</code> should be stored in the index
+ * @param index Whether the field should be indexed, and if so, if it should
+ * be tokenized before indexing
+ * @throws NullPointerException if name or value is <code>null</code>
+ * @throws IllegalArgumentException if the field is neither stored nor indexed
+ *
+ * @deprecated Use {@link StringField}, {@link TextField} instead. */
+ @Deprecated
+ public Field(String name, String value, Store store, Index index) {
+ this(name, value, translateFieldType(store, index, TermVector.NO));
+ }
+
+ /**
+ * Create a field by specifying its name, value and how it will
+ * be saved in the index.
+ *
+ * @param name The name of the field
+ * @param value The string to process
+ * @param store Whether <code>value</code> should be stored in the index
+ * @param index Whether the field should be indexed, and if so, if it should
+ * be tokenized before indexing
+ * @param termVector Whether term vector should be stored
+ * @throws NullPointerException if name or value is <code>null</code>
+ * @throws IllegalArgumentException in any of the following situations:
+ * <ul>
+ * <li>the field is neither stored nor indexed</li>
+ * <li>the field is not indexed but termVector is <code>TermVector.YES</code></li>
+ * </ul>
+ *
+ * @deprecated Use {@link StringField}, {@link TextField} instead. */
+ @Deprecated
+ public Field(String name, String value, Store store, Index index, TermVector termVector) {
+ this(name, value, translateFieldType(store, index, termVector));
+ }
+
+ /**
+ * Create a tokenized and indexed field that is not stored. Term vectors will
+ * not be stored. The Reader is read only when the Document is added to the index,
+ * i.e. you may not close the Reader until {@link IndexWriter#addDocument(Document)}
+ * has been called.
+ *
+ * @param name The name of the field
+ * @param reader The reader with the content
+ * @throws NullPointerException if name or reader is <code>null</code>
+ *
+ * @deprecated Use {@link TextField} instead.
+ */
+ @Deprecated
+ public Field(String name, Reader reader) {
+ this(name, reader, TermVector.NO);
+ }
+
+ /**
+ * Create a tokenized and indexed field that is not stored, optionally with
+ * storing term vectors. The Reader is read only when the Document is added to the index,
+ * i.e. you may not close the Reader until {@link IndexWriter#addDocument(Document)}
+ * has been called.
+ *
+ * @param name The name of the field
+ * @param reader The reader with the content
+ * @param termVector Whether term vector should be stored
+ * @throws NullPointerException if name or reader is <code>null</code>
+ *
+ * @deprecated Use {@link TextField} instead.
+ */
+ @Deprecated
+ public Field(String name, Reader reader, TermVector termVector) {
+ this(name, reader, translateFieldType(Store.NO, Index.ANALYZED, termVector));
+ }
+
+ /**
+ * Create a tokenized and indexed field that is not stored. Term vectors will
+ * not be stored. This is useful for pre-analyzed fields.
+ * The TokenStream is read only when the Document is added to the index,
+ * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)}
+ * has been called.
+ *
+ * @param name The name of the field
+ * @param tokenStream The TokenStream with the content
+ * @throws NullPointerException if name or tokenStream is <code>null</code>
+ *
+ * @deprecated Use {@link TextField} instead.
+ */
+ @Deprecated
+ public Field(String name, TokenStream tokenStream) {
+ this(name, tokenStream, TermVector.NO);
+ }
+
+ /**
+ * Create a tokenized and indexed field that is not stored, optionally with
+ * storing term vectors. This is useful for pre-analyzed fields.
+ * The TokenStream is read only when the Document is added to the index,
+ * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)}
+ * has been called.
+ *
+ * @param name The name of the field
+ * @param tokenStream The TokenStream with the content
+ * @param termVector Whether term vector should be stored
+ * @throws NullPointerException if name or tokenStream is <code>null</code>
+ *
+ * @deprecated Use {@link TextField} instead.
+ */
+ @Deprecated
+ public Field(String name, TokenStream tokenStream, TermVector termVector) {
+ this(name, tokenStream, translateFieldType(Store.NO, Index.ANALYZED, termVector));
+ }
+
+ /**
+ * Create a stored field with binary value. The value is not indexed and no term vectors are stored.
+ *
+ * @param name The name of the field
+ * @param value The binary value
+ *
+ * @deprecated Use {@link BinaryField} instead.
+ */
+ @Deprecated
+ public Field(String name, byte[] value) {
+ this(name, value, translateFieldType(Store.YES, Index.NO, TermVector.NO));
+ }
+
+ /**
+ * Create a stored field from a slice of a binary value. The value is not indexed and no term vectors are stored.
+ *
+ * @param name The name of the field
+ * @param value The binary value
+ * @param offset Starting offset in value where this Field's bytes are
+ * @param length Number of bytes to use for this Field, starting at offset
+ *
+ * @deprecated Use {@link BinaryField} instead.
+ */
+ @Deprecated
+ public Field(String name, byte[] value, int offset, int length) {
+ this(name, value, offset, length, translateFieldType(Store.YES, Index.NO, TermVector.NO));
+ }
}
Modified: lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/StringField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/StringField.java?rev=1230683&r1=1230682&r2=1230683&view=diff
==============================================================================
--- lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/StringField.java (original)
+++ lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/StringField.java Thu Jan 12 18:25:09 2012
@@ -59,7 +59,7 @@ public final class StringField extends F
public StringField(String name, String value) {
super(name, value, TYPE_UNSTORED);
}
-
+
@Override
public String stringValue() {
return (fieldsData == null) ? null : fieldsData.toString();
Modified: lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/TextField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/TextField.java?rev=1230683&r1=1230682&r2=1230683&view=diff
==============================================================================
--- lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/TextField.java (original)
+++ lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/TextField.java Thu Jan 12 18:25:09 2012
@@ -48,6 +48,8 @@ public final class TextField extends Fie
TYPE_STORED.freeze();
}
+ // nocommit how to sugar term vectors...?
+
/** Creates a new un-stored TextField */
public TextField(String name, Reader reader) {
super(name, reader, TextField.TYPE_UNSTORED);
Modified: lucene/dev/branches/lucene3453/lucene/src/test/org/apache/lucene/document/TestDocument.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3453/lucene/src/test/org/apache/lucene/document/TestDocument.java?rev=1230683&r1=1230682&r2=1230683&view=diff
==============================================================================
--- lucene/dev/branches/lucene3453/lucene/src/test/org/apache/lucene/document/TestDocument.java (original)
+++ lucene/dev/branches/lucene3453/lucene/src/test/org/apache/lucene/document/TestDocument.java Thu Jan 12 18:25:09 2012
@@ -1,18 +1,5 @@
package org.apache.lucene.document;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.LuceneTestCase;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -30,6 +17,26 @@ import org.apache.lucene.util.LuceneTest
* limitations under the License.
*/
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+
+
/**
* Tests {@link Document} class.
*/
@@ -288,4 +295,69 @@ public class TestDocument extends Lucene
// expected
}
}
+
+ // LUCENE-3682
+ public void testTransitionAPI() throws Exception {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random, dir);
+
+ Document doc = new Document();
+ doc.add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO));
+ doc.add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED));
+ doc.add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED));
+ doc.add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED));
+ doc.add(new Field("tokenized_reader", new StringReader("abc xyz")));
+ doc.add(new Field("tokenized_tokenstream", w.w.getAnalyzer().tokenStream("tokenized_tokenstream", new StringReader("abc xyz"))));
+ doc.add(new Field("binary", new byte[10]));
+ doc.add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
+ doc.add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
+ doc.add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
+ doc.add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+ w.addDocument(doc);
+ IndexReader r = w.getReader();
+ w.close();
+
+ doc = r.document(0);
+ // 4 stored fields
+ assertEquals(4, doc.getFields().size());
+ assertEquals("abc", doc.get("stored"));
+ assertEquals("abc xyz", doc.get("stored_indexed"));
+ assertEquals("abc xyz", doc.get("stored_tokenized"));
+ final BytesRef br = doc.getBinaryValue("binary");
+ assertNotNull(br);
+ assertEquals(10, br.length);
+
+ IndexSearcher s = new IndexSearcher(r);
+ assertEquals(1, s.search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).totalHits);
+ assertEquals(1, s.search(new TermQuery(new Term("stored_tokenized", "abc")), 1).totalHits);
+ assertEquals(1, s.search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).totalHits);
+ assertEquals(1, s.search(new TermQuery(new Term("indexed", "abc xyz")), 1).totalHits);
+ assertEquals(1, s.search(new TermQuery(new Term("tokenized", "abc")), 1).totalHits);
+ assertEquals(1, s.search(new TermQuery(new Term("tokenized", "xyz")), 1).totalHits);
+ assertEquals(1, s.search(new TermQuery(new Term("tokenized_reader", "abc")), 1).totalHits);
+ assertEquals(1, s.search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).totalHits);
+ assertEquals(1, s.search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).totalHits);
+ assertEquals(1, s.search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).totalHits);
+
+ for(String field : new String[] {"tv", "tv_pos", "tv_off", "tv_pos_off"}) {
+ Fields tvFields = r.getTermVectors(0);
+ Terms tvs = tvFields.terms(field);
+ assertNotNull(tvs);
+ assertEquals(2, tvs.getUniqueTermCount());
+ TermsEnum tvsEnum = tvs.iterator(null);
+ assertEquals(new BytesRef("abc"), tvsEnum.next());
+ final DocsAndPositionsEnum dpEnum = tvsEnum.docsAndPositions(null, null);
+ if (field.equals("tv")) {
+ assertNull(dpEnum);
+ } else {
+ assertNotNull(dpEnum);
+ }
+ assertEquals(new BytesRef("xyz"), tvsEnum.next());
+ assertNull(tvsEnum.next());
+ }
+
+ r.close();
+ dir.close();
+ }
}