You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/01/12 19:25:09 UTC

svn commit: r1230683 - in /lucene/dev/branches/lucene3453/lucene/src: java/org/apache/lucene/document/ test/org/apache/lucene/document/

Author: mikemccand
Date: Thu Jan 12 18:25:09 2012
New Revision: 1230683

URL: http://svn.apache.org/viewvc?rev=1230683&view=rev
Log:
LUCENE-3682: add deprecated transition API to Field

Modified:
    lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/Field.java
    lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/StringField.java
    lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/TextField.java
    lucene/dev/branches/lucene3453/lucene/src/test/org/apache/lucene/document/TestDocument.java

Modified: lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/Field.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/Field.java?rev=1230683&r1=1230682&r2=1230683&view=diff
==============================================================================
--- lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/Field.java (original)
+++ lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/Field.java Thu Jan 12 18:25:09 2012
@@ -486,4 +486,447 @@ public class Field implements IndexableF
 
     throw new IllegalArgumentException("Field must have either TokenStream, String, Reader or Number value");
   }
+
+  
+  //
+  // Deprecated transition API below:
+  //
+
+  /** Specifies whether and how a field should be stored.
+   *
+   *  @deprecated This is here only to ease transition from
+   *  the pre-4.0 APIs. */
+  @Deprecated
+  public static enum Store {
+
+    /** Store the original field value in the index. This is useful for short texts
+     * like a document's title which should be displayed with the results. The
+     * value is stored in its original form, i.e. no analyzer is used before it is
+     * stored.
+     */
+    YES {
+      @Override
+      public boolean isStored() { return true; }
+    },
+
+    /** Do not store the field value in the index. */
+    NO {
+      @Override
+      public boolean isStored() { return false; }
+    };
+    /** Returns true if this setting means the field value is stored in the index. */
+    public abstract boolean isStored();
+  }
+
+  /** Specifies whether and how a field should be indexed.
+   *
+   *  @deprecated This is here only to ease transition from
+   *  the pre-4.0 APIs. */
+  @Deprecated
+  public static enum Index {
+
+    /** Do not index the field value. This field can thus not be searched,
+     * but one can still access its contents provided it is
+     * {@link Field.Store stored}. */
+    NO {
+      @Override
+      public boolean isIndexed()  { return false; }
+      @Override
+      public boolean isAnalyzed() { return false; }
+      @Override
+      public boolean omitNorms()  { return true;  }   
+    },
+
+    /** Index the tokens produced by running the field's
+     * value through an Analyzer.  This is useful for
+     * common text. */
+    ANALYZED {
+      @Override
+      public boolean isIndexed()  { return true;  }
+      @Override
+      public boolean isAnalyzed() { return true;  }
+      @Override
+      public boolean omitNorms()  { return false; }   	
+    },
+
+    /** Index the field's value without using an Analyzer, so it can be searched.
+     * As no analyzer is used the value will be stored as a single term. This is
+     * useful for unique Ids like product numbers.
+     */
+    NOT_ANALYZED {
+      @Override
+      public boolean isIndexed()  { return true;  }
+      @Override
+      public boolean isAnalyzed() { return false; }
+      @Override
+      public boolean omitNorms()  { return false; }   	
+    },
+
+    /** Expert: Index the field's value without an Analyzer,
+     * and also disable the indexing of norms.  Note that you
+     * can also separately enable/disable norms by calling
+     * {@link Field#setOmitNorms}.  No norms means that
+     * index-time field and document boosting and field
+     * length normalization are disabled.  The benefit is
+     * less memory usage as norms take up one byte of RAM
+     * per indexed field for every document in the index,
+     * during searching.  Note that once you index a given
+     * field <i>with</i> norms enabled, disabling norms will
+     * have no effect.  In other words, for this to have the
+     * above described effect on a field, all instances of
+     * that field must be indexed with NOT_ANALYZED_NO_NORMS
+     * from the beginning. */
+    NOT_ANALYZED_NO_NORMS {
+      @Override
+      public boolean isIndexed()  { return true;  }
+      @Override
+      public boolean isAnalyzed() { return false; }
+      @Override
+      public boolean omitNorms()  { return true;  }   	
+    },
+
+    /** Expert: Index the tokens produced by running the
+     *  field's value through an Analyzer, and also
+     *  separately disable the storing of norms.  See
+     *  {@link #NOT_ANALYZED_NO_NORMS} for what norms are
+     *  and why you may want to disable them. */
+    ANALYZED_NO_NORMS {
+      @Override
+      public boolean isIndexed()  { return true;  }
+      @Override
+      public boolean isAnalyzed() { return true;  }
+      @Override
+      public boolean omitNorms()  { return true;  }   	
+    };
+
+    /** Get the best representation of the index given the flags. */
+    public static Index toIndex(boolean indexed, boolean analyzed) {
+      return toIndex(indexed, analyzed, false);
+    }
+
+    /** Expert: Get the best representation of the index given the flags. */
+    public static Index toIndex(boolean indexed, boolean analyzed, boolean omitNorms) {
+
+      // If it is not indexed nothing else matters
+      if (!indexed) {
+        return Index.NO;
+      }
+
+      // typical, non-expert
+      if (!omitNorms) {
+        if (analyzed) {
+          return Index.ANALYZED;
+        }
+        return Index.NOT_ANALYZED;
+      }
+
+      // Expert: Norms omitted
+      if (analyzed) {
+        return Index.ANALYZED_NO_NORMS;
+      }
+      return Index.NOT_ANALYZED_NO_NORMS;
+    }
+
+    public abstract boolean isIndexed();
+    public abstract boolean isAnalyzed();
+    public abstract boolean omitNorms();  	
+  }
+
+  /** Specifies whether and how a field should have term vectors.
+   *
+   *  @deprecated This is here only to ease transition from
+   *  the pre-4.0 APIs. */
+  @Deprecated
+  public static enum TermVector {
+    
+    /** Do not store term vectors. 
+     */
+    NO {
+      @Override
+      public boolean isStored()      { return false; }
+      @Override
+      public boolean withPositions() { return false; }
+      @Override
+      public boolean withOffsets()   { return false; }
+    },
+    
+    /** Store the term vectors of each document. A term vector is a list
+     * of the document's terms and their number of occurrences in that document. */
+    YES {
+      @Override
+      public boolean isStored()      { return true;  }
+      @Override
+      public boolean withPositions() { return false; }
+      @Override
+      public boolean withOffsets()   { return false; }
+    },
+    
+    /**
+     * Store the term vector + token position information
+     * 
+     * @see #YES
+     */ 
+    WITH_POSITIONS {
+      @Override
+      public boolean isStored()      { return true;  }
+      @Override
+      public boolean withPositions() { return true;  }
+      @Override
+      public boolean withOffsets()   { return false; }
+    },
+    
+    /**
+     * Store the term vector + token offset information
+     * 
+     * @see #YES
+     */ 
+    WITH_OFFSETS {
+      @Override
+      public boolean isStored()      { return true;  }
+      @Override
+      public boolean withPositions() { return false; }
+      @Override
+      public boolean withOffsets()   { return true;  }
+    },
+    
+    /**
+     * Store the term vector + token position and offset information
+     * 
+     * @see #YES
+     * @see #WITH_POSITIONS
+     * @see #WITH_OFFSETS
+     */ 
+    WITH_POSITIONS_OFFSETS {
+      @Override
+      public boolean isStored()      { return true;  }
+      @Override
+      public boolean withPositions() { return true;  }
+      @Override
+      public boolean withOffsets()   { return true;  }
+    };
+
+    /** Get the best representation of a TermVector given the flags. */
+    public static TermVector toTermVector(boolean stored, boolean withOffsets, boolean withPositions) {
+
+      // If it is not stored, nothing else matters.
+      if (!stored) {
+        return TermVector.NO;
+      }
+
+      if (withOffsets) {
+        if (withPositions) {
+          return Field.TermVector.WITH_POSITIONS_OFFSETS;
+        }
+        return Field.TermVector.WITH_OFFSETS;
+      }
+
+      if (withPositions) {
+        return Field.TermVector.WITH_POSITIONS;
+      }
+      return Field.TermVector.YES;
+    }
+
+    public abstract boolean isStored();
+    public abstract boolean withPositions();
+    public abstract boolean withOffsets();
+  }
+
+  /** Translates the pre-4.0 {@link Store}, {@link Index} and {@link TermVector}
+   *  enums into an equivalent 4.0 {@link FieldType}.  The returned type has
+   *  already been frozen via {@link FieldType#freeze}.
+   *
+   * @deprecated This is here only to ease transition from
+   * the pre-4.0 APIs.
+   */
+  @Deprecated
+  public static final FieldType translateFieldType(Store store, Index index, TermVector termVector) {
+    final FieldType ft = new FieldType();
+
+    ft.setStored(store == Store.YES);
+
+    switch(index) {
+    case ANALYZED:
+      ft.setIndexed(true);
+      ft.setTokenized(true);
+      break;
+    case ANALYZED_NO_NORMS:
+      ft.setIndexed(true);
+      ft.setTokenized(true);
+      ft.setOmitNorms(true);
+      break;
+    case NOT_ANALYZED:  // indexed, but setTokenized is not called
+      ft.setIndexed(true);
+      break;
+    case NOT_ANALYZED_NO_NORMS:
+      ft.setIndexed(true);
+      ft.setOmitNorms(true);
+      break;
+    case NO:  // not indexed: no index-related setters are called
+      break;
+    }
+
+    switch(termVector) {
+    case NO:  // no term vector setters are called
+      break;
+    case YES:
+      ft.setStoreTermVectors(true);
+      break;
+    case WITH_POSITIONS:
+      ft.setStoreTermVectors(true);
+      ft.setStoreTermVectorPositions(true);
+      break;
+    case WITH_OFFSETS:
+      ft.setStoreTermVectors(true);
+      ft.setStoreTermVectorOffsets(true);
+      break;
+    case WITH_POSITIONS_OFFSETS:
+      ft.setStoreTermVectors(true);
+      ft.setStoreTermVectorPositions(true);
+      ft.setStoreTermVectorOffsets(true);
+      break;
+    }
+    ft.freeze();
+    return ft;
+  }
+
+  /**
+   * Create a field by specifying its name, value and how it will
+   * be saved in the index. Term vectors will not be stored in the index.
+   * 
+   * @param name The name of the field
+   * @param value The string to process
+   * @param store Whether <code>value</code> should be stored in the index
+   * @param index Whether the field should be indexed, and if so, if it should
+   *  be tokenized before indexing 
+   * @throws NullPointerException if name or value is <code>null</code>
+   * @throws IllegalArgumentException if the field is neither stored nor indexed 
+   *
+   * @deprecated Use {@link StringField}, {@link TextField} instead. */
+  @Deprecated
+  public Field(String name, String value, Store store, Index index) {
+    this(name, value, translateFieldType(store, index, TermVector.NO));
+  }
+
+  /**
+   * Create a field by specifying its name, value and how it will
+   * be saved in the index.
+   * 
+   * @param name The name of the field
+   * @param value The string to process
+   * @param store Whether <code>value</code> should be stored in the index
+   * @param index Whether the field should be indexed, and if so, if it should
+   *  be tokenized before indexing 
+   * @param termVector Whether term vector should be stored
+   * @throws NullPointerException if name or value is <code>null</code>
+   * @throws IllegalArgumentException in any of the following situations:
+   * <ul> 
+   *  <li>the field is neither stored nor indexed</li> 
+   *  <li>the field is not indexed but termVector is <code>TermVector.YES</code></li>
+   * </ul> 
+   *
+   * @deprecated Use {@link StringField}, {@link TextField} instead. */
+  @Deprecated
+  public Field(String name, String value, Store store, Index index, TermVector termVector) {  
+    this(name, value, translateFieldType(store, index, termVector));
+  }
+
+  /**
+   * Create a tokenized and indexed field that is not stored. Term vectors will
+   * not be stored.  The Reader is read only when the Document is added to the index,
+   * i.e. you may not close the Reader until {@link IndexWriter#addDocument(Document)}
+   * has been called.
+   * 
+   * @param name The name of the field
+   * @param reader The reader with the content
+   * @throws NullPointerException if name or reader is <code>null</code>
+   *
+   * @deprecated Use {@link TextField} instead.
+   */
+  @Deprecated
+  public Field(String name, Reader reader) {
+    this(name, reader, TermVector.NO);
+  }
+
+  /**
+   * Create a tokenized and indexed field that is not stored, optionally with 
+   * storing term vectors.  The Reader is read only when the Document is added to the index,
+   * i.e. you may not close the Reader until {@link IndexWriter#addDocument(Document)}
+   * has been called.
+   * 
+   * @param name The name of the field
+   * @param reader The reader with the content
+   * @param termVector Whether term vector should be stored
+   * @throws NullPointerException if name or reader is <code>null</code>
+   *
+   * @deprecated Use {@link TextField} instead.
+   */ 
+  @Deprecated
+  public Field(String name, Reader reader, TermVector termVector) {
+    this(name, reader, translateFieldType(Store.NO, Index.ANALYZED, termVector));
+  }
+
+  /**
+   * Create a tokenized and indexed field that is not stored. Term vectors will
+   * not be stored. This is useful for pre-analyzed fields.
+   * The TokenStream is read only when the Document is added to the index,
+   * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)}
+   * has been called.
+   * 
+   * @param name The name of the field
+   * @param tokenStream The TokenStream with the content
+   * @throws NullPointerException if name or tokenStream is <code>null</code>
+   *
+   * @deprecated Use {@link TextField} instead
+   */ 
+  @Deprecated
+  public Field(String name, TokenStream tokenStream) {
+    this(name, tokenStream, TermVector.NO);
+  }
+
+  /**
+   * Create a tokenized and indexed field that is not stored, optionally with 
+   * storing term vectors.  This is useful for pre-analyzed fields.
+   * The TokenStream is read only when the Document is added to the index,
+   * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)}
+   * has been called.
+   * 
+   * @param name The name of the field
+   * @param tokenStream The TokenStream with the content
+   * @param termVector Whether term vector should be stored
+   * @throws NullPointerException if name or tokenStream is <code>null</code>
+   *
+   * @deprecated Use {@link TextField} instead
+   */ 
+  @Deprecated
+  public Field(String name, TokenStream tokenStream, TermVector termVector) {
+    this(name, tokenStream, translateFieldType(Store.NO, Index.ANALYZED, termVector));
+  }
+
+  /**
+   * Create a stored field with binary value.  The field is not indexed and has no term vectors.
+   * 
+   * @param name The name of the field
+   * @param value The binary value
+   *
+   * @deprecated Use {@link BinaryField} instead.
+   */
+  @Deprecated
+  public Field(String name, byte[] value) {
+    this(name, value, translateFieldType(Store.YES, Index.NO, TermVector.NO));
+  }
+
+  /**
+   * Create a stored field with binary value.  The field is not indexed and has no term vectors.
+   * 
+   * @param name The name of the field
+   * @param value The binary value
+   * @param offset Starting offset in value where this Field's bytes are
+   * @param length Number of bytes to use for this Field, starting at offset
+   *
+   * @deprecated Use {@link BinaryField} instead.
+   */
+  @Deprecated
+  public Field(String name, byte[] value, int offset, int length) {
+    this(name, value, offset, length, translateFieldType(Store.YES, Index.NO, TermVector.NO));
+  }
 }

Modified: lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/StringField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/StringField.java?rev=1230683&r1=1230682&r2=1230683&view=diff
==============================================================================
--- lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/StringField.java (original)
+++ lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/StringField.java Thu Jan 12 18:25:09 2012
@@ -59,7 +59,7 @@ public final class StringField extends F
   public StringField(String name, String value) {
     super(name, value, TYPE_UNSTORED);
   }
-  
+
   @Override
   public String stringValue() {
     return (fieldsData == null) ? null : fieldsData.toString();

Modified: lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/TextField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/TextField.java?rev=1230683&r1=1230682&r2=1230683&view=diff
==============================================================================
--- lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/TextField.java (original)
+++ lucene/dev/branches/lucene3453/lucene/src/java/org/apache/lucene/document/TextField.java Thu Jan 12 18:25:09 2012
@@ -48,6 +48,8 @@ public final class TextField extends Fie
     TYPE_STORED.freeze();
   }
 
+  // nocommit how to sugar term vectors...?
+
   /** Creates a new un-stored TextField */
   public TextField(String name, Reader reader) {
     super(name, reader, TextField.TYPE_UNSTORED);

Modified: lucene/dev/branches/lucene3453/lucene/src/test/org/apache/lucene/document/TestDocument.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene3453/lucene/src/test/org/apache/lucene/document/TestDocument.java?rev=1230683&r1=1230682&r2=1230683&view=diff
==============================================================================
--- lucene/dev/branches/lucene3453/lucene/src/test/org/apache/lucene/document/TestDocument.java (original)
+++ lucene/dev/branches/lucene3453/lucene/src/test/org/apache/lucene/document/TestDocument.java Thu Jan 12 18:25:09 2012
@@ -1,18 +1,5 @@
 package org.apache.lucene.document;
 
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.index.RandomIndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.LuceneTestCase;
-
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -30,6 +17,26 @@ import org.apache.lucene.util.LuceneTest
  * limitations under the License.
  */
 
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+
+
 /**
  * Tests {@link Document} class.
  */
@@ -288,4 +295,69 @@ public class TestDocument extends Lucene
       // expected
     }
   }
+
+  // LUCENE-3682: fields built with the deprecated constructors/enums must still be stored, searchable and have term vectors
+  public void testTransitionAPI() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random, dir);
+
+    Document doc = new Document();
+    doc.add(new Field("stored", "abc", Field.Store.YES, Field.Index.NO));
+    doc.add(new Field("stored_indexed", "abc xyz", Field.Store.YES, Field.Index.NOT_ANALYZED));
+    doc.add(new Field("stored_tokenized", "abc xyz", Field.Store.YES, Field.Index.ANALYZED));
+    doc.add(new Field("indexed", "abc xyz", Field.Store.NO, Field.Index.NOT_ANALYZED));
+    doc.add(new Field("tokenized", "abc xyz", Field.Store.NO, Field.Index.ANALYZED));
+    doc.add(new Field("tokenized_reader", new StringReader("abc xyz")));
+    doc.add(new Field("tokenized_tokenstream", w.w.getAnalyzer().tokenStream("tokenized_tokenstream", new StringReader("abc xyz"))));
+    doc.add(new Field("binary", new byte[10]));
+    doc.add(new Field("tv", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
+    doc.add(new Field("tv_pos", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
+    doc.add(new Field("tv_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS));
+    doc.add(new Field("tv_pos_off", "abc xyz", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    w.addDocument(doc);
+    IndexReader r = w.getReader();
+    w.close();
+
+    doc = r.document(0);
+    // 4 stored fields: stored, stored_indexed, stored_tokenized, binary
+    assertEquals(4, doc.getFields().size());
+    assertEquals("abc", doc.get("stored"));
+    assertEquals("abc xyz", doc.get("stored_indexed"));
+    assertEquals("abc xyz", doc.get("stored_tokenized"));
+    final BytesRef br = doc.getBinaryValue("binary");
+    assertNotNull(br);
+    assertEquals(10, br.length);
+
+    IndexSearcher s = new IndexSearcher(r);
+    assertEquals(1, s.search(new TermQuery(new Term("stored_indexed", "abc xyz")), 1).totalHits);
+    assertEquals(1, s.search(new TermQuery(new Term("stored_tokenized", "abc")), 1).totalHits);
+    assertEquals(1, s.search(new TermQuery(new Term("stored_tokenized", "xyz")), 1).totalHits);
+    assertEquals(1, s.search(new TermQuery(new Term("indexed", "abc xyz")), 1).totalHits);
+    assertEquals(1, s.search(new TermQuery(new Term("tokenized", "abc")), 1).totalHits);
+    assertEquals(1, s.search(new TermQuery(new Term("tokenized", "xyz")), 1).totalHits);
+    assertEquals(1, s.search(new TermQuery(new Term("tokenized_reader", "abc")), 1).totalHits);
+    assertEquals(1, s.search(new TermQuery(new Term("tokenized_reader", "xyz")), 1).totalHits);
+    assertEquals(1, s.search(new TermQuery(new Term("tokenized_tokenstream", "abc")), 1).totalHits);
+    assertEquals(1, s.search(new TermQuery(new Term("tokenized_tokenstream", "xyz")), 1).totalHits);
+
+    for(String field : new String[] {"tv", "tv_pos", "tv_off", "tv_pos_off"}) {
+      Fields tvFields = r.getTermVectors(0);
+      Terms tvs = tvFields.terms(field);
+      assertNotNull(tvs);
+      assertEquals(2, tvs.getUniqueTermCount());
+      TermsEnum tvsEnum = tvs.iterator(null);
+      assertEquals(new BytesRef("abc"), tvsEnum.next());
+      final DocsAndPositionsEnum dpEnum = tvsEnum.docsAndPositions(null, null);
+      if (field.equals("tv")) {
+        assertNull(dpEnum);  // "tv" was added with plain TermVector.YES (no positions/offsets requested)
+      } else {
+        assertNotNull(dpEnum);
+      }
+      assertEquals(new BytesRef("xyz"), tvsEnum.next());
+      assertNull(tvsEnum.next());
+    }
+
+    r.close();
+    dir.close();
+  }
 }