You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by bu...@apache.org on 2010/07/22 21:34:52 UTC

svn commit: r966819 [5/20] - in /lucene/dev/branches/realtime_search: ./ lucene/ lucene/backwards/ lucene/contrib/ lucene/contrib/benchmark/conf/ lucene/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ lucene/contrib/benchmark/src/j...

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldsReader.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldsReader.java Thu Jul 22 19:34:35 2010
@@ -100,13 +100,15 @@ final class FieldsReader implements Clon
       fieldInfos = fn;
 
       cloneableFieldsStream = d.openInput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.FIELDS_EXTENSION), readBufferSize);
-      cloneableIndexStream = d.openInput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.FIELDS_INDEX_EXTENSION), readBufferSize);
+      final String indexStreamFN = IndexFileNames.segmentFileName(segment, "", IndexFileNames.FIELDS_INDEX_EXTENSION);
+      cloneableIndexStream = d.openInput(indexStreamFN, readBufferSize);
       
       format = cloneableIndexStream.readInt();
 
+      if (format < FieldsWriter.FORMAT_MINIMUM)
+        throw new IndexFormatTooOldException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT);
       if (format > FieldsWriter.FORMAT_CURRENT)
-        throw new CorruptIndexException("Incompatible format version: " + format + " expected " 
-                                        + FieldsWriter.FORMAT_CURRENT + " or lower");
+        throw new IndexFormatTooNewException(indexStreamFN, format, FieldsWriter.FORMAT_MINIMUM, FieldsWriter.FORMAT_CURRENT);
 
       fieldsStream = (IndexInput) cloneableFieldsStream.clone();
 
@@ -185,11 +187,9 @@ final class FieldsReader implements Clon
   }
 
   boolean canReadRawDocs() {
-    // Disable reading raw docs in 2.x format, because of the removal of compressed
-    // fields in 3.0. We don't want rawDocs() to decode field bits to figure out
-    // if a field was compressed, hence we enforce ordinary (non-raw) stored field merges
-    // for <3.0 indexes.
-    return format >= FieldsWriter.FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
+    // Since we now support only the 3.0+ format, always return true.
+    // This method is kept in place because it may help with later format changes.
+    return true;
   }
 
   final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
@@ -219,7 +219,10 @@ final class FieldsReader implements Clon
         break;//Get out of this loop
       }
       else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
-        addFieldLazy(doc, fi, binary, tokenize);
+        addFieldLazy(doc, fi, binary, tokenize, true);
+      }
+      else if (acceptField.equals(FieldSelectorResult.LATENT)) {
+        addFieldLazy(doc, fi, binary, tokenize, false);
       }
       else if (acceptField.equals(FieldSelectorResult.SIZE)){
         skipField(addFieldSize(doc, fi, binary));
@@ -274,12 +277,12 @@ final class FieldsReader implements Clon
     fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
   }
 
-  private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws IOException {
+  private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize, boolean cacheResult) throws IOException {
     if (binary) {
       int toRead = fieldsStream.readVInt();
       long pointer = fieldsStream.getFilePointer();
       //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
-      doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary));
+      doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, cacheResult));
       //Need to move the pointer ahead by toRead positions
       fieldsStream.seek(pointer + toRead);
     } else {
@@ -292,7 +295,7 @@ final class FieldsReader implements Clon
       long pointer = fieldsStream.getFilePointer();
       //Skip ahead of where we are by the length of what is stored
       fieldsStream.seek(pointer+length);
-      f = new LazyField(fi.name, store, index, termVector, length, pointer, binary);
+      f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, cacheResult);
       f.setOmitNorms(fi.omitNorms);
       f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
 
@@ -303,7 +306,6 @@ final class FieldsReader implements Clon
 
   private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws CorruptIndexException, IOException {
 
-    //we have a binary stored field, and it may be compressed
     if (binary) {
       int toRead = fieldsStream.readVInt();
       final byte[] b = new byte[toRead];
@@ -349,22 +351,25 @@ final class FieldsReader implements Clon
   private class LazyField extends AbstractField implements Fieldable {
     private int toRead;
     private long pointer;
+    private final boolean cacheResult;
 
-    public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary) {
+    public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary, boolean cacheResult) {
       super(name, store, Field.Index.NO, Field.TermVector.NO);
       this.toRead = toRead;
       this.pointer = pointer;
       this.isBinary = isBinary;
+      this.cacheResult = cacheResult;
       if (isBinary)
         binaryLength = toRead;
       lazy = true;
     }
 
-    public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary) {
+    public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary, boolean cacheResult) {
       super(name, store, index, termVector);
       this.toRead = toRead;
       this.pointer = pointer;
       this.isBinary = isBinary;
+      this.cacheResult = cacheResult;
       if (isBinary)
         binaryLength = toRead;
       lazy = true;
@@ -404,17 +409,23 @@ final class FieldsReader implements Clon
         return null;
       else {
         if (fieldsData == null) {
+          String result = null;
           IndexInput localFieldsStream = getFieldStream();
           try {
             localFieldsStream.seek(pointer);
             byte[] bytes = new byte[toRead];
             localFieldsStream.readBytes(bytes, 0, toRead);
-            fieldsData = new String(bytes, "UTF-8");
+            result = new String(bytes, "UTF-8");
           } catch (IOException e) {
             throw new FieldReaderException(e);
           }
+          if (cacheResult == true){
+            fieldsData = result;
+          }
+          return result;
+        } else {
+          return (String) fieldsData;
         }
-        return (String) fieldsData;
       }
     }
 
@@ -438,16 +449,19 @@ final class FieldsReader implements Clon
           try {
             localFieldsStream.seek(pointer);
             localFieldsStream.readBytes(b, 0, toRead);
-            fieldsData = b;
           } catch (IOException e) {
             throw new FieldReaderException(e);
           }
 
           binaryOffset = 0;
           binaryLength = toRead;
+          if (cacheResult == true){
+            fieldsData = b;
+          }
+          return b;
+        } else {
+          return (byte[]) fieldsData;
         }
-
-        return (byte[]) fieldsData;
       } else
         return null;     
     }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldsWriter.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldsWriter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FieldsWriter.java Thu Jul 22 19:34:35 2010
@@ -39,6 +39,9 @@ final class FieldsWriter
   // switch to a new format!
   static final int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
   
+  // when removing support for old versions, leave the last supported version here
+  static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
+  
     private FieldInfos fieldInfos;
 
     private IndexOutput fieldsStream;

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Thu Jul 22 19:34:35 2010
@@ -361,20 +361,6 @@ public class FilterIndexReader extends I
     in.setNorm(d, f, b);
   }
 
-  // final to force subclass to impl flex APIs, instead
-  @Override
-  public final TermEnum terms() throws IOException {
-    ensureOpen();
-    return in.terms();
-  }
-
-  // final to force subclass to impl flex APIs, instead
-  @Override
-  public final TermEnum terms(Term t) throws IOException {
-    ensureOpen();
-    return in.terms(t);
-  }
-
   @Override
   public int docFreq(Term t) throws IOException {
     ensureOpen();
@@ -387,27 +373,6 @@ public class FilterIndexReader extends I
     return in.docFreq(field, t);
   }
 
-  // final to force subclass to impl flex APIs, instead
-  @Override
-  public final TermDocs termDocs() throws IOException {
-    ensureOpen();
-    return in.termDocs();
-  }
-
-  // final to force subclass to impl flex APIs, instead
-  @Override
-  public final TermDocs termDocs(Term term) throws IOException {
-    ensureOpen();
-    return in.termDocs(term);
-  }
-
-  // final to force subclass to impl flex APIs, instead
-  @Override
-  public final TermPositions termPositions() throws IOException {
-    ensureOpen();
-    return in.termPositions();
-  }
-
   @Override
   protected void doDelete(int n) throws  CorruptIndexException, IOException { in.deleteDocument(n); }
   

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java Thu Jul 22 19:34:35 2010
@@ -102,6 +102,7 @@ final class IndexFileDeleter {
   private DocumentsWriter docWriter;
 
   final boolean startingCommitDeleted;
+  private SegmentInfos lastSegmentInfos;
 
   /** Change to true to see details of reference counts when
    *  infoStream != null */
@@ -134,8 +135,10 @@ final class IndexFileDeleter {
     this.docWriter = docWriter;
     this.infoStream = infoStream;
 
+    final String currentSegmentsFile = segmentInfos.getCurrentSegmentFileName();
+
     if (infoStream != null)
-      message("init: current segments file is \"" + segmentInfos.getCurrentSegmentFileName() + "\"; deletionPolicy=" + policy);
+      message("init: current segments file is \"" + currentSegmentsFile + "\"; deletionPolicy=" + policy);
 
     this.policy = policy;
     this.directory = directory;
@@ -146,7 +149,6 @@ final class IndexFileDeleter {
     indexFilenameFilter = new IndexFileNameFilter(codecs);
     
     CommitPoint currentCommitPoint = null;
-    boolean seenIndexFiles = false;
     String[] files = null;
     try {
       files = directory.listAll();
@@ -158,7 +160,6 @@ final class IndexFileDeleter {
     for (String fileName : files) {
 
       if ((indexFilenameFilter.accept(null, fileName)) && !fileName.endsWith("write.lock") && !fileName.equals(IndexFileNames.SEGMENTS_GEN)) {
-        seenIndexFiles = true;
         
         // Add this file to refCounts with initial count 0:
         getRefCount(fileName);
@@ -168,43 +169,51 @@ final class IndexFileDeleter {
           // This is a commit (segments or segments_N), and
           // it's valid (<= the max gen).  Load it, then
           // incref all files it refers to:
-          if (SegmentInfos.generationFromSegmentsFileName(fileName) <= currentGen) {
+          if (infoStream != null) {
+            message("init: load commit \"" + fileName + "\"");
+          }
+          SegmentInfos sis = new SegmentInfos();
+          try {
+            sis.read(directory, fileName, codecs);
+          } catch (FileNotFoundException e) {
+            // LUCENE-948: on NFS (and maybe others), if
+            // you have writers switching back and forth
+            // between machines, it's very likely that the
+            // dir listing will be stale and will claim a
+            // file segments_X exists when in fact it
+            // doesn't.  So, we catch this and handle it
+            // as if the file does not exist
             if (infoStream != null) {
-              message("init: load commit \"" + fileName + "\"");
+              message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
             }
-            SegmentInfos sis = new SegmentInfos();
-            try {
-              sis.read(directory, fileName, codecs);
-            } catch (FileNotFoundException e) {
-              // LUCENE-948: on NFS (and maybe others), if
-              // you have writers switching back and forth
-              // between machines, it's very likely that the
-              // dir listing will be stale and will claim a
-              // file segments_X exists when in fact it
-              // doesn't.  So, we catch this and handle it
-              // as if the file does not exist
-              if (infoStream != null) {
-                message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
-              }
+            sis = null;
+          } catch (IOException e) {
+            if (SegmentInfos.generationFromSegmentsFileName(fileName) <= currentGen) {
+              throw e;
+            } else {
+              // Most likely we are opening an index that
+              // has an aborted "future" commit, so suppress
+              // exc in this case
               sis = null;
             }
-            if (sis != null) {
-              CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
-              if (sis.getGeneration() == segmentInfos.getGeneration()) {
-                currentCommitPoint = commitPoint;
-              }
-              commits.add(commitPoint);
-              incRef(sis, true);
+          }
+          if (sis != null) {
+            CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
+            if (sis.getGeneration() == segmentInfos.getGeneration()) {
+              currentCommitPoint = commitPoint;
+            }
+            commits.add(commitPoint);
+            incRef(sis, true);
+
+            if (lastSegmentInfos == null || sis.getGeneration() > lastSegmentInfos.getGeneration()) {
+              lastSegmentInfos = sis;
             }
           }
         }
       }
     }
 
-    // If we haven't seen any Lucene files, then currentCommitPoint is expected
-    // to be null, because it means it's a fresh Directory. Therefore it cannot
-    // be any NFS cache issues - so just ignore.
-    if (currentCommitPoint == null && seenIndexFiles) {
+    if (currentCommitPoint == null && currentSegmentsFile != null) {
       // We did not in fact see the segments_N file
       // corresponding to the segmentInfos that was passed
       // in.  Yet, it must exist, because our caller holds
@@ -214,7 +223,7 @@ final class IndexFileDeleter {
       // try now to explicitly open this commit point:
       SegmentInfos sis = new SegmentInfos();
       try {
-        sis.read(directory, segmentInfos.getCurrentSegmentFileName(), codecs);
+        sis.read(directory, currentSegmentsFile, codecs);
       } catch (IOException e) {
         throw new CorruptIndexException("failed to locate current segments_N file");
       }
@@ -244,7 +253,7 @@ final class IndexFileDeleter {
 
     // Finally, give policy a chance to remove things on
     // startup:
-    if (seenIndexFiles) {
+    if (currentSegmentsFile != null) {
       policy.onInit(commits);
     }
 
@@ -257,6 +266,10 @@ final class IndexFileDeleter {
     deleteCommits();
   }
 
+  public SegmentInfos getLastSegmentInfos() {
+    return lastSegmentInfos;
+  }
+
   /**
    * Remove the CommitPoints in the commitsToDelete List by
    * DecRef'ing all files from each SegmentInfos.

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/IndexReader.java Thu Jul 22 19:34:35 2010
@@ -868,18 +868,6 @@ public abstract class IndexReader implem
     setNorm(doc, field, Similarity.getDefault().encodeNormValue(value));
   }
 
-  /** Returns an enumeration of all the terms in the index. The
-   * enumeration is ordered by Term.compareTo(). Each term is greater
-   * than all that precede it in the enumeration. Note that after
-   * calling terms(), {@link TermEnum#next()} must be called
-   * on the resulting enumeration before calling other methods such as
-   * {@link TermEnum#term()}.
-   * @deprecated Use the new flex API ({@link #fields()}) instead.
-   * @throws IOException if there is a low-level IO error
-   */
-  @Deprecated
-  public abstract TermEnum terms() throws IOException;
-
   /** Flex API: returns {@link Fields} for this reader.
    *  This method may return null if the reader has no
    *  postings.
@@ -893,24 +881,10 @@ public abstract class IndexReader implem
    * using {@link ReaderUtil#gatherSubReaders} and iterate
    * through them yourself. */
   public abstract Fields fields() throws IOException;
-  
-  /** Returns an enumeration of all terms starting at a given term. If
-   * the given term does not exist, the enumeration is positioned at the
-   * first term greater than the supplied term. The enumeration is
-   * ordered by Term.compareTo(). Each term is greater than all that
-   * precede it in the enumeration.
-   * @deprecated Use the new flex API ({@link #fields()}) instead.
-   * @throws IOException if there is a low-level IO error
-   */
-  @Deprecated
-  public abstract TermEnum terms(Term t) throws IOException;
 
-  /** Returns the number of documents containing the term <code>t</code>.
-   * @throws IOException if there is a low-level IO error
-   * @deprecated Use {@link #docFreq(String,BytesRef)} instead.
-   */
-  @Deprecated
-  public abstract int docFreq(Term t) throws IOException;
+  public int docFreq(Term term) throws IOException {
+    return docFreq(term.field(), term.bytes());
+  }
 
   /** Returns the number of documents containing the term
    * <code>t</code>.  This method returns 0 if the term or
@@ -929,28 +903,6 @@ public abstract class IndexReader implem
     return terms.docFreq(term);
   }
 
-  /** Returns an enumeration of all the documents which contain
-   * <code>term</code>. For each document, the document number, the frequency of
-   * the term in that document is also provided, for use in
-   * search scoring.  If term is null, then all non-deleted
-   * docs are returned with freq=1.
-   * Thus, this method implements the mapping:
-   * <p><ul>
-   * Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq&gt;<sup>*</sup>
-   * </ul>
-   * <p>The enumeration is ordered by document number.  Each document number
-   * is greater than all that precede it in the enumeration.
-   * @deprecated Use the new flex API ({@link #termDocsEnum}) instead.
-   * @throws IOException if there is a low-level IO error
-   */
-  @Deprecated
-  public TermDocs termDocs(Term term) throws IOException {
-    ensureOpen();
-    TermDocs termDocs = termDocs();
-    termDocs.seek(term);
-    return termDocs;
-  }
-
   /** This may return null if the field does not exist.*/
   public Terms terms(String field) throws IOException {
     final Fields fields = fields();
@@ -997,50 +949,6 @@ public abstract class IndexReader implem
     }
   }
 
-  /** Returns an unpositioned {@link TermDocs} enumerator.
-   * @deprecated Use the new flex API ({@link #fields()}) instead.
-   * @throws IOException if there is a low-level IO error
-   */
-  @Deprecated
-  public abstract TermDocs termDocs() throws IOException;
-
-  /** Returns an enumeration of all the documents which contain
-   * <code>term</code>.  For each document, in addition to the document number
-   * and frequency of the term in that document, a list of all of the ordinal
-   * positions of the term in the document is available.  Thus, this method
-   * implements the mapping:
-   *
-   * <p><ul>
-   * Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq,
-   * &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
-   * pos<sub>freq-1</sub>&gt;
-   * &gt;<sup>*</sup>
-   * </ul>
-   * <p> This positional information facilitates phrase and proximity searching.
-   * <p>The enumeration is ordered by document number.  Each document number is
-   * greater than all that precede it in the enumeration.
-   * @deprecated Please switch the flex API ({@link
-   * #termDocsEnum}) instead
-   * @throws IOException if there is a low-level IO error
-   */
-  @Deprecated
-  public TermPositions termPositions(Term term) throws IOException {
-    ensureOpen();
-    TermPositions termPositions = termPositions();
-    termPositions.seek(term);
-    return termPositions;
-  }
-
-  /** Returns an unpositioned {@link TermPositions} enumerator.
-   * @deprecated Please switch the flex API ({@link
-   * #termDocsEnum}) instead
-   * @throws IOException if there is a low-level IO error
-   */
-  @Deprecated
-  public abstract TermPositions termPositions() throws IOException;
-
-
-
   /** Deletes the document numbered <code>docNum</code>.  Once a document is
    * deleted it will not appear in TermDocs or TermPositions enumerations.
    * Attempts to read its field with the {@link #document}
@@ -1089,16 +997,16 @@ public abstract class IndexReader implem
    */
   public int deleteDocuments(Term term) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException {
     ensureOpen();
-    TermDocs docs = termDocs(term);
+    DocsEnum docs = MultiFields.getTermDocsEnum(this,
+                                                MultiFields.getDeletedDocs(this),
+                                                term.field(),
+                                                term.bytes());
     if (docs == null) return 0;
     int n = 0;
-    try {
-      while (docs.next()) {
-        deleteDocument(docs.doc());
-        n++;
-      }
-    } finally {
-      docs.close();
+    int doc;
+    while ((doc = docs.nextDoc()) != docs.NO_MORE_DOCS) {
+      deleteDocument(doc);
+      n++;
     }
     return n;
   }
@@ -1206,22 +1114,6 @@ public abstract class IndexReader implem
    */
   public abstract Collection<String> getFieldNames(FieldOption fldOption);
 
-  // Only used by external subclasses of IndexReader; all
-  // internal classes should implement Bits more
-  // efficiently:
-  /*
-  private final class DeletedDocsBits implements Bits {
-    public boolean get(int docID) {
-      return isDeleted(docID);
-    }
-    public int length() {
-      return maxDoc();
-    }
-  }
-
-  private Bits deletedDocsBits;
-  */
-
   /** Returns the {@link Bits} representing deleted docs.  A
    *  set bit indicates the doc ID has been deleted.  This
    *  method should return null when there are no deleted

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiDocsAndPositionsEnum.java Thu Jul 22 19:34:35 2010
@@ -40,7 +40,12 @@ public final class MultiDocsAndPositions
 
   MultiDocsAndPositionsEnum reset(final EnumWithSlice[] subs, final int numSubs) throws IOException {
     this.numSubs = numSubs;
-    this.subs = subs;
+    this.subs = new EnumWithSlice[subs.length];
+    for(int i=0;i<subs.length;i++) {
+      this.subs[i] = new EnumWithSlice();
+      this.subs[i].docsAndPositionsEnum = subs[i].docsAndPositionsEnum;
+      this.subs[i].slice = subs[i].slice;
+    }
     upto = -1;
     current = null;
     return this;

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiDocsEnum.java Thu Jul 22 19:34:35 2010
@@ -38,7 +38,13 @@ public final class MultiDocsEnum extends
 
   MultiDocsEnum reset(final EnumWithSlice[] subs, final int numSubs) throws IOException {
     this.numSubs = numSubs;
-    this.subs = subs;
+
+    this.subs = new EnumWithSlice[subs.length];
+    for(int i=0;i<subs.length;i++) {
+      this.subs[i] = new EnumWithSlice();
+      this.subs[i].docsEnum = subs[i].docsEnum;
+      this.subs[i].slice = subs[i].slice;
+    }
     upto = -1;
     current = null;
     return this;

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiReader.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiReader.java Thu Jul 22 19:34:35 2010
@@ -25,9 +25,6 @@ import java.util.Map;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.index.DirectoryReader.MultiTermDocs;       // deprecated
-import org.apache.lucene.index.DirectoryReader.MultiTermEnum;       // deprecated
-import org.apache.lucene.index.DirectoryReader.MultiTermPositions;  // deprecated
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
 import org.apache.lucene.util.Bits;
@@ -370,28 +367,6 @@ public class MultiReader extends IndexRe
   }
 
   @Override
-  public TermEnum terms() throws IOException {
-    ensureOpen();
-    if (subReaders.length == 1) {
-      // Optimize single segment case:
-      return subReaders[0].terms();
-    } else {
-      return new MultiTermEnum(this, subReaders, starts, null);
-    }
-  }
-
-  @Override
-  public TermEnum terms(Term term) throws IOException {
-    ensureOpen();
-    if (subReaders.length == 1) {
-      // Optimize single segment case:
-      return subReaders[0].terms(term);
-    } else {
-      return new MultiTermEnum(this, subReaders, starts, term);
-    }
-  }
-
-  @Override
   public int docFreq(Term t) throws IOException {
     ensureOpen();
     int total = 0;          // sum freqs in segments
@@ -411,39 +386,6 @@ public class MultiReader extends IndexRe
   }
   
   @Override
-  public TermDocs termDocs() throws IOException {
-    ensureOpen();
-    if (subReaders.length == 1) {
-      // Optimize single segment case:
-      return subReaders[0].termDocs();
-    } else {
-      return new MultiTermDocs(this, subReaders, starts);
-    }
-  }
-
-  @Override
-  public TermDocs termDocs(Term term) throws IOException {
-    ensureOpen();
-    if (subReaders.length == 1) {
-      // Optimize single segment case:
-      return subReaders[0].termDocs(term);
-    } else {
-      return super.termDocs(term);
-    }
-  }
-
-  @Override
-  public TermPositions termPositions() throws IOException {
-    ensureOpen();
-    if (subReaders.length == 1) {
-      // Optimize single segment case:
-      return subReaders[0].termPositions();
-    } else {
-      return new MultiTermPositions(this, subReaders, starts);
-    }
-  }
-
-  @Override
   protected void doCommit(Map<String,String> commitUserData) throws IOException {
     for (int i = 0; i < subReaders.length; i++)
       subReaders[i].commit(commitUserData);

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Thu Jul 22 19:34:35 2010
@@ -42,6 +42,9 @@ public final class MultiTermsEnum extend
   private final MultiDocsEnum.EnumWithSlice[] subDocs;
   private final MultiDocsAndPositionsEnum.EnumWithSlice[] subDocsAndPositions;
 
+  private BytesRef lastSeek;
+  private final BytesRef lastSeekScratch = new BytesRef();
+
   private int numTop;
   private int numSubs;
   private BytesRef current;
@@ -139,8 +142,40 @@ public final class MultiTermsEnum extend
   public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
     queue.clear();
     numTop = 0;
+
+    boolean seekOpt = false;
+    if (lastSeek != null && termComp.compare(lastSeek, term) <= 0) {
+      seekOpt = true;
+    }
+    lastSeekScratch.copy(term);
+    lastSeek = lastSeekScratch;
+
     for(int i=0;i<numSubs;i++) {
-      final SeekStatus status = currentSubs[i].terms.seek(term, useCache);
+      final SeekStatus status;
+      // LUCENE-2130: if we had just seek'd already, prior
+      // to this seek, and the new seek term is after the
+      // previous one, don't try to re-seek this sub if its
+      // current term is already beyond this new seek term.
+      // Doing so is a waste because this sub will simply
+      // seek to the same spot.
+      if (seekOpt) {
+        final BytesRef curTerm = currentSubs[i].current;
+        if (curTerm != null) {
+          final int cmp = termComp.compare(term, curTerm);
+          if (cmp == 0) {
+            status = SeekStatus.FOUND;
+          } else if (cmp < 0) {
+            status = SeekStatus.NOT_FOUND;
+          } else {
+            status = currentSubs[i].terms.seek(term, useCache);
+          }
+        } else {
+          status = SeekStatus.END;
+        }
+      } else {
+        status = currentSubs[i].terms.seek(term, useCache);
+      }
+
       if (status == SeekStatus.FOUND) {
         top[numTop++] = currentSubs[i];
         current = currentSubs[i].current = currentSubs[i].terms.term();
@@ -150,6 +185,7 @@ public final class MultiTermsEnum extend
         queue.add(currentSubs[i]);
       } else {
         // enum exhausted
+        currentSubs[i].current = null;
       }
     }
 
@@ -205,6 +241,8 @@ public final class MultiTermsEnum extend
 
   @Override
   public BytesRef next() throws IOException {
+    lastSeek = null;
+
     // restore queue
     pushTop();
 
@@ -272,9 +310,9 @@ public final class MultiTermsEnum extend
         b = null;
       }
 
-      final DocsEnum subDocsEnum = entry.terms.docs(b, entry.reuseDocs);
+      final DocsEnum subDocsEnum = entry.terms.docs(b, null);
       if (subDocsEnum != null) {
-        entry.reuseDocs = subDocs[upto].docsEnum = subDocsEnum;
+        subDocs[upto].docsEnum = subDocsEnum;
         subDocs[upto].slice = entry.subSlice;
 
         upto++;
@@ -334,14 +372,14 @@ public final class MultiTermsEnum extend
         b = null;
       }
 
-      final DocsAndPositionsEnum subPostings = entry.terms.docsAndPositions(b, entry.reusePostings);
+      final DocsAndPositionsEnum subPostings = entry.terms.docsAndPositions(b, null);
 
       if (subPostings != null) {
-        entry.reusePostings = subDocsAndPositions[upto].docsAndPositionsEnum = subPostings;
+        subDocsAndPositions[upto].docsAndPositionsEnum = subPostings;
         subDocsAndPositions[upto].slice = entry.subSlice;
         upto++;
       } else {
-        if (entry.terms.docs(b, entry.reuseDocs) != null) {
+        if (entry.terms.docs(b, null) != null) {
           // At least one of our subs does not store
           // positions -- we can't correctly produce a
           // MultiDocsAndPositions enum
@@ -360,8 +398,6 @@ public final class MultiTermsEnum extend
   private final static class TermsEnumWithSlice {
     private final ReaderUtil.Slice subSlice;
     private TermsEnum terms;
-    private DocsEnum reuseDocs;
-    private DocsAndPositionsEnum reusePostings;
     public BytesRef current;
 
     public TermsEnumWithSlice(ReaderUtil.Slice subSlice) {
@@ -372,9 +408,6 @@ public final class MultiTermsEnum extend
     public void reset(TermsEnum terms, BytesRef term) {
       this.terms = terms;
       current = term;
-      // TODO: can we not null these?
-      reuseDocs = null;
-      reusePostings = null;
     }
   }
 

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/ParallelReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/ParallelReader.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/ParallelReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/ParallelReader.java Thu Jul 22 19:34:35 2010
@@ -452,18 +452,6 @@ public class ParallelReader extends Inde
   }
 
   @Override
-  public TermEnum terms() throws IOException {
-    ensureOpen();
-    return new ParallelTermEnum();
-  }
-
-  @Override
-  public TermEnum terms(Term term) throws IOException {
-    ensureOpen();
-    return new ParallelTermEnum(term);
-  }
-
-  @Override
   public int docFreq(Term term) throws IOException {
     ensureOpen();
     IndexReader reader = fieldToReader.get(term.field());
@@ -477,30 +465,6 @@ public class ParallelReader extends Inde
     return reader == null? 0 : reader.docFreq(field, term);
   }
 
-  @Override
-  public TermDocs termDocs(Term term) throws IOException {
-    ensureOpen();
-    return new ParallelTermDocs(term);
-  }
-
-  @Override
-  public TermDocs termDocs() throws IOException {
-    ensureOpen();
-    return new ParallelTermDocs();
-  }
-
-  @Override
-  public TermPositions termPositions(Term term) throws IOException {
-    ensureOpen();
-    return new ParallelTermPositions(term);
-  }
-
-  @Override
-  public TermPositions termPositions() throws IOException {
-    ensureOpen();
-    return new ParallelTermPositions();
-  }
-  
   /**
    * Checks recursively if all subreaders are up to date. 
    */
@@ -574,168 +538,6 @@ public class ParallelReader extends Inde
     }
     return fieldSet;
   }
-
-  @Deprecated
-  private class ParallelTermEnum extends TermEnum {
-    private String field;
-    private Iterator<String> fieldIterator;
-    private TermEnum termEnum;
-
-    public ParallelTermEnum() throws IOException {
-      try {
-        field = fieldToReader.firstKey();
-      } catch(NoSuchElementException e) {
-        // No fields, so keep field == null, termEnum == null
-        return;
-      }
-      if (field != null)
-        termEnum = fieldToReader.get(field).terms();
-    }
-
-    public ParallelTermEnum(Term term) throws IOException {
-      field = term.field();
-      IndexReader reader = fieldToReader.get(field);
-      if (reader!=null)
-        termEnum = reader.terms(term);
-    }
-
-    @Override
-    public boolean next() throws IOException {
-      if (termEnum==null)
-        return false;
-
-      // another term in this field?
-      if (termEnum.next() && termEnum.term().field()==field)
-        return true;                              // yes, keep going
-
-      termEnum.close();                           // close old termEnum
-
-      // find the next field with terms, if any
-      if (fieldIterator==null) {
-        fieldIterator = fieldToReader.tailMap(field).keySet().iterator();
-        fieldIterator.next();                     // Skip field to get next one
-      }
-      while (fieldIterator.hasNext()) {
-        field = fieldIterator.next();
-        termEnum = fieldToReader.get(field).terms(new Term(field));
-        Term term = termEnum.term();
-        if (term!=null && term.field()==field)
-          return true;
-        else
-          termEnum.close();
-      }
- 
-      return false;                               // no more fields
-    }
-
-    @Override
-    public Term term() {
-      if (termEnum==null)
-        return null;
-
-      return termEnum.term();
-    }
-
-    @Override
-    public int docFreq() {
-      if (termEnum==null)
-        return 0;
-
-      return termEnum.docFreq();
-    }
-
-    @Override
-    public void close() throws IOException {
-      if (termEnum!=null)
-        termEnum.close();
-    }
-
-  }
-
-  // wrap a TermDocs in order to support seek(Term)
-  private class ParallelTermDocs implements TermDocs {
-    protected TermDocs termDocs;
-
-    public ParallelTermDocs() {}
-    public ParallelTermDocs(Term term) throws IOException {
-      if (term == null)
-        termDocs = readers.isEmpty() ? null : readers.get(0).termDocs(null);
-      else
-        seek(term);
-    }
-
-    public int doc() { return termDocs.doc(); }
-    public int freq() { return termDocs.freq(); }
-
-    public void seek(Term term) throws IOException {
-      IndexReader reader = fieldToReader.get(term.field());
-      termDocs = reader!=null ? reader.termDocs(term) : null;
-    }
-
-    public void seek(TermEnum termEnum) throws IOException {
-      seek(termEnum.term());
-    }
-
-    public boolean next() throws IOException {
-      if (termDocs==null)
-        return false;
-
-      return termDocs.next();
-    }
-
-    public int read(final int[] docs, final int[] freqs) throws IOException {
-      if (termDocs==null)
-        return 0;
-
-      return termDocs.read(docs, freqs);
-    }
-
-    public boolean skipTo(int target) throws IOException {
-      if (termDocs==null)
-        return false;
-
-      return termDocs.skipTo(target);
-    }
-
-    public void close() throws IOException {
-      if (termDocs!=null)
-        termDocs.close();
-    }
-
-  }
-
-  private class ParallelTermPositions
-    extends ParallelTermDocs implements TermPositions {
-
-    public ParallelTermPositions() {}
-    public ParallelTermPositions(Term term) throws IOException { seek(term); }
-
-    @Override
-    public void seek(Term term) throws IOException {
-      IndexReader reader = fieldToReader.get(term.field());
-      termDocs = reader!=null ? reader.termPositions(term) : null;
-    }
-
-    public int nextPosition() throws IOException {
-      // It is an error to call this if there is no next position, e.g. if termDocs==null
-      return ((TermPositions)termDocs).nextPosition();
-    }
-
-    public int getPayloadLength() throws IOException {
-      return ((TermPositions)termDocs).getPayloadLength();
-    }
-
-    public byte[] getPayload(byte[] data, int offset) throws IOException {
-      return ((TermPositions)termDocs).getPayload(data, offset);
-    }
-
-
-    // TODO: Remove warning after API has been finalized
-    public boolean isPayloadAvailable() {
-      return ((TermPositions) termDocs).isPayloadAvailable();
-    }
-  }
-
 }
 
 

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/PositionBasedTermVectorMapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/PositionBasedTermVectorMapper.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/PositionBasedTermVectorMapper.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/PositionBasedTermVectorMapper.java Thu Jul 22 19:34:35 2010
@@ -21,6 +21,8 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.lucene.util.BytesRef;
+
 /**
  * For each Field, store position by position information.  It ignores frequency information
  * <p/>
@@ -69,7 +71,7 @@ public class PositionBasedTermVectorMapp
    * @param positions
    */
   @Override
-  public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+  public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
     for (int i = 0; i < positions.length; i++) {
       Integer posVal = Integer.valueOf(positions[i]);
       TVPositionInfo pos = currentPositions.get(posVal);
@@ -120,20 +122,20 @@ public class PositionBasedTermVectorMapp
   public static class TVPositionInfo{
     private int position;
 
-    private List<String> terms;
+    private List<BytesRef> terms;
 
     private List<TermVectorOffsetInfo> offsets;
 
 
     public TVPositionInfo(int position, boolean storeOffsets) {
       this.position = position;
-      terms = new ArrayList<String>();
+      terms = new ArrayList<BytesRef>();
       if (storeOffsets) {
         offsets = new ArrayList<TermVectorOffsetInfo>();
       }
     }
 
-    void addTerm(String term, TermVectorOffsetInfo info)
+    void addTerm(BytesRef term, TermVectorOffsetInfo info)
     {
       terms.add(term);
       if (offsets != null) {
@@ -151,9 +153,9 @@ public class PositionBasedTermVectorMapp
 
     /**
      * Note, there may be multiple terms at the same position
-     * @return A List of Strings
+     * @return A List of BytesRefs
      */
-    public List<String> getTerms() {
+    public List<BytesRef> getTerms() {
       return terms;
     }
 

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentInfo.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentInfo.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentInfo.java Thu Jul 22 19:34:35 2010
@@ -22,6 +22,7 @@ import org.apache.lucene.store.IndexOutp
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
 import java.io.IOException;
 import java.util.Arrays;
 import java.util.List;
@@ -154,12 +155,14 @@ public final class SegmentInfo {
   /**
    * Construct a new SegmentInfo instance by reading a
    * previously saved SegmentInfo from input.
+   * <p>Note: this is public only to allow access from
+   * the codecs package.</p>
    *
    * @param dir directory to load from
    * @param format format of the segments info file
    * @param input input handle to read segment info from
    */
-  SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException {
+  public SegmentInfo(Directory dir, int format, IndexInput input, CodecProvider codecs) throws IOException {
     this.dir = dir;
     name = input.readString();
     docCount = input.readInt();
@@ -173,9 +176,10 @@ public final class SegmentInfo {
       docStoreSegment = name;
       docStoreIsCompoundFile = false;
     }
-    if (format > SegmentInfos.FORMAT_4_0) {
+    if (format > DefaultSegmentInfosWriter.FORMAT_4_0) {
       // pre-4.0 indexes write a byte if there is a single norms file
-      assert 1 == input.readByte();
+      byte b = input.readByte();
+      assert 1 == b;
     }
     int numNormGen = input.readInt();
     if (numNormGen == NO) {
@@ -195,7 +199,7 @@ public final class SegmentInfo {
     
     // System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
     
-    if (format <= SegmentInfos.FORMAT_4_0)
+    if (format <= DefaultSegmentInfosWriter.FORMAT_4_0)
       codecName = input.readString();
     else
       codecName = "PreFlex";
@@ -393,7 +397,7 @@ public final class SegmentInfo {
   }
   
   /** Save this segment's info. */
-  void write(IndexOutput output)
+  public void write(IndexOutput output)
     throws IOException {
     assert delCount <= docCount: "delCount=" + delCount + " docCount=" + docCount + " segment=" + name;
     output.writeString(name);

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentInfos.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentInfos.java Thu Jul 22 19:34:35 2010
@@ -20,10 +20,10 @@ package org.apache.lucene.index;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.ChecksumIndexOutput;
-import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.NoSuchDirectoryException;
 import org.apache.lucene.index.codecs.CodecProvider;
+import org.apache.lucene.index.codecs.SegmentInfosReader;
+import org.apache.lucene.index.codecs.SegmentInfosWriter;
 import org.apache.lucene.util.ThreadInterruptedException;
 
 import java.io.FileNotFoundException;
@@ -53,40 +53,40 @@ public final class SegmentInfos extends 
    * be removed, however the numbers should continue to decrease. 
    */
 
-  /** Used for the segments.gen file only! */
+  /** Used for the segments.gen file only!
+   * Whenever you add a new format, make it 1 smaller (negative version logic)! */
   public static final int FORMAT_SEGMENTS_GEN_CURRENT = -2;
-  
-  /** This format adds optional per-segment String
-   *  diagnostics storage, and switches userData to Map */
-  public static final int FORMAT_DIAGNOSTICS = -9;
-
-  /** Each segment records whether its postings are written
-   *  in the new flex format */
-  public static final int FORMAT_4_0 = -10;
-
-  /* This must always point to the most recent file format. */
-  static final int CURRENT_FORMAT = FORMAT_4_0;
-  
+    
   public int counter = 0;    // used to name new segments
   
   /**
    * counts how often the index has been changed by adding or deleting docs.
    * starting with the current time in milliseconds forces to create unique version numbers.
    */
-  private long version = System.currentTimeMillis();
+  public long version = System.currentTimeMillis();
 
   private long generation = 0;     // generation of the "segments_N" for the next commit
   private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read
                                    // or wrote; this is normally the same as generation except if
                                    // there was an IOException that had interrupted a commit
 
-  private Map<String,String> userData = Collections.<String,String>emptyMap();       // Opaque Map<String, String> that user can specify during IndexWriter.commit
+  public Map<String,String> userData = Collections.<String,String>emptyMap();       // Opaque Map<String, String> that user can specify during IndexWriter.commit
+  
+  private CodecProvider codecs;
 
   /**
    * If non-null, information about loading segments_N files
    * will be printed here.  @see #setInfoStream.
    */
   private static PrintStream infoStream;
+  
+  public SegmentInfos() {
+    this(CodecProvider.getDefault());
+  }
+  
+  public SegmentInfos(CodecProvider codecs) {
+    this.codecs = codecs;
+  }
 
   public final SegmentInfo info(int i) {
     return get(i);
@@ -205,42 +205,22 @@ public final class SegmentInfos extends 
    */
   public final void read(Directory directory, String segmentFileName, 
                          CodecProvider codecs) throws CorruptIndexException, IOException {
+    this.codecs = codecs;
     boolean success = false;
 
     // Clear any previous segments:
     clear();
 
-    ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName));
-
     generation = generationFromSegmentsFileName(segmentFileName);
 
     lastGeneration = generation;
 
     try {
-      int format = input.readInt();
-
-      // check that it is a format we can understand
-      if (format < CURRENT_FORMAT)
-        throw new CorruptIndexException("Unknown (newer than us?) format version: " + format);
-
-      version = input.readLong(); // read version
-      counter = input.readInt(); // read counter
-      
-      for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
-        add(new SegmentInfo(directory, format, input, codecs));
-      }
-      
-      userData = input.readStringStringMap();
-
-      final long checksumNow = input.getChecksum();
-      final long checksumThen = input.readLong();
-      if (checksumNow != checksumThen)
-        throw new CorruptIndexException("checksum mismatch in segments file");
-
+      SegmentInfosReader infosReader = codecs.getSegmentInfosReader();
+      infosReader.read(directory, segmentFileName, codecs, this);
       success = true;
     }
     finally {
-      input.close();
       if (!success) {
         // Clear any segment infos we had loaded so we
         // have a clean slate on retry:
@@ -261,6 +241,7 @@ public final class SegmentInfos extends 
   
   public final void read(Directory directory, final CodecProvider codecs) throws CorruptIndexException, IOException {
     generation = lastGeneration = -1;
+    this.codecs = codecs;
 
     new FindSegmentsFile(directory) {
 
@@ -274,7 +255,7 @@ public final class SegmentInfos extends 
 
   // Only non-null after prepareCommit has been called and
   // before finishCommit is called
-  ChecksumIndexOutput pendingSegnOutput;
+  IndexOutput pendingSegnOutput;
 
   private void write(Directory directory) throws IOException {
 
@@ -287,21 +268,14 @@ public final class SegmentInfos extends 
       generation++;
     }
 
-    ChecksumIndexOutput segnOutput = new ChecksumIndexOutput(directory.createOutput(segmentFileName));
+    IndexOutput segnOutput = null;
 
     boolean success = false;
 
     try {
-      segnOutput.writeInt(CURRENT_FORMAT); // write FORMAT
-      segnOutput.writeLong(++version); // every write changes
-                                   // the index
-      segnOutput.writeInt(counter); // write counter
-      segnOutput.writeInt(size()); // write infos
-      for (SegmentInfo si : this) {
-        si.write(segnOutput);
-      }
-      segnOutput.writeStringStringMap(userData);
-      segnOutput.prepareCommit();
+      SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
+      segnOutput = infosWriter.writeInfos(directory, segmentFileName, this);
+      infosWriter.prepareCommit(segnOutput);
       success = true;
       pendingSegnOutput = segnOutput;
     } finally {
@@ -572,9 +546,16 @@ public final class SegmentInfos extends 
                     genB = gen0;
                     break;
                   }
+                } else {
+                  /* TODO: Investigate this! 
+                  throw new IndexFormatTooNewException("segments.gen version number invalid: " + version +
+                    " (must be " + FORMAT_SEGMENTS_GEN_CURRENT + ")");
+                  */
                 }
               } catch (IOException err2) {
-                // will retry
+                // rethrow any format exception
+                if (err2 instanceof CorruptIndexException) throw err2;
+                // else will retry
               } finally {
                 genInput.close();
               }
@@ -785,8 +766,8 @@ public final class SegmentInfos extends 
       throw new IllegalStateException("prepareCommit was not called");
     boolean success = false;
     try {
-      pendingSegnOutput.finishCommit();
-      pendingSegnOutput.close();
+      SegmentInfosWriter infosWriter = codecs.getSegmentInfosWriter();
+      infosWriter.finishCommit(pendingSegnOutput);
       pendingSegnOutput = null;
       success = true;
     } finally {

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentReadState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentReadState.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentReadState.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentReadState.java Thu Jul 22 19:34:35 2010
@@ -27,6 +27,12 @@ public class SegmentReadState {
   public final SegmentInfo segmentInfo;
   public final FieldInfos fieldInfos;
   public final int readBufferSize;
+
+  // NOTE: if this is < 0, that means "defer terms index
+  // load until needed".  But if the codec must load the
+  // terms index on init (preflex is the only once currently
+  // that must do so), then it should negate this value to
+  // get the app's terms divisor:
   public final int termsIndexDivisor;
 
   public SegmentReadState(Directory dir,

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentReader.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentReader.java Thu Jul 22 19:34:35 2010
@@ -39,11 +39,7 @@ import org.apache.lucene.store.IndexOutp
 import org.apache.lucene.util.BitVector;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.CloseableThreadLocal;
-import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.index.codecs.CodecProvider;
-import org.apache.lucene.index.codecs.preflex.PreFlexFields;
-import org.apache.lucene.index.codecs.preflex.SegmentTermDocs;
-import org.apache.lucene.index.codecs.preflex.SegmentTermPositions;
 import org.apache.lucene.index.codecs.FieldsProducer;
 import org.apache.lucene.search.FieldCache; // not great (circular); used only to purge FieldCache entry on close
 import org.apache.lucene.util.BytesRef;
@@ -93,7 +89,6 @@ public class SegmentReader extends Index
     final FieldInfos fieldInfos;
 
     final FieldsProducer fields;
-    final boolean isPreFlex;
     final CodecProvider codecs;
     
     final Directory dir;
@@ -140,7 +135,6 @@ public class SegmentReader extends Index
         fields = si.getCodec().fieldsProducer(new SegmentReadState(cfsDir, si, fieldInfos, readBufferSize, termsIndexDivisor));
         assert fields != null;
 
-        isPreFlex = fields instanceof PreFlexFields;
         success = true;
       } finally {
         if (!success) {
@@ -824,37 +818,6 @@ public class SegmentReader extends Index
     return new ArrayList<String>(si.files());
   }
   
-  @Override
-  public TermEnum terms() throws IOException {
-    ensureOpen();
-    if (core.isPreFlex) {
-      // For old API on an old segment, instead of
-      // converting old API -> new API -> old API, just give
-      // direct access to old:
-      return ((PreFlexFields) core.fields).tis.terms();
-    } else {
-      // Emulate pre-flex API on top of flex index
-      return new LegacyTermEnum(null);
-    }
-  }
-
-  /** @deprecated Please switch to the flex API ({@link
-   * #fields}) instead. */
-  @Deprecated
-  @Override
-  public TermEnum terms(Term t) throws IOException {
-    ensureOpen();
-    if (core.isPreFlex) {
-      // For old API on an old segment, instead of
-      // converting old API -> new API -> old API, just give
-      // direct access to old:
-      return ((PreFlexFields) core.fields).tis.terms(t);
-    } else {
-      // Emulate pre-flex API on top of flex index
-      return new LegacyTermEnum(t);
-    }
-  }
-
   FieldInfos fieldInfos() {
     return core.fieldInfos;
   }
@@ -870,74 +833,11 @@ public class SegmentReader extends Index
     return (deletedDocs != null && deletedDocs.get(n));
   }
 
-  /** @deprecated Switch to the flex API ({@link
-   * IndexReader#termDocsEnum}) instead. */
-  @Deprecated
-  @Override
-  public TermDocs termDocs(Term term) throws IOException {
-    if (term == null) {
-      return new AllTermDocs(this);
-    } else {
-      return super.termDocs(term);
-    }
-  }
-  
   @Override
   public Fields fields() throws IOException {
     return core.fields;
   }
 
-  /** @deprecated Switch to the flex API {@link
-   *  IndexReader#termDocsEnum} instead. */
-  @Deprecated
-  @Override
-  public TermDocs termDocs() throws IOException {
-    ensureOpen();
-    if (core.isPreFlex) {
-      // For old API on an old segment, instead of
-      // converting old API -> new API -> old API, just give
-      // direct access to old:
-      final PreFlexFields pre = (PreFlexFields) core.fields;
-      SegmentTermDocs std = new SegmentTermDocs(pre.freqStream, pre.tis, core.fieldInfos);
-      std.setSkipDocs(deletedDocs);
-      return std;
-    } else {
-      // Emulate old API
-      return new LegacyTermDocs();
-    }
-  }
-
-  /** @deprecated Switch to the flex API {@link
-   *  IndexReader#termDocsEnum} instead */
-  @Deprecated
-  @Override
-  public TermPositions termPositions() throws IOException {
-    ensureOpen();
-    if (core.isPreFlex) {
-      // For old API on an old segment, instead of
-      // converting old API -> new API -> old API, just give
-      // direct access to old:
-      final PreFlexFields pre = (PreFlexFields) core.fields;
-      SegmentTermPositions stp = new SegmentTermPositions(pre.freqStream, pre.proxStream, pre.tis, core.fieldInfos);
-      stp.setSkipDocs(deletedDocs);
-      return stp;
-    } else {
-      // Emulate old API
-      return new LegacyTermPositions();
-    }
-  }
-
-  @Override
-  public int docFreq(Term t) throws IOException {
-    ensureOpen();
-    Terms terms = core.fields.terms(t.field);
-    if (terms != null) {
-      return terms.docFreq(new BytesRef(t.text));
-    } else {
-      return 0;
-    }
-  }
-
   @Override
   public int docFreq(String field, BytesRef term) throws IOException {
     ensureOpen();
@@ -1328,393 +1228,4 @@ public class SegmentReader extends Index
   public int getTermInfosIndexDivisor() {
     return core.termsIndexDivisor;
   }
-  
-  // Back compat: pre-flex TermEnum API over flex API
-  @Deprecated
-  final private class LegacyTermEnum extends TermEnum {
-    FieldsEnum fields;
-    TermsEnum terms;
-    boolean done;
-    String currentField;
-    BytesRef currentTerm;
-
-    public LegacyTermEnum(Term t) throws IOException {
-      fields = core.fields.iterator();
-      currentField = fields.next();
-      if (currentField == null) {
-        // no fields
-        done = true;
-      } else if (t != null) {
-        // Pre-seek to this term
-
-        while(currentField.compareTo(t.field) < 0) {
-          currentField = fields.next();
-          if (currentField == null) {
-            // Hit end of fields
-            done = true;
-            break;
-          }
-        }
-
-        if (!done) {
-          // We found some field -- get its terms:
-          terms = fields.terms();
-
-          if (currentField == t.field) {
-            // We found exactly the requested field; now
-            // seek the term text:
-            String text = t.text();
-
-            // this is only for backwards compatibility.
-            // previously you could supply a term with unpaired surrogates,
-            // and it would return the next Term.
-            // if someone does this, tack on the lowest possible trail surrogate.
-            // this emulates the old behavior, and forms "valid UTF-8" unicode.
-            BytesRef tr = new BytesRef(UnicodeUtil.nextValidUTF16String(text));
-            TermsEnum.SeekStatus status = terms.seek(tr);
-
-            if (status == TermsEnum.SeekStatus.END) {
-              // Rollover to the next field
-              terms = null;
-              next();
-            } else if (status == TermsEnum.SeekStatus.FOUND) {
-              // Found exactly the term
-              currentTerm = tr;
-            } else {
-              // Found another term, in this same field
-              currentTerm = terms.term();
-            }
-          } else {
-            // We didn't find exact field (we found the
-            // following field); advance to first term in
-            // this field
-            next();
-          }
-        }
-      } else {
-        terms = fields.terms();
-      }
-    }
-
-    @Override
-    public boolean next() throws IOException {
-
-      if (done) {
-        return false;
-      }
-
-      while(true) {
-        if (terms == null) {
-          // Advance to the next field
-          currentField = fields.next();
-          if (currentField == null) {
-            done = true;
-            return false;
-          }
-          terms = fields.terms();
-        }
-        currentTerm = terms.next();
-        if (currentTerm != null) {
-          // This field still has terms
-          return true;
-        } else {
-          // Done producing terms from this field; advance
-          // to next field
-          terms = null;
-        }
-      }
-    }
-
-    @Override
-    public Term term() {
-      if (!done && terms != null && currentTerm != null) {
-        return new Term(currentField, currentTerm.utf8ToString());
-      }
-      return null;
-    }
-
-    @Override
-    public int docFreq() {
-      return terms == null ? 0 : terms.docFreq();
-    }
-
-    @Override
-    public void close() {}
-  }
-
-  // Back compat: emulates legacy TermDocs API on top of
-  // flex API
-  private class LegacyTermDocs implements TermDocs {
-
-    String currentField;
-    final Fields fields;
-    TermsEnum terms;
-    DocsEnum docsEnum;
-    boolean any;
-
-    LegacyTermDocs() throws IOException {
-      fields = core.fields;
-    }
-
-    public void close() {}
-
-    public void seek(TermEnum termEnum) throws IOException {
-      seek(termEnum.term());
-    }
-
-    public boolean skipTo(int target) throws IOException {
-      if (!any) {
-        return false;
-      } else {
-        return docsEnum.advance(target) != docsEnum.NO_MORE_DOCS;
-      }
-    }
-
-    public void seek(Term term) throws IOException {
-
-      any = false;
-
-      if (terms != null && !term.field.equals(currentField)) {
-        // new field
-        terms = null;
-      }
-
-      if (terms == null) {
-        currentField = term.field;
-        Terms terms1 = fields.terms(currentField);
-        if (terms1 == null) {
-          // no such field
-          return;
-        } else {
-          terms = terms1.iterator();
-        }
-      }
-
-      if (terms.seek(new BytesRef(term.text)) == TermsEnum.SeekStatus.FOUND) {
-        // Term exists
-        any = true;
-        pendingBulkResult = null;
-        docsEnum = terms.docs(deletedDocs, docsEnum);
-      }
-    }
-
-    public int doc() {
-      if (!any) {
-        return 0;
-      } else {
-        return docsEnum.docID();
-      }
-    }
-
-    private DocsEnum.BulkReadResult pendingBulkResult;
-    private int bulkCount;
-    private int pendingBulk;
-
-    public int read(int[] docs, int[] freqs) throws IOException {
-      if (any && pendingBulkResult == null) {
-        pendingBulkResult = docsEnum.getBulkResult();
-      }
-      if (!any) {
-        return 0;
-      } else if (pendingBulk > 0) {
-        final int left = bulkCount - pendingBulk;
-        if (docs.length >= left) {
-          // read all pending
-          System.arraycopy(pendingBulkResult.docs.ints, pendingBulk, docs, 0, left);
-          System.arraycopy(pendingBulkResult.freqs.ints, pendingBulk, freqs, 0, left);
-          pendingBulk = 0;
-          return left;
-        } else {
-          // read only part of pending
-          System.arraycopy(pendingBulkResult.docs.ints, pendingBulk, docs, 0, docs.length);
-          System.arraycopy(pendingBulkResult.freqs.ints, pendingBulk, freqs, 0, docs.length);
-          pendingBulk += docs.length;
-          return docs.length;
-        }
-      } else {
-        // nothing pending
-        bulkCount = docsEnum.read();
-        if (docs.length >= bulkCount) {
-          System.arraycopy(pendingBulkResult.docs.ints, 0, docs, 0, bulkCount);
-          System.arraycopy(pendingBulkResult.freqs.ints, 0, freqs, 0, bulkCount);
-          return bulkCount;
-        } else {
-          System.arraycopy(pendingBulkResult.docs.ints, 0, docs, 0, docs.length);
-          System.arraycopy(pendingBulkResult.freqs.ints, 0, freqs, 0, docs.length);
-          pendingBulk = docs.length;
-          return docs.length;
-        }
-      }
-    }
-
-    public int freq() {
-      if (!any) {
-        return 0;
-      } else {
-        return docsEnum.freq();
-      }
-    }
-
-    public boolean next() throws IOException {
-      if (!any) {
-        return false;
-      } else {
-        return docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS;
-      }
-    }
-  }
-
-  // Back compat: implements legacy TermPositions API on top
-  // of flex API
-  final private class LegacyTermPositions implements TermPositions {
-
-    String currentField;
-    final Fields fields;
-    TermsEnum terms;
-    DocsAndPositionsEnum postingsEnum;
-    DocsEnum docsEnum;
-    boolean any;
-
-    LegacyTermPositions() throws IOException {
-      fields = core.fields;
-    }
-
-    public void close() {}
-
-    public void seek(TermEnum termEnum) throws IOException {
-      seek(termEnum.term());
-    }
-
-    public boolean skipTo(int target) throws IOException {
-      if (!any) {
-        return false;
-      } else {
-        return docsEnum.advance(target) != docsEnum.NO_MORE_DOCS;
-      }
-    }
-
-    public void seek(Term term) throws IOException {
-
-      any = false;
-
-      if (terms != null && !term.field.equals(currentField)) {
-        // new field
-        terms = null;
-      }
-
-      if (terms == null) {
-        currentField = term.field;
-        Terms terms1 = fields.terms(currentField);
-        if (terms1 == null) {
-          // no such field
-          return;
-        } else {
-          terms = terms1.iterator();
-        }
-      }
-
-      if (terms.seek(new BytesRef(term.text)) == TermsEnum.SeekStatus.FOUND) {
-        // Term exists
-        any = true;
-        postingsEnum = terms.docsAndPositions(deletedDocs, postingsEnum);
-        if (postingsEnum == null) {
-          docsEnum = terms.docs(deletedDocs, postingsEnum);
-        } else {
-          docsEnum = postingsEnum;
-        }
-      }
-    }
-
-    public int doc() {
-      if (!any) {
-        return 0;
-      } else {
-        return docsEnum.docID();
-      }
-    }
-
-    public int freq() {
-      if (!any) {
-        return 0;
-      } else {
-        return docsEnum.freq();
-      }
-    }
-
-    public boolean next() throws IOException {
-      if (!any) {
-        return false;
-      } else {
-        return docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS;
-      }
-    }
-
-    public int read(int[] docs, int[] freqs) throws IOException {
-      throw new UnsupportedOperationException("TermPositions does not support processing multiple documents in one call. Use TermDocs instead.");
-    }
-
-    public int nextPosition() throws IOException {
-      pendingPayload = null;
-      if (!any || postingsEnum == null) {
-        return 0;
-      } else {
-        return postingsEnum.nextPosition();
-      }
-    }
-    
-    private BytesRef pendingPayload;
-
-    public int getPayloadLength() throws IOException {
-      if (!any || postingsEnum == null) {
-        return 0;
-      } else {
-        if (pendingPayload == null) {
-          if (!postingsEnum.hasPayload()) {
-            return 0;
-          }
-          pendingPayload = postingsEnum.getPayload();
-        }
-        if (pendingPayload == null) {
-          return 0;
-        }
-        return pendingPayload.length;
-      }
-    }
-
-    public byte[] getPayload(byte[] bytes, int offset) throws IOException {
-      if (!any || postingsEnum == null) {
-        return null;
-      }
-      if (pendingPayload == null) {
-        if (!postingsEnum.hasPayload()) {
-          return null;
-        }
-        pendingPayload = postingsEnum.getPayload();
-      }
-      if (pendingPayload == null) {
-        return null;
-      }
-
-      // old API would always used passed in bytes if it
-      // "fits", else allocate new:
-      if (bytes != null && pendingPayload.length <= bytes.length - offset) {
-        System.arraycopy(pendingPayload.bytes, pendingPayload.offset, bytes, offset, pendingPayload.length);
-        return bytes;
-      } else if (pendingPayload.offset == 0 && pendingPayload.length == pendingPayload.bytes.length) {
-        return pendingPayload.bytes;
-      } else {
-        final byte[] retBytes = new byte[pendingPayload.length];
-        System.arraycopy(pendingPayload.bytes, pendingPayload.offset, retBytes, 0, pendingPayload.length);
-        return retBytes;
-      }
-    }
-
-    public boolean isPayloadAvailable() {
-      if (!any || postingsEnum == null) {
-        return false;
-      } else {
-        return postingsEnum.hasPayload();
-      }
-    }
-  }
 }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentTermPositionVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentTermPositionVector.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentTermPositionVector.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentTermPositionVector.java Thu Jul 22 19:34:35 2010
@@ -1,5 +1,7 @@
 package org.apache.lucene.index;
 
+import org.apache.lucene.util.BytesRef;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -22,7 +24,7 @@ class SegmentTermPositionVector extends 
   protected TermVectorOffsetInfo[][] offsets;
   public static final int[] EMPTY_TERM_POS = new int[0];
   
-  public SegmentTermPositionVector(String field, String terms[], int termFreqs[], int[][] positions, TermVectorOffsetInfo[][] offsets) {
+  public SegmentTermPositionVector(String field, BytesRef terms[], int termFreqs[], int[][] positions, TermVectorOffsetInfo[][] offsets) {
     super(field, terms, termFreqs);
     this.offsets = offsets;
     this.positions = positions;

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentTermVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentTermVector.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentTermVector.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SegmentTermVector.java Thu Jul 22 19:34:35 2010
@@ -19,13 +19,15 @@ package org.apache.lucene.index;
 
 import java.util.*;
 
+import org.apache.lucene.util.BytesRef;
+
 
 class SegmentTermVector implements TermFreqVector {
   private String field;
-  private String terms[];
+  private BytesRef terms[];
   private int termFreqs[];
   
-  SegmentTermVector(String field, String terms[], int termFreqs[]) {
+  SegmentTermVector(String field, BytesRef terms[], int termFreqs[]) {
     this.field = field;
     this.terms = terms;
     this.termFreqs = termFreqs;
@@ -59,7 +61,7 @@ class SegmentTermVector implements TermF
     return terms == null ? 0 : terms.length;
   }
 
-  public String [] getTerms() {
+  public BytesRef [] getTerms() {
     return terms;
   }
 
@@ -67,14 +69,14 @@ class SegmentTermVector implements TermF
     return termFreqs;
   }
 
-  public int indexOf(String termText) {
+  public int indexOf(BytesRef termBytes) {
     if(terms == null)
       return -1;
-    int res = Arrays.binarySearch(terms, termText);
+    int res = Arrays.binarySearch(terms, termBytes);
     return res >= 0 ? res : -1;
   }
 
-  public int[] indexesOf(String [] termNumbers, int start, int len) {
+  public int[] indexesOf(BytesRef [] termNumbers, int start, int len) {
     // TODO: there must be a more efficient way of doing this.
     //       At least, we could advance the lower bound of the terms array
     //       as we find valid indexes. Also, it might be possible to leverage

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SortedTermVectorMapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SortedTermVectorMapper.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SortedTermVectorMapper.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/SortedTermVectorMapper.java Thu Jul 22 19:34:35 2010
@@ -17,6 +17,8 @@ package org.apache.lucene.index;
 
 import java.util.*;
 
+import org.apache.lucene.util.BytesRef;
+
 /**
  * Store a sorted collection of {@link org.apache.lucene.index.TermVectorEntry}s.  Collects all term information
  * into a single, SortedSet.
@@ -30,7 +32,7 @@ public class SortedTermVectorMapper exte
 
 
   private SortedSet<TermVectorEntry> currentSet;
-  private Map<String,TermVectorEntry> termToTVE = new HashMap<String,TermVectorEntry>();
+  private Map<BytesRef,TermVectorEntry> termToTVE = new HashMap<BytesRef,TermVectorEntry>();
   private boolean storeOffsets;
   private boolean storePositions;
   /**
@@ -61,7 +63,7 @@ public class SortedTermVectorMapper exte
    */
   //We need to combine any previous mentions of the term
   @Override
-  public void map(String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+  public void map(BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
     TermVectorEntry entry =  termToTVE.get(term);
     if (entry == null) {
       entry = new TermVectorEntry(ALL, term, frequency, 

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/Term.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/Term.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/Term.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/Term.java Thu Jul 22 19:34:35 2010
@@ -17,6 +17,9 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
+import java.util.Comparator;
+
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.StringHelper;
 
 /**
@@ -29,14 +32,26 @@ import org.apache.lucene.util.StringHelp
 
 public final class Term implements Comparable<Term>, java.io.Serializable {
   String field;
-  String text;
+  BytesRef bytes;
 
+  /** Constructs a Term with the given field and bytes.
+   * <p>Note that a null field or null bytes value results in undefined
+   * behavior for most Lucene APIs that accept a Term parameter. 
+   * <p>WARNING: the provided BytesRef is not copied, but used directly.
+   * Therefore the bytes should not be modified after construction, for
+   * example, you should clone a copy rather than pass reused bytes from
+   * a TermsEnum.
+   */
+  public Term(String fld, BytesRef bytes) {
+    field = fld == null ? null : StringHelper.intern(fld);
+    this.bytes = bytes;
+  }
+  
   /** Constructs a Term with the given field and text.
    * <p>Note that a null field or null text value results in undefined
    * behavior for most Lucene APIs that accept a Term parameter. */
-  public Term(String fld, String txt) {
-    field = fld == null ? null : StringHelper.intern(fld);
-    text = txt;
+  public Term(String fld, String text) {
+    this(fld, new BytesRef(text));
   }
 
   /** Constructs a Term with the given field and empty text.
@@ -46,15 +61,27 @@ public final class Term implements Compa
    * @param fld
    */
   public Term(String fld) {
-    this(fld, "", true);
+    this(fld, new BytesRef(), true);
   }
 
-  /** @lucene.experimental */
-  public Term(String fld, String txt, boolean intern) {
+  /** 
+   * WARNING: the provided BytesRef is not copied, but used directly.
+   * Therefore the bytes should not be modified after construction, for
+   * example, you should clone a copy rather than pass reused bytes from
+   * a TermsEnum.
+   * 
+   * @lucene.experimental 
+   */
+  public Term(String fld, BytesRef bytes, boolean intern) {
     field = intern ? StringHelper.intern(fld) : fld;	  // field names are interned
-    text = txt;					          // unless already known to be
+    this.bytes = bytes;					          // unless already known to be
   }
 
+  /** @lucene.experimental */
+  public Term(String fld, String text, boolean intern) {
+    this(fld, new BytesRef(text), intern);
+  }
+  
   /** Returns the field of this term, an interned string.   The field indicates
     the part of a document which this term came from. */
   public final String field() { return field; }
@@ -62,8 +89,26 @@ public final class Term implements Compa
   /** Returns the text of this term.  In the case of words, this is simply the
     text of the word.  In the case of dates and other types, this is an
     encoding of the object as a string.  */
-  public final String text() { return text; }
-  
+  public final String text() { return bytes.utf8ToString(); }
+
+  /** Returns the bytes of this term. */
+  public final BytesRef bytes() { return bytes; }
+
+  /**
+   * Optimized construction of new Terms by reusing same field as this Term
+   * - avoids field.intern() overhead 
+   * <p>WARNING: the provided BytesRef is not copied, but used directly.
+   * Therefore the bytes should not be modified after construction, for
+   * example, you should clone a copy rather than pass reused bytes from
+   * a TermsEnum.
+   * @param text The bytes of the new term (field is implicitly same as this Term instance)
+   * @return A new Term
+   */
+  public Term createTerm(BytesRef bytes)
+  {
+      return new Term(field,bytes,false);
+  }
+
   /**
    * Optimized construction of new Terms by reusing same field as this Term
    * - avoids field.intern() overhead 
@@ -89,10 +134,10 @@ public final class Term implements Compa
         return false;
     } else if (!field.equals(other.field))
       return false;
-    if (text == null) {
-      if (other.text != null)
+    if (bytes == null) {
+      if (other.bytes != null)
         return false;
-    } else if (!text.equals(other.text))
+    } else if (!bytes.equals(other.bytes))
       return false;
     return true;
   }
@@ -102,7 +147,7 @@ public final class Term implements Compa
     final int prime = 31;
     int result = 1;
     result = prime * result + ((field == null) ? 0 : field.hashCode());
-    result = prime * result + ((text == null) ? 0 : text.hashCode());
+    result = prime * result + ((bytes == null) ? 0 : bytes.hashCode());
     return result;
   }
 
@@ -113,19 +158,47 @@ public final class Term implements Compa
     The ordering of terms is first by field, then by text.*/
   public final int compareTo(Term other) {
     if (field == other.field)			  // fields are interned
-      return text.compareTo(other.text);
+      return bytes.compareTo(other.bytes);
+    else
+      return field.compareTo(other.field);
+  }
+  
+  @Deprecated
+  private static final Comparator<BytesRef> legacyComparator = 
+    BytesRef.getUTF8SortedAsUTF16Comparator();
+
+  /** 
+   * @deprecated For internal backwards compatibility use only
+   * @lucene.internal
+   */
+  @Deprecated
+  public final int compareToUTF16(Term other) {
+    if (field == other.field) // fields are interned
+      return legacyComparator.compare(this.bytes, other.bytes);
     else
       return field.compareTo(other.field);
   }
 
+  /** 
+   * Resets the field and text of a Term. 
+   * <p>WARNING: the provided BytesRef is not copied, but used directly.
+   * Therefore the bytes should not be modified after construction, for
+   * example, you should clone a copy rather than pass reused bytes from
+   * a TermsEnum.
+   */
+  final void set(String fld, BytesRef bytes) {
+    field = fld;
+    this.bytes = bytes;
+  }
+
   /** Resets the field and text of a Term. */
   final void set(String fld, String txt) {
     field = fld;
-    text = txt;
+    this.bytes = new BytesRef(txt);
   }
 
   @Override
-  public final String toString() { return field + ":" + text; }
+  public final String toString() { return field + ":" + bytes.utf8ToString(); }
 
   private void readObject(java.io.ObjectInputStream in)
     throws java.io.IOException, ClassNotFoundException

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermFreqVector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermFreqVector.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermFreqVector.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermFreqVector.java Thu Jul 22 19:34:35 2010
@@ -1,5 +1,7 @@
 package org.apache.lucene.index;
 
+import org.apache.lucene.util.BytesRef;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -38,7 +40,7 @@ public interface TermFreqVector {
   /** 
    * @return An Array of term texts in ascending order.
    */
-  public String[] getTerms();
+  public BytesRef[] getTerms();
 
 
   /** Array of term frequencies. Locations of the array correspond one to one
@@ -54,7 +56,7 @@ public interface TermFreqVector {
    *  <code>term</code> appears. If this term does not appear in the array,
    *  return -1.
    */
-  public int indexOf(String term);
+  public int indexOf(BytesRef term);
 
 
   /** Just like <code>indexOf(int)</code> but searches for a number of terms
@@ -66,6 +68,6 @@ public interface TermFreqVector {
    *  @param start index in the array where the list of terms starts
    *  @param len the number of terms in the list
    */
-  public int[] indexesOf(String[] terms, int start, int len);
+  public int[] indexesOf(BytesRef[] terms, int start, int len);
 
 }

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermVectorEntry.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermVectorEntry.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermVectorEntry.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermVectorEntry.java Thu Jul 22 19:34:35 2010
@@ -1,5 +1,7 @@
 package org.apache.lucene.index;
 
+import org.apache.lucene.util.BytesRef;
+
 /**
  * Copyright 2007 The Apache Software Foundation
  * <p/>
@@ -21,7 +23,7 @@ package org.apache.lucene.index;
  */
 public class TermVectorEntry {
   private String field;
-  private String term;
+  private BytesRef term;
   private int frequency;
   private TermVectorOffsetInfo [] offsets;
   int [] positions;
@@ -30,7 +32,7 @@ public class TermVectorEntry {
   public TermVectorEntry() {
   }
 
-  public TermVectorEntry(String field, String term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
+  public TermVectorEntry(String field, BytesRef term, int frequency, TermVectorOffsetInfo[] offsets, int[] positions) {
     this.field = field;
     this.term = term;
     this.frequency = frequency;
@@ -55,7 +57,7 @@ public class TermVectorEntry {
     return positions;
   }
 
-  public String getTerm() {
+  public BytesRef getTerm() {
     return term;
   }
 

Modified: lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermVectorMapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermVectorMapper.java?rev=966819&r1=966818&r2=966819&view=diff
==============================================================================
--- lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermVectorMapper.java (original)
+++ lucene/dev/branches/realtime_search/lucene/src/java/org/apache/lucene/index/TermVectorMapper.java Thu Jul 22 19:34:35 2010
@@ -1,4 +1,7 @@
 package org.apache.lucene.index;
+
+import org.apache.lucene.util.BytesRef;
+
 /**
  * Copyright 2007 The Apache Software Foundation
  *
@@ -62,7 +65,7 @@ public abstract class TermVectorMapper {
    * @param offsets null if the offset is not specified, otherwise the offset into the field of the term
    * @param positions null if the position is not specified, otherwise the position in the field of the term
    */
-  public abstract void map(String term, int frequency, TermVectorOffsetInfo [] offsets, int [] positions);
+  public abstract void map(BytesRef term, int frequency, TermVectorOffsetInfo [] offsets, int [] positions);
 
   /**
    * Indicate to Lucene that even if there are positions stored, this mapper is not interested in them and they