You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/01/30 00:19:07 UTC

svn commit: r1237505 [1/3] - in /lucene/dev/branches/lucene2858: ./ lucene/ lucene/contrib/ lucene/contrib/misc/ lucene/contrib/misc/src/java/ lucene/contrib/misc/src/java/org/apache/lucene/store/ lucene/src/java/org/apache/lucene/codecs/ lucene/src/ja...

Author: uschindler
Date: Sun Jan 29 23:19:05 2012
New Revision: 1237505

URL: http://svn.apache.org/viewvc?rev=1237505&view=rev
Log:
Reverse merged revision(s) from lucene/dev/trunk up to 1237502

Added:
    lucene/dev/branches/lucene2858/lucene/contrib/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
      - copied unchanged from r1237498, lucene/dev/trunk/lucene/contrib/misc/src/java/org/apache/lucene/store/NativeUnixDirectory.java
Removed:
    lucene/dev/branches/lucene2858/lucene/contrib/misc/src/java/org/apache/lucene/store/DirectIOLinuxDirectory.java
Modified:
    lucene/dev/branches/lucene2858/   (props changed)
    lucene/dev/branches/lucene2858/lucene/   (props changed)
    lucene/dev/branches/lucene2858/lucene/CHANGES.txt
    lucene/dev/branches/lucene2858/lucene/contrib/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene2858/lucene/contrib/misc/build.xml
    lucene/dev/branches/lucene2858/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp
    lucene/dev/branches/lucene2858/lucene/contrib/misc/src/java/overview.html
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/IndexCommit.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/Directory.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/FSDirectory.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/NRTCachingDirectory.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/RAMDirectory.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/RAMFile.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/RAMOutputStream.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/FixedBitSet.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/UnicodeUtil.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/fst/Builder.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/fst/FST.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/fst/FSTEnum.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/fst/NodeHash.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/fst/Outputs.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/fst/PairOutputs.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/fst/PositiveIntOutputs.java
    lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/fst/Util.java
    lucene/dev/branches/lucene2858/lucene/src/test-framework/java/org/apache/lucene/store/MockDirectoryWrapper.java
    lucene/dev/branches/lucene2858/lucene/src/test/org/apache/lucene/index/TestCrashCausesCorruptIndex.java
    lucene/dev/branches/lucene2858/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java
    lucene/dev/branches/lucene2858/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
    lucene/dev/branches/lucene2858/lucene/src/test/org/apache/lucene/index/TestIndexCommit.java
    lucene/dev/branches/lucene2858/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
    lucene/dev/branches/lucene2858/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java
    lucene/dev/branches/lucene2858/lucene/src/test/org/apache/lucene/util/fst/TestFSTs.java
    lucene/dev/branches/lucene2858/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
    lucene/dev/branches/lucene2858/modules/analysis/kuromoji/   (props changed)
    lucene/dev/branches/lucene2858/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/TokenInfoFST.java
    lucene/dev/branches/lucene2858/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/dict/UserDictionary.java
    lucene/dev/branches/lucene2858/modules/analysis/kuromoji/src/java/org/apache/lucene/analysis/kuromoji/viterbi/Viterbi.java
    lucene/dev/branches/lucene2858/modules/analysis/kuromoji/src/resources/org/apache/lucene/analysis/kuromoji/dict/TokenInfoDictionary$fst.dat
    lucene/dev/branches/lucene2858/modules/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/kuromoji/util/TokenInfoDictionaryBuilder.java
    lucene/dev/branches/lucene2858/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletion.java
    lucene/dev/branches/lucene2858/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
    lucene/dev/branches/lucene2858/solr/   (props changed)
    lucene/dev/branches/lucene2858/solr/core/   (props changed)
    lucene/dev/branches/lucene2858/solr/core/src/java/   (props changed)
    lucene/dev/branches/lucene2858/solr/core/src/java/org/apache/solr/core/IndexDeletionPolicyWrapper.java
    lucene/dev/branches/lucene2858/solr/core/src/java/org/apache/solr/core/SolrDeletionPolicy.java
    lucene/dev/branches/lucene2858/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
    lucene/dev/branches/lucene2858/solr/core/src/java/org/apache/solr/handler/SnapPuller.java
    lucene/dev/branches/lucene2858/solr/core/src/java/org/apache/solr/handler/SnapShooter.java
    lucene/dev/branches/lucene2858/solr/core/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
    lucene/dev/branches/lucene2858/solr/core/src/java/org/apache/solr/servlet/cache/HttpCacheHeaderUtil.java
    lucene/dev/branches/lucene2858/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
    lucene/dev/branches/lucene2858/solr/core/src/java/org/apache/solr/update/SolrIndexWriter.java
    lucene/dev/branches/lucene2858/solr/core/src/test/   (props changed)
    lucene/dev/branches/lucene2858/solr/core/src/test/org/apache/solr/cloud/FullSolrCloudTest.java
    lucene/dev/branches/lucene2858/solr/core/src/test/org/apache/solr/core/TestSolrDeletionPolicy1.java

Modified: lucene/dev/branches/lucene2858/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/CHANGES.txt?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene2858/lucene/CHANGES.txt Sun Jan 29 23:19:05 2012
@@ -748,6 +748,10 @@ Changes in backwards compatibility polic
   
 * LUCENE-3712: Removed unused and untested ReaderUtil#subReader methods.
   (Uwe Schindler)
+
+* LUCENE-3672: Deprecate Directory.fileModified and
+  IndexCommit.getTimestamp and .getVersion. (Andrzej Bialecki, Robert
+  Muir, Mike McCandless)
   
 Security fixes
 
@@ -802,6 +806,9 @@ New Features
 
 * LUCENE-3690: Added HTMLStripCharFilter, a CharFilter that strips HTML
   markup. (Steve Rowe)
+
+* LUCENE-3725: Added optional packing to FST building; this uses extra
+  RAM during building but results in a smaller FST.  (Mike McCandless)
   
 Bug fixes
 

Modified: lucene/dev/branches/lucene2858/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/contrib/CHANGES.txt?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/lucene2858/lucene/contrib/CHANGES.txt Sun Jan 29 23:19:05 2012
@@ -62,6 +62,14 @@ New Features
 
  * LUCENE-3602: Added query time joining under the join module. (Martijn van Groningen, Michael McCandless)
 
+ * LUCENE-2795: Generified DirectIOLinuxDirectory to work across any
+   Unix supporting the O_DIRECT flag when opening a file (tested on
+   Linux and OS X but likely other Unixes will work), and improved it
+   so it can be used for indexing and searching.  The directory uses
+   direct IO when doing large merges to avoid unnecessarily evicting
+   cached IO pages due to large merges.  (Varun Thacker, Mike
+   McCandless)
+
 API Changes
 
  * LUCENE-2606: Changed RegexCapabilities interface to fix thread 

Modified: lucene/dev/branches/lucene2858/lucene/contrib/misc/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/contrib/misc/build.xml?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/contrib/misc/build.xml (original)
+++ lucene/dev/branches/lucene2858/lucene/contrib/misc/build.xml Sun Jan 29 23:19:05 2012
@@ -40,11 +40,13 @@
       <fileset file="${src.dir}/org/apache/lucene/store/NativePosixUtil.cpp" />  
       <includepath>
         <pathelement location="${java.home}/../include"/>
+        <pathelement location="${java.home}/include"/>
         <pathelement location="${java.home}/../include/linux"/>
         <pathelement location="${java.home}/../include/solaris"/>
       </includepath>
 
       <compilerarg value="-fPIC" />
+      <linkerarg value="-lstdc++" />
     </cc>
   </target>
 

Modified: lucene/dev/branches/lucene2858/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp (original)
+++ lucene/dev/branches/lucene2858/lucene/contrib/misc/src/java/org/apache/lucene/store/NativePosixUtil.cpp Sun Jan 29 23:19:05 2012
@@ -15,6 +15,16 @@
  * the License.
  */
 
+#ifdef LINUX
+  #define DIRECT_FLAG O_DIRECT | O_NOATIME
+  #define LINUX
+#elif __APPLE__
+  #define DIRECT_FLAG 0
+  #define OSX
+#else
+  #define DIRECT_FLAG O_DIRECT  // __unix__ is not used as even Linux falls under it.
+#endif
+
 #include <jni.h>
 #include <fcntl.h>   // posix_fadvise, constants for open
 #include <string.h>   // strerror
@@ -26,6 +36,7 @@
 
 // java -cp .:lib/junit-4.7.jar:./build/classes/test:./build/classes/java:./build/classes/demo -Dlucene.version=2.9-dev -DtempDir=build -ea org.junit.runner.JUnitCore org.apache.lucene.index.TestDoc
 
+#ifdef LINUX
 /*
  * Class:     org_apache_lucene_store_NativePosixUtil
  * Method:    posix_fadvise
@@ -89,7 +100,7 @@ JNIEXPORT jint JNICALL Java_org_apache_l
 
   return 0;
 }
-
+#endif
 
 /*
  * Class:     org_apache_lucene_store_NativePosixUtil
@@ -107,16 +118,26 @@ JNIEXPORT jobject JNICALL Java_org_apach
   char *fname;
 
   class_ioex = env->FindClass("java/io/IOException");
-  if (class_ioex == NULL) return NULL;
+  if (class_ioex == NULL) {
+    return NULL;
+  }
   class_fdesc = env->FindClass("java/io/FileDescriptor");
-  if (class_fdesc == NULL) return NULL;
+  if (class_fdesc == NULL) {
+    return NULL;
+  }
 
   fname = (char *) env->GetStringUTFChars(filename, NULL);
 
   if (readOnly) {
-    fd = open(fname, O_RDONLY | O_DIRECT | O_NOATIME);
+	fd = open(fname, O_RDONLY | DIRECT_FLAG);
+	#ifdef OSX
+	  fcntl(fd, F_NOCACHE, 1);
+	#endif
   } else {
-    fd = open(fname, O_RDWR | O_CREAT | O_DIRECT | O_NOATIME, 0666);
+	fd = open(fname, O_RDWR | O_CREAT | DIRECT_FLAG, 0666);
+	#ifdef OSX
+	  fcntl(fd, F_NOCACHE, 1);
+	#endif
   }
 
   //printf("open %s -> %d; ro %d\n", fname, fd, readOnly); fflush(stdout);
@@ -131,19 +152,22 @@ JNIEXPORT jobject JNICALL Java_org_apach
 
   // construct a new FileDescriptor
   const_fdesc = env->GetMethodID(class_fdesc, "<init>", "()V");
-  if (const_fdesc == NULL) return NULL;
+  if (const_fdesc == NULL) {
+    return NULL;
+  }
   ret = env->NewObject(class_fdesc, const_fdesc);
 
   // poke the "fd" field with the file descriptor
   field_fd = env->GetFieldID(class_fdesc, "fd", "I");
-  if (field_fd == NULL) return NULL;
+  if (field_fd == NULL) {
+    return NULL;
+  }
   env->SetIntField(ret, field_fd, fd);
 
   // and return it
   return ret;
 }
 
-
 /*
  * Class:     org_apache_lucene_store_NativePosixUtil
  * Method:    pread

Modified: lucene/dev/branches/lucene2858/lucene/contrib/misc/src/java/overview.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/contrib/misc/src/java/overview.html?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/contrib/misc/src/java/overview.html (original)
+++ lucene/dev/branches/lucene2858/lucene/contrib/misc/src/java/overview.html Sun Jan 29 23:19:05 2012
@@ -27,33 +27,29 @@
 The misc package has various tools for splitting/merging indices,
 changing norms, finding high freq terms, and others.
 
-<h2>DirectIOLinuxDirectory</h2>
+<a name="NativeUnixDirectory"></a>
+<h2>NativeUnixDirectory</h2>
 
 <p>
 <b>NOTE</b>: This uses C++ sources (accessible via JNI), which you'll
-have to compile on your platform.  Further, this is a very
-platform-specific extensions (runs only on Linux, and likely only on
-2.6.x kernels).
+have to compile on your platform.
 
 <p>
-DirectIOLinuxDirectory is a Directory implementation that bypasses the
-OS's buffer cache for any IndexInput and IndexOutput opened through it
-(using the linux-specific O_DIRECT flag).
+{@link NativeUnixDirectory} is a Directory implementation that bypasses the
+OS's buffer cache (using direct IO) for any IndexInput and IndexOutput
+used during merging of segments larger than a specified size (default
+10 MB).  This avoids evicting hot pages that are still in-use for
+searching, keeping search more responsive while large merges run.
 
 <p>
-Note that doing so typically results in bad performance loss!  You
-should not use this for searching, but rather for indexing (or maybe
-just merging during indexing), to avoid evicting useful pages from the
-buffer cache.
-
-See <a target=_top href="http://chbits.blogspot.com/2010/06/lucene-and-fadvisemadvise.html">here</a>
+See <a target=_top href="http://blog.mikemccandless.com/2010/06/lucene-and-fadvisemadvise.html">this blog post</a>
 for details.
 
 Steps to build:
 <ul>
   <li> <tt>cd lucene/contrib/misc/</tt>
 
-  <li> To compile NativePosixUtil.cpp -> libNativePosixUtil.so on Linux run<tt> ant build-native-unix</tt>.
+  <li> To compile NativePosixUtil.cpp -> libNativePosixUtil.so, run<tt> ant build-native-unix</tt>.
   
   <li><tt>libNativePosixUtil.so</tt> will be located in the <tt>lucene/build/native/</tt> folder
 
@@ -63,13 +59,6 @@ Steps to build:
 </ul>
 
 <p>
-To use this, you'll likely want to make a custom subclass of
-FSDirectory that only opens direct IndexInput/Output for merging.  One
-hackish way to do this is to check if the current thread's name starts
-with "Lucene Merge Thread".  Alternatively, you could use this Dir as
-is for all indexing ops, but not for searching.
-
-<p>
 NativePosixUtil.cpp/java also expose access to the posix_madvise,
 madvise, posix_fadvise functions, which are somewhat more cross
 platform than O_DIRECT, however, in testing (see above link), these

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java Sun Jan 29 23:19:05 2012
@@ -398,7 +398,7 @@ public class BlockTreeTermsReader extend
     final long indexStartFP;
     final long rootBlockFP;
     final BytesRef rootCode;
-    private FST<BytesRef> index;
+    private final FST<BytesRef> index;
 
     //private boolean DEBUG;
 
@@ -433,6 +433,8 @@ public class BlockTreeTermsReader extend
           w.close();
         }
         */
+      } else {
+        index = null;
       }
     }
 
@@ -495,6 +497,8 @@ public class BlockTreeTermsReader extend
 
       private final BytesRef term = new BytesRef();
 
+      private final FST.BytesReader fstReader;
+
       // TODO: can we share this with the frame in STE?
       private final class Frame {
         final int ord;
@@ -755,6 +759,12 @@ public class BlockTreeTermsReader extend
           arcs[arcIdx] = new FST.Arc<BytesRef>();
         }
 
+        if (index == null) {
+          fstReader = null;
+        } else {
+          fstReader = index.getBytesReader(0);
+        }
+
         // TODO: if the automaton is "smallish" we really
         // should use the terms index to seek at least to
         // the initial term and likely to subsequent terms
@@ -842,7 +852,7 @@ public class BlockTreeTermsReader extend
           // TODO: we could be more efficient for the next()
           // case by using current arc as starting point,
           // passed to findTargetArc
-          arc = index.findTargetArc(target, arc, getArc(1+idx));
+          arc = index.findTargetArc(target, arc, getArc(1+idx), fstReader);
           assert arc != null;
           output = fstOutputs.add(output, arc.output);
           idx++;
@@ -1186,6 +1196,7 @@ public class BlockTreeTermsReader extend
       private boolean eof;
 
       final BytesRef term = new BytesRef();
+      private final FST.BytesReader fstReader;
 
       @SuppressWarnings("unchecked") private FST.Arc<BytesRef>[] arcs = new FST.Arc[1];
 
@@ -1196,6 +1207,12 @@ public class BlockTreeTermsReader extend
         // Used to hold seek by TermState, or cached seek
         staticFrame = new Frame(-1);
 
+        if (index == null) {
+          fstReader = null;
+        } else {
+          fstReader = index.getBytesReader(0);
+        }
+
         // Init w/ root block; don't use index since it may
         // not (and need not) have been loaded
         for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) {
@@ -1581,7 +1598,7 @@ public class BlockTreeTermsReader extend
 
           final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
 
-          final FST.Arc<BytesRef> nextArc = index.findTargetArc(targetLabel, arc, getArc(1+targetUpto));
+          final FST.Arc<BytesRef> nextArc = index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);
 
           if (nextArc == null) {
 
@@ -1838,7 +1855,7 @@ public class BlockTreeTermsReader extend
 
           final int targetLabel = target.bytes[target.offset + targetUpto] & 0xFF;
 
-          final FST.Arc<BytesRef> nextArc = index.findTargetArc(targetLabel, arc, getArc(1+targetUpto));
+          final FST.Arc<BytesRef> nextArc = index.findTargetArc(targetLabel, arc, getArc(1+targetUpto), fstReader);
 
           if (nextArc == null) {
 

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java Sun Jan 29 23:19:05 2012
@@ -288,7 +288,7 @@ public class BlockTreeTermsWriter extend
       final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
       final Builder<BytesRef> indexBuilder = new Builder<BytesRef>(FST.INPUT_TYPE.BYTE1,
                                                                    0, 0, true, false, Integer.MAX_VALUE,
-                                                                   outputs, null);
+                                                                   outputs, null, false);
       //if (DEBUG) {
       //  System.out.println("  compile index for prefix=" + prefix);
       //}
@@ -831,7 +831,7 @@ public class BlockTreeTermsWriter extend
                                          0, 0, true,
                                          true, Integer.MAX_VALUE,
                                          noOutputs,
-                                         new FindBlocks());
+                                         new FindBlocks(), false);
 
       postingsWriter.setField(fieldInfo);
     }

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/VariableGapTermsIndexWriter.java Sun Jan 29 23:19:05 2012
@@ -229,7 +229,7 @@ public class VariableGapTermsIndexWriter
       ////System.out.println("VGW: field=" + fieldInfo.name);
 
       // Always put empty string in
-      fstBuilder.add(new IntsRef(), fstOutputs.get(termsFilePointer));
+      fstBuilder.add(new IntsRef(), termsFilePointer);
       startTermsFilePointer = termsFilePointer;
     }
 
@@ -260,7 +260,7 @@ public class VariableGapTermsIndexWriter
       final int lengthSave = text.length;
       text.length = indexedTermPrefixLength(lastTerm, text);
       try {
-        fstBuilder.add(Util.toIntsRef(text, scratchIntsRef), fstOutputs.get(termsFilePointer));
+        fstBuilder.add(Util.toIntsRef(text, scratchIntsRef), termsFilePointer);
       } finally {
         text.length = lengthSave;
       }

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java Sun Jan 29 23:19:05 2012
@@ -521,9 +521,10 @@ class SimpleTextFieldsReader extends Fie
     private void loadTerms() throws IOException {
       PositiveIntOutputs posIntOutputs = PositiveIntOutputs.getSingleton(false);
       final Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>> b;
-      b = new Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(FST.INPUT_TYPE.BYTE1,
-                                                                          new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
-                                                                                                                            new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs)));
+      final PairOutputs<Long,Long> outputsInner = new PairOutputs<Long,Long>(posIntOutputs, posIntOutputs);
+      final PairOutputs<Long,PairOutputs.Pair<Long,Long>> outputs = new PairOutputs<Long,PairOutputs.Pair<Long,Long>>(posIntOutputs,
+                                                                                                                      outputsInner);
+      b = new Builder<PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>>(FST.INPUT_TYPE.BYTE1, outputs);
       IndexInput in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
       in.seek(termsStart);
       final BytesRef lastTerm = new BytesRef(10);
@@ -536,9 +537,9 @@ class SimpleTextFieldsReader extends Fie
         SimpleTextUtil.readLine(in, scratch);
         if (scratch.equals(END) || StringHelper.startsWith(scratch, FIELD)) {
           if (lastDocsStart != -1) {
-            b.add(Util.toIntsRef(lastTerm, scratchIntsRef), new PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>(lastDocsStart,
-                                                                                   new PairOutputs.Pair<Long,Long>((long) docFreq,
-                                                                                                                   posIntOutputs.get(totalTermFreq))));
+            b.add(Util.toIntsRef(lastTerm, scratchIntsRef),
+                  outputs.newPair(lastDocsStart,
+                                  outputsInner.newPair((long) docFreq, totalTermFreq)));
             sumTotalTermFreq += totalTermFreq;
           }
           break;
@@ -553,9 +554,8 @@ class SimpleTextFieldsReader extends Fie
           totalTermFreq += ArrayUtil.parseInt(scratchUTF16.chars, 0, scratchUTF16.length);
         } else if (StringHelper.startsWith(scratch, TERM)) {
           if (lastDocsStart != -1) {
-            b.add(Util.toIntsRef(lastTerm, scratchIntsRef), new PairOutputs.Pair<Long,PairOutputs.Pair<Long,Long>>(lastDocsStart,
-                                                                                   new PairOutputs.Pair<Long,Long>((long) docFreq,
-                                                                                                                   posIntOutputs.get(totalTermFreq))));
+            b.add(Util.toIntsRef(lastTerm, scratchIntsRef), outputs.newPair(lastDocsStart,
+                                                                            outputsInner.newPair((long) docFreq, totalTermFreq)));
           }
           lastDocsStart = in.getFilePointer();
           final int len = scratch.length - TERM.length;

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/DirectoryReader.java Sun Jan 29 23:19:05 2012
@@ -494,7 +494,15 @@ public final class DirectoryReader exten
     return DirectoryReader.open(directory, writer, infos, subReaders, termInfosIndexDivisor);
   }
 
-  /** Version number when this IndexReader was opened. */
+  /**
+   * Version number when this IndexReader was opened. Not
+   * implemented in the IndexReader base class.
+   *
+   * <p>This method
+   * returns the version recorded in the commit that the
+   * reader opened.  This version is advanced every time
+   * a change is made with {@link IndexWriter}.</p>
+   */
   public long getVersion() {
     ensureOpen();
     return segmentInfos.getVersion();
@@ -608,22 +616,6 @@ public final class DirectoryReader exten
   }  
   
   /**
-   * Returns the time the index in the named directory was last modified. 
-   * Do not use this to check whether the reader is still up-to-date, use
-   * {@link #isCurrent()} instead. 
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   */
-  public static long lastModified(final Directory directory) throws CorruptIndexException, IOException {
-    return ((Long) new SegmentInfos.FindSegmentsFile(directory) {
-      @Override
-      public Object doBody(String segmentFileName) throws IOException {
-        return Long.valueOf(directory.fileModified(segmentFileName));
-      }
-    }.run()).longValue();
-  }
-  
-  /**
    * Reads version number from segments files. The version number is
    * initialized with a timestamp and then increased by one for each change of
    * the index.
@@ -675,7 +667,6 @@ public final class DirectoryReader exten
     Collection<String> files;
     Directory dir;
     long generation;
-    long version;
     final Map<String,String> userData;
     private final int segmentCount;
 
@@ -684,7 +675,6 @@ public final class DirectoryReader exten
       this.dir = dir;
       userData = infos.getUserData();
       files = Collections.unmodifiableCollection(infos.files(dir, true));
-      version = infos.getVersion();
       generation = infos.getGeneration();
       segmentCount = infos.size();
     }
@@ -715,11 +705,6 @@ public final class DirectoryReader exten
     }
 
     @Override
-    public long getVersion() {
-      return version;
-    }
-
-    @Override
     public long getGeneration() {
       return generation;
     }

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/IndexCommit.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/IndexCommit.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/IndexCommit.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/IndexCommit.java Sun Jan 29 23:19:05 2012
@@ -83,39 +83,31 @@ public abstract class IndexCommit implem
   public boolean equals(Object other) {
     if (other instanceof IndexCommit) {
       IndexCommit otherCommit = (IndexCommit) other;
-      return otherCommit.getDirectory().equals(getDirectory()) && otherCommit.getVersion() == getVersion();
-    } else
+      return otherCommit.getDirectory().equals(getDirectory()) && otherCommit.getGeneration() == getGeneration();
+    } else {
       return false;
+    }
   }
 
   @Override
   public int hashCode() {
-    return (int) (getDirectory().hashCode() + getVersion());
+    return getDirectory().hashCode() + Long.valueOf(getGeneration()).hashCode();
   }
 
-  /** Returns the version for this IndexCommit.  This is the
-   *  same value that {@link IndexReader#getVersion} would
-   *  return if it were opened on this commit. */
-  public abstract long getVersion();
-
   /** Returns the generation (the _N in segments_N) for this
    *  IndexCommit */
   public abstract long getGeneration();
 
-  /** Convenience method that returns the last modified time
-   *  of the segments_N file corresponding to this index
-   *  commit, equivalent to
-   *  getDirectory().fileModified(getSegmentsFileName()). */
-  public long getTimestamp() throws IOException {
-    return getDirectory().fileModified(getSegmentsFileName());
-  }
-
   /** Returns userData, previously passed to {@link
    *  IndexWriter#commit(Map)} for this commit.  Map is
    *  String -> String. */
   public abstract Map<String,String> getUserData() throws IOException;
   
   public int compareTo(IndexCommit commit) {
+    if (getDirectory() != commit.getDirectory()) {
+      throw new UnsupportedOperationException("cannot compare IndexCommits from different Directory instances");
+    }
+
     long gen = getGeneration();
     long comgen = commit.getGeneration();
     if (gen < comgen) {
@@ -126,5 +118,4 @@ public abstract class IndexCommit implem
       return 0;
     }
   }
-
 }

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java Sun Jan 29 23:19:05 2012
@@ -655,7 +655,6 @@ final class IndexFileDeleter {
     boolean deleted;
     Directory directory;
     Collection<CommitPoint> commitsToDelete;
-    long version;
     long generation;
     final Map<String,String> userData;
     private final int segmentCount;
@@ -665,7 +664,6 @@ final class IndexFileDeleter {
       this.commitsToDelete = commitsToDelete;
       userData = segmentInfos.getUserData();
       segmentsFileName = segmentInfos.getCurrentSegmentFileName();
-      version = segmentInfos.getVersion();
       generation = segmentInfos.getGeneration();
       files = Collections.unmodifiableCollection(segmentInfos.files(directory, true));
       segmentCount = segmentInfos.size();
@@ -697,11 +695,6 @@ final class IndexFileDeleter {
     }
 
     @Override
-    public long getVersion() {
-      return version;
-    }
-
-    @Override
     public long getGeneration() {
       return generation;
     }

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/SegmentInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/SegmentInfos.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/SegmentInfos.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/SegmentInfos.java Sun Jan 29 23:19:05 2012
@@ -94,16 +94,15 @@ public final class SegmentInfos implemen
    * Whenever you add a new format, make it 1 smaller (negative version logic)! */
   public static final int FORMAT_SEGMENTS_GEN_CURRENT = -2;
     
-  public int counter = 0;    // used to name new segments
+  public int counter;    // used to name new segments
   
   /**
-   * counts how often the index has been changed by adding or deleting docs.
-   * starting with the current time in milliseconds forces to create unique version numbers.
+   * counts how often the index has been changed
    */
-  public long version = System.currentTimeMillis();
+  public long version;
   
-  private long generation = 0;     // generation of the "segments_N" for the next commit
-  private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read
+  private long generation;     // generation of the "segments_N" for the next commit
+  private long lastGeneration; // generation of the "segments_N" file we last successfully read
                                    // or wrote; this is normally the same as generation except if
                                    // there was an IOException that had interrupted a commit
 

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/index/SnapshotDeletionPolicy.java Sun Jan 29 23:19:05 2012
@@ -126,11 +126,6 @@ public class SnapshotDeletionPolicy impl
     }
 
     @Override
-    public long getVersion() {
-      return cp.getVersion();
-    }
-
-    @Override
     public boolean isDeleted() {
       return cp.isDeleted();
     }

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/CompoundFileDirectory.java Sun Jan 29 23:19:05 2012
@@ -249,14 +249,6 @@ public final class CompoundFileDirectory
     return entries.containsKey(IndexFileNames.stripSegmentName(name));
   }
   
-  
-  /** Returns the time the compound file was last modified. */
-  @Override
-  public long fileModified(String name) throws IOException {
-    ensureOpen();
-    return directory.fileModified(fileName);
-  }
-  
   /** Not implemented
    * @throws UnsupportedOperationException */
   @Override

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/Directory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/Directory.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/Directory.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/Directory.java Sun Jan 29 23:19:05 2012
@@ -62,10 +62,6 @@ public abstract class Directory implemen
   public abstract boolean fileExists(String name)
        throws IOException;
 
-  /** Returns the time the named file was last modified. */
-  public abstract long fileModified(String name)
-       throws IOException;
-
   /** Removes an existing file in the directory. */
   public abstract void deleteFile(String name)
        throws IOException;

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/FSDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/FSDirectory.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/FSDirectory.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/FSDirectory.java Sun Jan 29 23:19:05 2012
@@ -251,14 +251,6 @@ public abstract class FSDirectory extend
   }
 
   /** Returns the time the named file was last modified. */
-  @Override
-  public long fileModified(String name) {
-    ensureOpen();
-    File file = new File(directory, name);
-    return file.lastModified();
-  }
-
-  /** Returns the time the named file was last modified. */
   public static long fileModified(File directory, String name) {
     File file = new File(directory, name);
     return file.lastModified();

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/FileSwitchDirectory.java Sun Jan 29 23:19:05 2012
@@ -138,11 +138,6 @@ public class FileSwitchDirectory extends
   }
 
   @Override
-  public long fileModified(String name) throws IOException {
-    return getDirectory(name).fileModified(name);
-  }
-
-  @Override
   public void deleteFile(String name) throws IOException {
     getDirectory(name).deleteFile(name);
   }

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/NRTCachingDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/NRTCachingDirectory.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/NRTCachingDirectory.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/NRTCachingDirectory.java Sun Jan 29 23:19:05 2012
@@ -153,15 +153,6 @@ public class NRTCachingDirectory extends
   }
 
   @Override
-  public synchronized long fileModified(String name) throws IOException {
-    if (cache.fileExists(name)) {
-      return cache.fileModified(name);
-    } else {
-      return delegate.fileModified(name);
-    }
-  }
-
-  @Override
   public synchronized void deleteFile(String name) throws IOException {
     if (VERBOSE) {
       System.out.println("nrtdir.deleteFile name=" + name);

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/RAMDirectory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/RAMDirectory.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/RAMDirectory.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/RAMDirectory.java Sun Jan 29 23:19:05 2012
@@ -98,19 +98,6 @@ public class RAMDirectory extends Direct
     return fileMap.containsKey(name);
   }
 
-  /** Returns the time the named file was last modified.
-   * @throws IOException if the file does not exist
-   */
-  @Override
-  public final long fileModified(String name) throws IOException {
-    ensureOpen();
-    RAMFile file = fileMap.get(name);
-    if (file == null) {
-      throw new FileNotFoundException(name);
-    }
-    return file.getLastModified();
-  }
-
   /** Returns the length in bytes of a file in the directory.
    * @throws IOException if the file does not exist
    */

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/RAMFile.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/RAMFile.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/RAMFile.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/RAMFile.java Sun Jan 29 23:19:05 2012
@@ -26,8 +26,6 @@ public class RAMFile {
   RAMDirectory directory;
   protected long sizeInBytes;
 
-  private long lastModified = System.currentTimeMillis();
-
   // File used as buffer, in no RAMDirectory
   public RAMFile() {}
   
@@ -44,15 +42,6 @@ public class RAMFile {
     this.length = length;
   }
 
-  // For non-stream access from thread that might be concurrent with writing
-  public synchronized long getLastModified() {
-    return lastModified;
-  }
-
-  protected synchronized void setLastModified(long lastModified) {
-    this.lastModified = lastModified;
-  }
-
   protected final byte[] addBuffer(int size) {
     byte[] buffer = newBuffer(size);
     synchronized(this) {

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/RAMOutputStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/RAMOutputStream.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/RAMOutputStream.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/store/RAMOutputStream.java Sun Jan 29 23:19:05 2012
@@ -167,7 +167,6 @@ public class RAMOutputStream extends Ind
 
   @Override
   public void flush() throws IOException {
-    file.setLastModified(System.currentTimeMillis());
     setFileLength();
   }
 

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/FixedBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/FixedBitSet.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/FixedBitSet.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/FixedBitSet.java Sun Jan 29 23:19:05 2012
@@ -95,7 +95,7 @@ public final class FixedBitSet extends D
   }
 
   public boolean get(int index) {
-    assert index >= 0 && index < numBits;
+    assert index >= 0 && index < numBits: "index=" + index;
     int i = index >> 6;               // div 64
     // signed shift will keep a negative index and force an
     // array-index-out-of-bounds-exception, removing the need for an explicit check.

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/UnicodeUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/UnicodeUtil.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/UnicodeUtil.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/UnicodeUtil.java Sun Jan 29 23:19:05 2012
@@ -588,7 +588,7 @@ public final class UnicodeUtil {
         out[out_offset++] = (char)(((b&0xf)<<12) + ((utf8[offset]&0x3f)<<6) + (utf8[offset+1]&0x3f));
         offset += 2;
       } else {
-        assert b < 0xf8;
+        assert b < 0xf8: "b=" + b;
         int ch = ((b&0x7)<<18) + ((utf8[offset]&0x3f)<<12) + ((utf8[offset+1]&0x3f)<<6) + (utf8[offset+2]&0x3f);
         offset += 3;
         if (ch < UNI_MAX_BMP) {

Modified: lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/fst/Builder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/fst/Builder.java?rev=1237505&r1=1237504&r2=1237505&view=diff
==============================================================================
--- lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/fst/Builder.java (original)
+++ lucene/dev/branches/lucene2858/lucene/src/java/org/apache/lucene/util/fst/Builder.java Sun Jan 29 23:19:05 2012
@@ -17,15 +17,15 @@ package org.apache.lucene.util.fst;
  * limitations under the License.
  */
 
+import java.io.IOException;
+
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.fst.FST.INPUT_TYPE; // javadoc
 
-import java.io.IOException;
-
 /**
- * Builds a compact FST (maps an IntsRef term to an arbitrary
+ * Builds a minimal FST (maps an IntsRef term to an arbitrary
  * output) from pre-sorted terms with outputs (the FST
  * becomes an FSA if you use NoOutputs).  The FST is written
  * on-the-fly into a compact serialized format byte array, which can
@@ -35,12 +35,6 @@ import java.io.IOException;
  * <p>NOTE: The algorithm is described at
  * http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.24.3698</p>
  *
- * If your outputs are ByteSequenceOutput then the final FST
- * will be minimal, but if you use PositiveIntOutput then
- * it's only "near minimal".  For example, aa/0, aab/1, bbb/2
- * will produce 6 states when a 5 state fst is also
- * possible.
- *
  * The parameterized type T is the output type.  See the
  * subclasses of {@link Outputs}.
  *
@@ -52,7 +46,7 @@ public class Builder<T> {
   private final FST<T> fst;
   private final T NO_OUTPUT;
 
-  // private static final boolean DEBUG = false;
+  // private static final boolean DEBUG = true;
 
   // simplistic pruning: we prune node (and all following
   // nodes) if less than this number of terms go through it:
@@ -88,7 +82,7 @@ public class Builder<T> {
    * pruning options turned off.
    */
   public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
-    this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null);
+    this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, null, false);
   }
 
   /**
@@ -127,16 +121,20 @@ public class Builder<T> {
    * @param outputs The output type for each input sequence. Applies only if building an FST. For
    *    FSA, use {@link NoOutputs#getSingleton()} and {@link NoOutputs#getNoOutput()} as the
    *    singleton output object.
+   *
+   * @param willPackFST Pass true if you will rewrite (compact) the FST before saving.  This
+   *    causes the FST to create additional data structures internally to facilitate rewriting, but
+   *    it means the resulting FST cannot be saved: it must first be rewritten using {@link FST#FST(FST,int[])}.
    */
   public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
                  boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
-                 FreezeTail<T> freezeTail) {
+                 FreezeTail<T> freezeTail, boolean willPackFST) {
     this.minSuffixCount1 = minSuffixCount1;
     this.minSuffixCount2 = minSuffixCount2;
     this.freezeTail = freezeTail;
     this.doShareNonSingletonNodes = doShareNonSingletonNodes;
     this.shareMaxTailLength = shareMaxTailLength;
-    fst = new FST<T>(inputType, outputs);
+    fst = new FST<T>(inputType, outputs, willPackFST);
     if (doShareSuffix) {
       dedupHash = new NodeHash<T>(fst);
     } else {
@@ -170,23 +168,23 @@ public class Builder<T> {
     fst.setAllowArrayArcs(b);
   }
 
-  private CompiledNode compileNode(UnCompiledNode<T> n, int tailLength) throws IOException {
-    final int address;
-    if (dedupHash != null && (doShareNonSingletonNodes || n.numArcs <= 1) && tailLength <= shareMaxTailLength) {
-      if (n.numArcs == 0) {
-        address = fst.addNode(n);
+  private CompiledNode compileNode(UnCompiledNode<T> nodeIn, int tailLength) throws IOException {
+    final int node;
+    if (dedupHash != null && (doShareNonSingletonNodes || nodeIn.numArcs <= 1) && tailLength <= shareMaxTailLength) {
+      if (nodeIn.numArcs == 0) {
+        node = fst.addNode(nodeIn);
       } else {
-        address = dedupHash.add(n);
+        node = dedupHash.add(nodeIn);
       }
     } else {
-      address = fst.addNode(n);
+      node = fst.addNode(nodeIn);
     }
-    assert address != -2;
+    assert node != -2;
 
-    n.clear();
+    nodeIn.clear();
 
     final CompiledNode fn = new CompiledNode();
-    fn.address = address;
+    fn.node = node;
     return fn;
   }
 
@@ -319,6 +317,11 @@ public class Builder<T> {
     }
     */
 
+    // De-dup NO_OUTPUT since it must be a singleton:
+    if (output.equals(NO_OUTPUT)) {
+      output = NO_OUTPUT;
+    }
+
     assert lastInput.length == 0 || input.compareTo(lastInput) >= 0: "inputs are added out of order lastInput=" + lastInput + " vs input=" + input;
     assert validOutput(output);
 
@@ -443,7 +446,7 @@ public class Builder<T> {
       }
     }
     //if (DEBUG) System.out.println("  builder.finish root.isFinal=" + root.isFinal + " root.output=" + root.output);
-    fst.finish(compileNode(root, lastInput.length).address);
+    fst.finish(compileNode(root, lastInput.length).node);
 
     return fst;
   }
@@ -480,7 +483,7 @@ public class Builder<T> {
   }
 
   static final class CompiledNode implements Node {
-    int address;
+    int node;
     public boolean isCompiled() {
       return true;
     }
@@ -560,7 +563,7 @@ public class Builder<T> {
       final Arc<T> arc = arcs[numArcs-1];
       assert arc.label == labelToMatch: "arc.label=" + arc.label + " vs " + labelToMatch;
       arc.target = target;
-      //assert target.address != -2;
+      //assert target.node != -2;
       arc.nextFinalOutput = nextFinalOutput;
       arc.isFinal = isFinal;
     }