You are viewing a plain text version of this content. The canonical link for it was provided as a hyperlink in the original archive page (not preserved in this plain-text rendering).
Posted to commits@lucene.apache.org by jp...@apache.org on 2016/11/10 14:14:28 UTC
[1/4] lucene-solr:master: LUCENE-7545: Dense norms/doc-values should
not consume memory for the IW buffer.
Repository: lucene-solr
Updated Branches:
refs/heads/master 2902727a1 -> c4c5e868d
LUCENE-7545: Dense norms/doc-values should not consume memory for the IW buffer.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/c415bc8d
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/c415bc8d
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/c415bc8d
Branch: refs/heads/master
Commit: c415bc8d1ddbb6477c83d559cb254a384aceb6df
Parents: 2902727
Author: Adrien Grand <jp...@gmail.com>
Authored: Thu Nov 10 14:04:15 2016 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Thu Nov 10 14:04:15 2016 +0100
----------------------------------------------------------------------
.../lucene/index/BinaryDocValuesWriter.java | 15 ++--
.../apache/lucene/index/DocsWithFieldSet.java | 62 +++++++++++++++
.../apache/lucene/index/NormValuesWriter.java | 15 ++--
.../lucene/index/NumericDocValuesWriter.java | 15 ++--
.../lucene/index/SortedDocValuesWriter.java | 15 ++--
.../index/SortedNumericDocValuesWriter.java | 15 ++--
.../lucene/index/SortedSetDocValuesWriter.java | 15 ++--
.../lucene/index/TestDocsWithFieldSet.java | 80 ++++++++++++++++++++
8 files changed, 178 insertions(+), 54 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c415bc8d/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
index 9611a03..0344c58 100644
--- a/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/BinaryDocValuesWriter.java
@@ -24,11 +24,9 @@ import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@@ -48,7 +46,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
private final Counter iwBytesUsed;
private final PackedLongValues.Builder lengths;
- private FixedBitSet docsWithField;
+ private DocsWithFieldSet docsWithField;
private final FieldInfo fieldInfo;
private long bytesUsed;
private int lastDocID = -1;
@@ -60,7 +58,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
this.bytesOut = bytes.getDataOutput();
this.lengths = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
this.iwBytesUsed = iwBytesUsed;
- this.docsWithField = new FixedBitSet(64);
+ this.docsWithField = new DocsWithFieldSet();
this.bytesUsed = lengths.ramBytesUsed() + docsWithField.ramBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
@@ -84,8 +82,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
// Should never happen!
throw new RuntimeException(ioe);
}
- docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
- docsWithField.set(docID);
+ docsWithField.add(docID);
updateBytesUsed();
lastDocID = docID;
@@ -112,7 +109,7 @@ class BinaryDocValuesWriter extends DocValuesWriter {
if (fieldInfoIn != fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
- return new BufferedBinaryDocValues(lengths, maxLength, bytes.getDataInput(), docsWithField);
+ return new BufferedBinaryDocValues(lengths, maxLength, bytes.getDataInput(), docsWithField.iterator());
}
});
}
@@ -124,12 +121,12 @@ class BinaryDocValuesWriter extends DocValuesWriter {
final DocIdSetIterator docsWithField;
final DataInput bytesIterator;
- BufferedBinaryDocValues(PackedLongValues lengths, int maxLength, DataInput bytesIterator, FixedBitSet docsWithFields) {
+ BufferedBinaryDocValues(PackedLongValues lengths, int maxLength, DataInput bytesIterator, DocIdSetIterator docsWithFields) {
this.value = new BytesRefBuilder();
this.value.grow(maxLength);
this.lengthsIterator = lengths.iterator();
this.bytesIterator = bytesIterator;
- this.docsWithField = new BitSetIterator(docsWithFields, lengths.size());
+ this.docsWithField = docsWithFields;
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c415bc8d/lucene/core/src/java/org/apache/lucene/index/DocsWithFieldSet.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocsWithFieldSet.java b/lucene/core/src/java/org/apache/lucene/index/DocsWithFieldSet.java
new file mode 100644
index 0000000..6c0d6dd
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/index/DocsWithFieldSet.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BitSetIterator;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/** Accumulator for documents that have a value for a field. This is optimized
+ * for the case that all documents have a value. */
+final class DocsWithFieldSet extends DocIdSet {
+
+ private static long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(DocsWithFieldSet.class);
+
+ private FixedBitSet set;
+ private int cost = 0;
+ private int lastDocId = -1;
+
+ void add(int docID) {
+ if (docID <= lastDocId) {
+ throw new IllegalArgumentException("Out of order doc ids: last=" + lastDocId + ", next=" + docID);
+ }
+ if (set != null) {
+ set = FixedBitSet.ensureCapacity(set, docID);
+ set.set(docID);
+ } else if (docID != cost) {
+ // migrate to a sparse encoding using a bit set
+ set = new FixedBitSet(docID + 1);
+ set.set(0, cost);
+ set.set(docID);
+ }
+ lastDocId = docID;
+ cost++;
+ }
+
+ @Override
+ public long ramBytesUsed() {
+ return BASE_RAM_BYTES_USED + (set == null ? 0 : set.ramBytesUsed());
+ }
+
+ @Override
+ public DocIdSetIterator iterator() {
+ return set != null ? new BitSetIterator(set, cost) : DocIdSetIterator.all(cost);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c415bc8d/lucene/core/src/java/org/apache/lucene/index/NormValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/NormValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/NormValuesWriter.java
index b0d05e4..4923f3b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/NormValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/NormValuesWriter.java
@@ -22,9 +22,7 @@ import java.io.IOException;
import org.apache.lucene.codecs.NormsConsumer;
import org.apache.lucene.codecs.NormsProducer;
import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@@ -32,7 +30,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
* segment flushes. */
class NormValuesWriter {
- private FixedBitSet docsWithField;
+ private DocsWithFieldSet docsWithField;
private PackedLongValues.Builder pending;
private final Counter iwBytesUsed;
private long bytesUsed;
@@ -40,7 +38,7 @@ class NormValuesWriter {
private int lastDocID = -1;
public NormValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
- docsWithField = new FixedBitSet(64);
+ docsWithField = new DocsWithFieldSet();
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
this.fieldInfo = fieldInfo;
@@ -54,8 +52,7 @@ class NormValuesWriter {
}
pending.add(value);
- docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
- docsWithField.set(docID);
+ docsWithField.add(docID);
updateBytesUsed();
@@ -82,7 +79,7 @@ class NormValuesWriter {
if (fieldInfo != NormValuesWriter.this.fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
- return new BufferedNorms(values, docsWithField);
+ return new BufferedNorms(values, docsWithField.iterator());
}
@Override
@@ -108,9 +105,9 @@ class NormValuesWriter {
final DocIdSetIterator docsWithField;
private long value;
- BufferedNorms(PackedLongValues values, FixedBitSet docsWithFields) {
+ BufferedNorms(PackedLongValues values, DocIdSetIterator docsWithFields) {
this.iter = values.iterator();
- this.docsWithField = new BitSetIterator(docsWithFields, values.size());
+ this.docsWithField = docsWithFields;
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c415bc8d/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
index 24a7010..d4dd66a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/NumericDocValuesWriter.java
@@ -21,9 +21,7 @@ import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@@ -34,13 +32,13 @@ class NumericDocValuesWriter extends DocValuesWriter {
private PackedLongValues.Builder pending;
private final Counter iwBytesUsed;
private long bytesUsed;
- private FixedBitSet docsWithField;
+ private DocsWithFieldSet docsWithField;
private final FieldInfo fieldInfo;
private int lastDocID = -1;
public NumericDocValuesWriter(FieldInfo fieldInfo, Counter iwBytesUsed) {
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
- docsWithField = new FixedBitSet(64);
+ docsWithField = new DocsWithFieldSet();
bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
this.fieldInfo = fieldInfo;
this.iwBytesUsed = iwBytesUsed;
@@ -53,8 +51,7 @@ class NumericDocValuesWriter extends DocValuesWriter {
}
pending.add(value);
- docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
- docsWithField.set(docID);
+ docsWithField.add(docID);
updateBytesUsed();
@@ -83,7 +80,7 @@ class NumericDocValuesWriter extends DocValuesWriter {
if (fieldInfo != NumericDocValuesWriter.this.fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
- return new BufferedNumericDocValues(values, docsWithField);
+ return new BufferedNumericDocValues(values, docsWithField.iterator());
}
});
}
@@ -94,9 +91,9 @@ class NumericDocValuesWriter extends DocValuesWriter {
final DocIdSetIterator docsWithField;
private long value;
- BufferedNumericDocValues(PackedLongValues values, FixedBitSet docsWithFields) {
+ BufferedNumericDocValues(PackedLongValues values, DocIdSetIterator docsWithFields) {
this.iter = values.iterator();
- this.docsWithField = new BitSetIterator(docsWithFields, values.size());
+ this.docsWithField = docsWithFields;
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c415bc8d/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
index 7e43e49..e439caf 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedDocValuesWriter.java
@@ -22,13 +22,11 @@ import java.io.IOException;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@@ -37,7 +35,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
class SortedDocValuesWriter extends DocValuesWriter {
final BytesRefHash hash;
private PackedLongValues.Builder pending;
- private FixedBitSet docsWithField;
+ private DocsWithFieldSet docsWithField;
private final Counter iwBytesUsed;
private long bytesUsed; // this currently only tracks differences in 'pending'
private final FieldInfo fieldInfo;
@@ -52,7 +50,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
BytesRefHash.DEFAULT_CAPACITY,
new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
- docsWithField = new FixedBitSet(64);
+ docsWithField = new DocsWithFieldSet();
bytesUsed = pending.ramBytesUsed() + docsWithField.ramBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
@@ -69,8 +67,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
}
addOneValue(value);
- docsWithField = FixedBitSet.ensureCapacity(docsWithField, docID);
- docsWithField.set(docID);
+ docsWithField.add(docID);
lastDocID = docID;
}
@@ -121,7 +118,7 @@ class SortedDocValuesWriter extends DocValuesWriter {
if (fieldInfoIn != fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
- return new BufferedSortedDocValues(hash, valueCount, ords, sortedValues, ordMap, docsWithField);
+ return new BufferedSortedDocValues(hash, valueCount, ords, sortedValues, ordMap, docsWithField.iterator());
}
});
}
@@ -136,13 +133,13 @@ class SortedDocValuesWriter extends DocValuesWriter {
final PackedLongValues.Iterator iter;
final DocIdSetIterator docsWithField;
- public BufferedSortedDocValues(BytesRefHash hash, int valueCount, PackedLongValues docToOrd, int[] sortedValues, int[] ordMap, FixedBitSet docsWithField) {
+ public BufferedSortedDocValues(BytesRefHash hash, int valueCount, PackedLongValues docToOrd, int[] sortedValues, int[] ordMap, DocIdSetIterator docsWithField) {
this.hash = hash;
this.valueCount = valueCount;
this.sortedValues = sortedValues;
this.iter = docToOrd.iterator();
this.ordMap = ordMap;
- this.docsWithField = new BitSetIterator(docsWithField, docToOrd.size());
+ this.docsWithField = docsWithField;
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c415bc8d/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
index 3f50623..75236cc 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedNumericDocValuesWriter.java
@@ -23,9 +23,7 @@ import java.util.Arrays;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@@ -34,7 +32,7 @@ import org.apache.lucene.util.packed.PackedLongValues;
class SortedNumericDocValuesWriter extends DocValuesWriter {
private PackedLongValues.Builder pending; // stream of all values
private PackedLongValues.Builder pendingCounts; // count of values per doc
- private FixedBitSet docsWithField;
+ private DocsWithFieldSet docsWithField;
private final Counter iwBytesUsed;
private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
private final FieldInfo fieldInfo;
@@ -47,7 +45,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
this.iwBytesUsed = iwBytesUsed;
pending = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
- docsWithField = new FixedBitSet(64);
+ docsWithField = new DocsWithFieldSet();
bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed() + docsWithField.ramBytesUsed() + RamUsageEstimator.sizeOf(currentValues);
iwBytesUsed.addAndGet(bytesUsed);
}
@@ -76,8 +74,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
pendingCounts.add(currentUpto);
currentUpto = 0;
- docsWithField = FixedBitSet.ensureCapacity(docsWithField, currentDoc);
- docsWithField.set(currentDoc);
+ docsWithField.add(currentDoc);
}
@Override
@@ -112,7 +109,7 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
if (fieldInfoIn != fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
- return new BufferedSortedNumericDocValues(values, valueCounts, docsWithField);
+ return new BufferedSortedNumericDocValues(values, valueCounts, docsWithField.iterator());
}
});
}
@@ -124,10 +121,10 @@ class SortedNumericDocValuesWriter extends DocValuesWriter {
private int valueCount;
private int valueUpto;
- public BufferedSortedNumericDocValues(PackedLongValues values, PackedLongValues valueCounts, FixedBitSet docsWithField) {
+ public BufferedSortedNumericDocValues(PackedLongValues values, PackedLongValues valueCounts, DocIdSetIterator docsWithField) {
valuesIter = values.iterator();
valueCountsIter = valueCounts.iterator();
- this.docsWithField = new BitSetIterator(docsWithField, values.size());
+ this.docsWithField = docsWithField;
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c415bc8d/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
index 35157d4..0f4fb5e 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SortedSetDocValuesWriter.java
@@ -24,13 +24,11 @@ import java.util.Arrays;
import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
@@ -40,7 +38,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
final BytesRefHash hash;
private PackedLongValues.Builder pending; // stream of all termIDs
private PackedLongValues.Builder pendingCounts; // termIDs per doc
- private FixedBitSet docsWithField;
+ private DocsWithFieldSet docsWithField;
private final Counter iwBytesUsed;
private long bytesUsed; // this only tracks differences in 'pending' and 'pendingCounts'
private final FieldInfo fieldInfo;
@@ -59,7 +57,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
new DirectBytesStartArray(BytesRefHash.DEFAULT_CAPACITY, iwBytesUsed));
pending = PackedLongValues.packedBuilder(PackedInts.COMPACT);
pendingCounts = PackedLongValues.deltaPackedBuilder(PackedInts.COMPACT);
- docsWithField = new FixedBitSet(64);
+ docsWithField = new DocsWithFieldSet();
bytesUsed = pending.ramBytesUsed() + pendingCounts.ramBytesUsed();
iwBytesUsed.addAndGet(bytesUsed);
}
@@ -103,8 +101,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
pendingCounts.add(count);
maxCount = Math.max(maxCount, count);
currentUpto = 0;
- docsWithField = FixedBitSet.ensureCapacity(docsWithField, currentDoc);
- docsWithField.set(currentDoc);
+ docsWithField.add(currentDoc);
}
@Override
@@ -158,7 +155,7 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
if (fieldInfoIn != fieldInfo) {
throw new IllegalArgumentException("wrong fieldInfo");
}
- return new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField);
+ return new BufferedSortedSetDocValues(sortedValues, ordMap, hash, ords, ordCounts, maxCount, docsWithField.iterator());
}
});
}
@@ -176,14 +173,14 @@ class SortedSetDocValuesWriter extends DocValuesWriter {
private int ordCount;
private int ordUpto;
- public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap, BytesRefHash hash, PackedLongValues ords, PackedLongValues ordCounts, int maxCount, FixedBitSet docsWithField) {
+ public BufferedSortedSetDocValues(int[] sortedValues, int[] ordMap, BytesRefHash hash, PackedLongValues ords, PackedLongValues ordCounts, int maxCount, DocIdSetIterator docsWithField) {
this.currentDoc = new int[maxCount];
this.sortedValues = sortedValues;
this.ordMap = ordMap;
this.hash = hash;
this.ordsIter = ords.iterator();
this.ordCountsIter = ordCounts.iterator();
- this.docsWithField = new BitSetIterator(docsWithField, ordCounts.size());
+ this.docsWithField = docsWithField;
}
@Override
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c415bc8d/lucene/core/src/test/org/apache/lucene/index/TestDocsWithFieldSet.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocsWithFieldSet.java b/lucene/core/src/test/org/apache/lucene/index/TestDocsWithFieldSet.java
new file mode 100644
index 0000000..b719adf
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDocsWithFieldSet.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.index;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+public class TestDocsWithFieldSet extends LuceneTestCase {
+
+ public void testDense() throws IOException {
+ DocsWithFieldSet set = new DocsWithFieldSet();
+ DocIdSetIterator it = set.iterator();
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
+
+ set.add(0);
+ it = set.iterator();
+ assertEquals(0, it.nextDoc());
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
+
+ long ramBytesUsed = set.ramBytesUsed();
+ for (int i = 1; i < 1000; ++i) {
+ set.add(i);
+ }
+ assertEquals(ramBytesUsed, set.ramBytesUsed());
+ it = set.iterator();
+ for (int i = 0; i < 1000; ++i) {
+ assertEquals(i, it.nextDoc());
+ }
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
+ }
+
+ public void testSparse() throws IOException {
+ DocsWithFieldSet set = new DocsWithFieldSet();
+ int doc = random().nextInt(10000);
+ set.add(doc);
+ DocIdSetIterator it = set.iterator();
+ assertEquals(doc, it.nextDoc());
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
+ int doc2 = doc + TestUtil.nextInt(random(), 1, 100);
+ set.add(doc2);
+ it = set.iterator();
+ assertEquals(doc, it.nextDoc());
+ assertEquals(doc2, it.nextDoc());
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
+ }
+
+ public void testDenseThenSparse() throws IOException {
+ int denseCount = random().nextInt(10000);
+ int nextDoc = denseCount + random().nextInt(10000);
+ DocsWithFieldSet set = new DocsWithFieldSet();
+ for (int i = 0; i < denseCount; ++i) {
+ set.add(i);
+ }
+ set.add(nextDoc);
+ DocIdSetIterator it = set.iterator();
+ for (int i = 0; i < denseCount; ++i) {
+ assertEquals(i, it.nextDoc());
+ }
+ assertEquals(nextDoc, it.nextDoc());
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, it.nextDoc());
+ }
+
+}
[3/4] lucene-solr:master: LUCENE-7531: Removed packing support from
FST.
Posted by jp...@apache.org.
LUCENE-7531: Removed packing support from FST.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/c4c5e868
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/c4c5e868
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/c4c5e868
Branch: refs/heads/master
Commit: c4c5e868d2304148c146d52833ac2c80b0541dc3
Parents: 6b9f113
Author: Adrien Grand <jp...@gmail.com>
Authored: Thu Nov 10 14:09:52 2016 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Thu Nov 10 15:01:49 2016 +0100
----------------------------------------------------------------------
lucene/analysis/kuromoji/ivy.xml | 2 +-
.../ja/dict/TokenInfoDictionary$fst.dat | Bin 1716198 -> 1698563 bytes
.../analysis/ja/util/ConnectionCostsWriter.java | 1 -
.../ja/util/TokenInfoDictionaryBuilder.java | 4 +-
.../blocktreeords/OrdsBlockTreeTermsWriter.java | 4 +-
.../codecs/memory/MemoryPostingsFormat.java | 18 +-
.../codecs/blocktree/BlockTreeTermsWriter.java | 4 +-
.../org/apache/lucene/util/fst/Builder.java | 28 +-
.../java/org/apache/lucene/util/fst/FST.java | 633 ++-----------------
.../apache/lucene/util/fst/package-info.java | 1 -
.../org/apache/lucene/util/fst/Test2BFST.java | 17 +-
.../org/apache/lucene/util/fst/TestFSTs.java | 27 +-
.../idversion/VersionBlockTreeTermsWriter.java | 4 +-
.../suggest/fst/FSTCompletionBuilder.java | 4 +-
.../org/apache/lucene/util/fst/FSTTester.java | 14 +-
15 files changed, 81 insertions(+), 680 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c4c5e868/lucene/analysis/kuromoji/ivy.xml
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/ivy.xml b/lucene/analysis/kuromoji/ivy.xml
index 10eba4e..eb08509 100644
--- a/lucene/analysis/kuromoji/ivy.xml
+++ b/lucene/analysis/kuromoji/ivy.xml
@@ -27,7 +27,7 @@
<dependencies>
<dependency org="mecab" name="mecab-ipadic" rev="${/mecab/mecab-ipadic}" conf="ipadic">
- <artifact name="ipadic" type=".tar.gz" url="http://mecab.googlecode.com/files/mecab-ipadic-2.7.0-20070801.tar.gz"/>
+ <artifact name="ipadic" type=".tar.gz" url="http://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/mecab-ipadic-2.7.0-20070801.tar.gz"/>
</dependency>
<dependency org="mecab" name="mecab-naist-jdic" rev="${/mecab/mecab-naist-jdic}" conf="naist">
<artifact name="mecab-naist-jdic" type=".tar.gz" url="http://sourceforge.jp/frs/redir.php?m=iij&f=/naist-jdic/53500/mecab-naist-jdic-0.6.3b-20111013.tar.gz"/>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c4c5e868/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary$fst.dat
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary$fst.dat b/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary$fst.dat
index 8935809..6cfad72 100644
Binary files a/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary$fst.dat and b/lucene/analysis/kuromoji/src/resources/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary$fst.dat differ
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c4c5e868/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ConnectionCostsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ConnectionCostsWriter.java b/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ConnectionCostsWriter.java
index 54382ed..6ad8a68 100644
--- a/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ConnectionCostsWriter.java
+++ b/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/ConnectionCostsWriter.java
@@ -28,7 +28,6 @@ import org.apache.lucene.analysis.ja.dict.ConnectionCosts;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.OutputStreamDataOutput;
-import org.apache.lucene.util.BitUtil;
public final class ConnectionCostsWriter {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c4c5e868/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java b/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
index 61d6f27..1b8abbb 100644
--- a/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
+++ b/lucene/analysis/kuromoji/src/tools/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryBuilder.java
@@ -33,12 +33,10 @@ import java.util.Comparator;
import java.util.List;
import org.apache.lucene.analysis.ja.util.DictionaryBuilder.DictionaryFormat;
-import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.PositiveIntOutputs;
-import org.apache.lucene.util.packed.PackedInts;
import com.ibm.icu.text.Normalizer2;
@@ -133,7 +131,7 @@ public class TokenInfoDictionaryBuilder {
System.out.println(" encode...");
PositiveIntOutputs fstOutput = PositiveIntOutputs.getSingleton();
- Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, true, PackedInts.DEFAULT, true, 15);
+ Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, 0, 0, true, true, Integer.MAX_VALUE, fstOutput, true, 15);
IntsRefBuilder scratch = new IntsRefBuilder();
long ord = -1; // first ord will be 0
String lastValue = null;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c4c5e868/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java
index fb682fd..b16bb15 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/blocktreeords/OrdsBlockTreeTermsWriter.java
@@ -48,7 +48,6 @@ import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
-import org.apache.lucene.util.packed.PackedInts;
/*
TODO:
@@ -363,8 +362,7 @@ public final class OrdsBlockTreeTermsWriter extends FieldsConsumer {
final Builder<Output> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
0, 0, true, false, Integer.MAX_VALUE,
- FST_OUTPUTS, false,
- PackedInts.COMPACT, true, 15);
+ FST_OUTPUTS, true, 15);
//if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix);
//}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c4c5e868/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
index 1427dec..2f71765 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/MemoryPostingsFormat.java
@@ -81,9 +81,6 @@ import org.apache.lucene.util.packed.PackedInts;
// loads itself in ram?
public final class MemoryPostingsFormat extends PostingsFormat {
- private final boolean doPackFST;
- private final float acceptableOverheadRatio;
-
public MemoryPostingsFormat() {
this(false, PackedInts.DEFAULT);
}
@@ -97,13 +94,11 @@ public final class MemoryPostingsFormat extends PostingsFormat {
*/
public MemoryPostingsFormat(boolean doPackFST, float acceptableOverheadRatio) {
super("Memory");
- this.doPackFST = doPackFST;
- this.acceptableOverheadRatio = acceptableOverheadRatio;
}
@Override
public String toString() {
- return "PostingsFormat(name=" + getName() + " doPackFST= " + doPackFST + ")";
+ return "PostingsFormat(name=" + getName() + ")";
}
private final static class TermsWriter {
@@ -111,16 +106,12 @@ public final class MemoryPostingsFormat extends PostingsFormat {
private final FieldInfo field;
private final Builder<BytesRef> builder;
private final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
- private final boolean doPackFST;
- private final float acceptableOverheadRatio;
private int termCount;
- public TermsWriter(IndexOutput out, FieldInfo field, boolean doPackFST, float acceptableOverheadRatio) {
+ public TermsWriter(IndexOutput out, FieldInfo field) {
this.out = out;
this.field = field;
- this.doPackFST = doPackFST;
- this.acceptableOverheadRatio = acceptableOverheadRatio;
- builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, doPackFST, acceptableOverheadRatio, true, 15);
+ builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
}
private class PostingsWriter {
@@ -307,8 +298,7 @@ public final class MemoryPostingsFormat extends PostingsFormat {
TermsEnum termsEnum = terms.iterator();
FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
- TermsWriter termsWriter = new TermsWriter(out, fieldInfo,
- doPackFST, acceptableOverheadRatio);
+ TermsWriter termsWriter = new TermsWriter(out, fieldInfo);
FixedBitSet docsSeen = new FixedBitSet(state.segmentInfo.maxDoc());
long sumTotalTermFreq = 0;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c4c5e868/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
index a4a150b..bdacc22 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
@@ -48,7 +48,6 @@ import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.Util;
-import org.apache.lucene.util.packed.PackedInts;
/*
TODO:
@@ -456,8 +455,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
final ByteSequenceOutputs outputs = ByteSequenceOutputs.getSingleton();
final Builder<BytesRef> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
0, 0, true, false, Integer.MAX_VALUE,
- outputs, false,
- PackedInts.COMPACT, true, 15);
+ outputs, true, 15);
//if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix);
//}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c4c5e868/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java b/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
index c5ab849..428edd3 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/Builder.java
@@ -23,7 +23,6 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST.INPUT_TYPE; // javadoc
-import org.apache.lucene.util.packed.PackedInts;
// TODO: could we somehow stream an FST to disk while we
// build it?
@@ -69,10 +68,6 @@ public class Builder<T> {
private final int shareMaxTailLength;
private final IntsRefBuilder lastInput = new IntsRefBuilder();
-
- // for packing
- private final boolean doPackFST;
- private final float acceptableOverheadRatio;
// NOTE: cutting this over to ArrayList instead loses ~6%
// in build performance on 9.8M Wikipedia terms; so we
@@ -99,11 +94,10 @@ public class Builder<T> {
/**
* Instantiates an FST/FSA builder without any pruning. A shortcut
* to {@link #Builder(FST.INPUT_TYPE, int, int, boolean,
- * boolean, int, Outputs, boolean, float,
- * boolean, int)} with pruning options turned off.
+ * boolean, int, Outputs, boolean, int)} with pruning options turned off.
*/
public Builder(FST.INPUT_TYPE inputType, Outputs<T> outputs) {
- this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, false, PackedInts.COMPACT, true, 15);
+ this(inputType, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
}
/**
@@ -143,11 +137,6 @@ public class Builder<T> {
* FSA, use {@link NoOutputs#getSingleton()} and {@link NoOutputs#getNoOutput()} as the
* singleton output object.
*
- * @param doPackFST Pass true to create a packed FST.
- *
- * @param acceptableOverheadRatio How to trade speed for space when building the FST. This option
- * is only relevant when doPackFST is true. @see PackedInts#getMutable(int, int, float)
- *
* @param allowArrayArcs Pass false to disable the array arc optimization
* while building the FST; this will make the resulting
* FST smaller but slower to traverse.
@@ -159,16 +148,13 @@ public class Builder<T> {
*/
public Builder(FST.INPUT_TYPE inputType, int minSuffixCount1, int minSuffixCount2, boolean doShareSuffix,
boolean doShareNonSingletonNodes, int shareMaxTailLength, Outputs<T> outputs,
- boolean doPackFST, float acceptableOverheadRatio, boolean allowArrayArcs,
- int bytesPageBits) {
+ boolean allowArrayArcs, int bytesPageBits) {
this.minSuffixCount1 = minSuffixCount1;
this.minSuffixCount2 = minSuffixCount2;
this.doShareNonSingletonNodes = doShareNonSingletonNodes;
this.shareMaxTailLength = shareMaxTailLength;
- this.doPackFST = doPackFST;
- this.acceptableOverheadRatio = acceptableOverheadRatio;
this.allowArrayArcs = allowArrayArcs;
- fst = new FST<>(inputType, outputs, doPackFST, acceptableOverheadRatio, bytesPageBits);
+ fst = new FST<>(inputType, outputs, bytesPageBits);
bytes = fst.bytes;
assert bytes != null;
if (doShareSuffix) {
@@ -496,11 +482,7 @@ public class Builder<T> {
//if (DEBUG) System.out.println(" builder.finish root.isFinal=" + root.isFinal + " root.output=" + root.output);
fst.finish(compileNode(root, lastInput.length()).node);
- if (doPackFST) {
- return fst.pack(this, 3, Math.max(10, (int) (getNodeCount()/4)), acceptableOverheadRatio);
- } else {
- return fst;
- }
+ return fst;
}
private void compileAllTargets(UnCompiledNode<T> node, int tailLength) throws IOException {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c4c5e868/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
index 4a0a3a9..5ea6dab 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/FST.java
@@ -24,13 +24,9 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
@@ -38,13 +34,9 @@ import org.apache.lucene.store.InputStreamDataInput;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.Accountable;
-import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Constants;
-import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.packed.GrowableWriter;
-import org.apache.lucene.util.packed.PackedInts;
// TODO: break this into WritableFST and ReadOnlyFST.. then
// we can have subclasses of ReadOnlyFST to handle the
@@ -90,14 +82,6 @@ public final class FST<T> implements Accountable {
static final int BIT_ARC_HAS_FINAL_OUTPUT = 1 << 5;
- // Arcs are stored as fixed-size (per entry) array, so
- // that we can find an arc using binary search. We do
- // this when number of arcs is > NUM_ARCS_ARRAY:
-
- // If set, the target node is delta coded vs current
- // position:
- private static final int BIT_TARGET_DELTA = 1 << 6;
-
// We use this as a marker (because this one flag is
// illegal by itself ...):
private static final byte ARCS_AS_FIXED_ARRAY = BIT_ARC_HAS_FINAL_OUTPUT;
@@ -137,7 +121,9 @@ public final class FST<T> implements Accountable {
/** Don't store arcWithOutputCount anymore */
private static final int VERSION_NO_NODE_ARC_COUNTS = 5;
- private static final int VERSION_CURRENT = VERSION_NO_NODE_ARC_COUNTS;
+ private static final int VERSION_PACKED_REMOVED = 6;
+
+ private static final int VERSION_CURRENT = VERSION_PACKED_REMOVED;
// Never serialized; just used to represent the virtual
// final node w/ no arcs:
@@ -168,9 +154,6 @@ public final class FST<T> implements Accountable {
public final Outputs<T> outputs;
- private final boolean packed;
- private PackedInts.Reader nodeRefToAddress;
-
private Arc<T> cachedRootArcs[];
/** Represents a single arc. */
@@ -273,18 +256,11 @@ public final class FST<T> implements Accountable {
return (flags & bit) != 0;
}
- private GrowableWriter nodeAddress;
-
- // TODO: we could be smarter here, and prune periodically
- // as we go; high in-count nodes will "usually" become
- // clear early on:
- private GrowableWriter inCounts;
-
private final int version;
// make a new empty FST, for building; Builder invokes
// this ctor
- FST(INPUT_TYPE inputType, Outputs<T> outputs, boolean willPackFST, float acceptableOverheadRatio, int bytesPageBits) {
+ FST(INPUT_TYPE inputType, Outputs<T> outputs, int bytesPageBits) {
this.inputType = inputType;
this.outputs = outputs;
version = VERSION_CURRENT;
@@ -293,17 +269,8 @@ public final class FST<T> implements Accountable {
// pad: ensure no node gets address 0 which is reserved to mean
// the stop state w/ no arcs
bytes.writeByte((byte) 0);
- if (willPackFST) {
- nodeAddress = new GrowableWriter(15, 8, acceptableOverheadRatio);
- inCounts = new GrowableWriter(1, 8, acceptableOverheadRatio);
- } else {
- nodeAddress = null;
- inCounts = null;
- }
emptyOutput = null;
- packed = false;
- nodeRefToAddress = null;
}
public static final int DEFAULT_MAX_BLOCK_BITS = Constants.JRE_IS_64BIT ? 30 : 28;
@@ -324,8 +291,12 @@ public final class FST<T> implements Accountable {
// NOTE: only reads most recent format; we don't have
// back-compat promise for FSTs (they are experimental):
- version = CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_PACKED, VERSION_NO_NODE_ARC_COUNTS);
- packed = in.readByte() == 1;
+ version = CodecUtil.checkHeader(in, FILE_FORMAT_NAME, VERSION_PACKED, VERSION_CURRENT);
+ if (version < VERSION_PACKED_REMOVED) {
+ if (in.readByte() == 1) {
+ throw new CorruptIndexException("Cannot read packed FSTs anymore", in);
+ }
+ }
if (in.readByte() == 1) {
// accepts empty string
// 1 KB blocks:
@@ -334,17 +305,12 @@ public final class FST<T> implements Accountable {
emptyBytes.copyBytes(in, numBytes);
// De-serialize empty-string output:
- BytesReader reader;
- if (packed) {
- reader = emptyBytes.getForwardReader();
- } else {
- reader = emptyBytes.getReverseReader();
- // NoOutputs uses 0 bytes when writing its output,
- // so we have to check here else BytesStore gets
- // angry:
- if (numBytes > 0) {
- reader.setPosition(numBytes-1);
- }
+ BytesReader reader = emptyBytes.getReverseReader();
+ // NoOutputs uses 0 bytes when writing its output,
+ // so we have to check here else BytesStore gets
+ // angry:
+ if (numBytes > 0) {
+ reader.setPosition(numBytes-1);
}
emptyOutput = outputs.readFinalOutput(reader);
} else {
@@ -364,11 +330,6 @@ public final class FST<T> implements Accountable {
default:
throw new IllegalStateException("invalid input type " + t);
}
- if (packed) {
- nodeRefToAddress = PackedInts.getReader(in);
- } else {
- nodeRefToAddress = null;
- }
startNode = in.readVLong();
if (version < VERSION_NO_NODE_ARC_COUNTS) {
in.readVLong();
@@ -424,31 +385,13 @@ public final class FST<T> implements Accountable {
} else {
size += bytes.ramBytesUsed();
}
- if (packed) {
- size += nodeRefToAddress.ramBytesUsed();
- } else if (nodeAddress != null) {
- size += nodeAddress.ramBytesUsed();
- size += inCounts.ramBytesUsed();
- }
size += cachedArcsBytesUsed;
return size;
}
@Override
- public Collection<Accountable> getChildResources() {
- List<Accountable> resources = new ArrayList<>();
- if (packed) {
- resources.add(Accountables.namedAccountable("node ref to address", nodeRefToAddress));
- } else if (nodeAddress != null) {
- resources.add(Accountables.namedAccountable("node addresses", nodeAddress));
- resources.add(Accountables.namedAccountable("in counts", inCounts));
- }
- return resources;
- }
-
- @Override
public String toString() {
- return getClass().getSimpleName() + "(input=" + inputType + ",output=" + outputs + ",packed=" + packed;
+ return getClass().getSimpleName() + "(input=" + inputType + ",output=" + outputs;
}
void finish(long newStartNode) throws IOException {
@@ -463,16 +406,6 @@ public final class FST<T> implements Accountable {
bytes.finish();
cacheRootArcs();
}
-
- private long getNodeAddress(long node) {
- if (nodeAddress != null) {
- // Deref
- return nodeAddress.get((int) node);
- } else {
- // Straight
- return node;
- }
- }
// Optionally caches first 128 labels
@SuppressWarnings({"rawtypes","unchecked"})
@@ -527,18 +460,7 @@ public final class FST<T> implements Accountable {
if (startNode == -1) {
throw new IllegalStateException("call finish first");
}
- if (nodeAddress != null) {
- throw new IllegalStateException("cannot save an FST pre-packed FST; it must first be packed");
- }
- if (packed && !(nodeRefToAddress instanceof PackedInts.Mutable)) {
- throw new IllegalStateException("cannot save a FST which has been loaded from disk ");
- }
CodecUtil.writeHeader(out, FILE_FORMAT_NAME, VERSION_CURRENT);
- if (packed) {
- out.writeByte((byte) 1);
- } else {
- out.writeByte((byte) 0);
- }
// TODO: really we should encode this as an arc, arriving
// to the root node, instead of special casing here:
if (emptyOutput != null) {
@@ -552,16 +474,14 @@ public final class FST<T> implements Accountable {
byte[] emptyOutputBytes = new byte[(int) ros.getFilePointer()];
ros.writeTo(emptyOutputBytes, 0);
- if (!packed) {
- // reverse
- final int stopAt = emptyOutputBytes.length/2;
- int upto = 0;
- while(upto < stopAt) {
- final byte b = emptyOutputBytes[upto];
- emptyOutputBytes[upto] = emptyOutputBytes[emptyOutputBytes.length-upto-1];
- emptyOutputBytes[emptyOutputBytes.length-upto-1] = b;
- upto++;
- }
+ // reverse
+ final int stopAt = emptyOutputBytes.length/2;
+ int upto = 0;
+ while(upto < stopAt) {
+ final byte b = emptyOutputBytes[upto];
+ emptyOutputBytes[upto] = emptyOutputBytes[emptyOutputBytes.length-upto-1];
+ emptyOutputBytes[emptyOutputBytes.length-upto-1] = b;
+ upto++;
}
out.writeVInt(emptyOutputBytes.length);
out.writeBytes(emptyOutputBytes, 0, emptyOutputBytes.length);
@@ -577,9 +497,6 @@ public final class FST<T> implements Accountable {
t = 2;
}
out.writeByte(t);
- if (packed) {
- ((PackedInts.Mutable) nodeRefToAddress).save(out);
- }
out.writeVLong(startNode);
if (bytes != null) {
long numBytes = bytes.getPosition();
@@ -705,8 +622,6 @@ public final class FST<T> implements Accountable {
if (!targetHasArcs) {
flags += BIT_STOP_NODE;
- } else if (inCounts != null) {
- inCounts.set((int) target.node, inCounts.get((int) target.node) + 1);
}
if (arc.output != NO_OUTPUT) {
@@ -810,30 +725,8 @@ public final class FST<T> implements Accountable {
builder.bytes.reverse(startAddress, thisNodeAddress);
- // PackedInts uses int as the index, so we cannot handle
- // > 2.1B nodes when packing:
- if (nodeAddress != null && builder.nodeCount == Integer.MAX_VALUE) {
- throw new IllegalStateException("cannot create a packed FST with more than 2.1 billion nodes");
- }
-
builder.nodeCount++;
- final long node;
- if (nodeAddress != null) {
-
- // Nodes are addressed by 1+ord:
- if ((int) builder.nodeCount == nodeAddress.size()) {
- nodeAddress = nodeAddress.resize(ArrayUtil.oversize(nodeAddress.size() + 1, nodeAddress.getBitsPerValue()));
- inCounts = inCounts.resize(ArrayUtil.oversize(inCounts.size() + 1, inCounts.getBitsPerValue()));
- }
- nodeAddress.set((int) builder.nodeCount, thisNodeAddress);
- // System.out.println(" write nodeAddress[" + nodeCount + "] = " + endAddress);
- node = builder.nodeCount;
- } else {
- node = thisNodeAddress;
- }
-
- //System.out.println(" ret node=" + node + " address=" + thisNodeAddress + " nodeAddress=" + nodeAddress);
- return node;
+ return thisNodeAddress;
}
/** Fills virtual 'start' arc, ie, an empty incoming arc to
@@ -876,13 +769,13 @@ public final class FST<T> implements Accountable {
arc.flags = BIT_LAST_ARC;
return arc;
} else {
- in.setPosition(getNodeAddress(follow.target));
+ in.setPosition(follow.target);
arc.node = follow.target;
final byte b = in.readByte();
if (b == ARCS_AS_FIXED_ARRAY) {
// array: jump straight to end
arc.numArcs = in.readVInt();
- if (packed || version >= VERSION_VINT_TARGET) {
+ if (version >= VERSION_VINT_TARGET) {
arc.bytesPerArc = in.readVInt();
} else {
arc.bytesPerArc = in.readInt();
@@ -906,8 +799,6 @@ public final class FST<T> implements Accountable {
}
if (arc.flag(BIT_STOP_NODE)) {
} else if (arc.flag(BIT_TARGET_NEXT)) {
- } else if (packed) {
- in.readVLong();
} else {
readUnpackedNodeTarget(in);
}
@@ -964,7 +855,7 @@ public final class FST<T> implements Accountable {
}
public Arc<T> readFirstRealTargetArc(long node, Arc<T> arc, final BytesReader in) throws IOException {
- final long address = getNodeAddress(node);
+ final long address = node;
in.setPosition(address);
//System.out.println(" readFirstRealTargtArc address="
//+ address);
@@ -975,7 +866,7 @@ public final class FST<T> implements Accountable {
//System.out.println(" fixedArray");
// this is first arc in a fixed-array
arc.numArcs = in.readVInt();
- if (packed || version >= VERSION_VINT_TARGET) {
+ if (version >= VERSION_VINT_TARGET) {
arc.bytesPerArc = in.readVInt();
} else {
arc.bytesPerArc = in.readInt();
@@ -1002,7 +893,7 @@ public final class FST<T> implements Accountable {
if (!targetHasArcs(follow)) {
return false;
} else {
- in.setPosition(getNodeAddress(follow.target));
+ in.setPosition(follow.target);
return in.readByte() == ARCS_AS_FIXED_ARRAY;
}
}
@@ -1029,7 +920,7 @@ public final class FST<T> implements Accountable {
//System.out.println(" nextArc fake " +
//arc.nextArc);
- long pos = getNodeAddress(arc.nextArc);
+ long pos = arc.nextArc;
in.setPosition(pos);
final byte b = in.readByte();
@@ -1038,7 +929,7 @@ public final class FST<T> implements Accountable {
in.readVInt();
// Skip bytesPerArc:
- if (packed || version >= VERSION_VINT_TARGET) {
+ if (version >= VERSION_VINT_TARGET) {
in.readVInt();
} else {
in.readInt();
@@ -1107,41 +998,18 @@ public final class FST<T> implements Accountable {
arc.nextArc = in.getPosition();
// TODO: would be nice to make this lazy -- maybe
// caller doesn't need the target and is scanning arcs...
- if (nodeAddress == null) {
- if (!arc.flag(BIT_LAST_ARC)) {
- if (arc.bytesPerArc == 0) {
- // must scan
- seekToNextNode(in);
- } else {
- in.setPosition(arc.posArcsStart);
- in.skipBytes(arc.bytesPerArc * arc.numArcs);
- }
- }
- arc.target = in.getPosition();
- } else {
- arc.target = arc.node - 1;
- assert arc.target > 0;
- }
- } else {
- if (packed) {
- final long pos = in.getPosition();
- final long code = in.readVLong();
- if (arc.flag(BIT_TARGET_DELTA)) {
- // Address is delta-coded from current address:
- arc.target = pos + code;
- //System.out.println(" delta pos=" + pos + " delta=" + code + " target=" + arc.target);
- } else if (code < nodeRefToAddress.size()) {
- // Deref
- arc.target = nodeRefToAddress.get((int) code);
- //System.out.println(" deref code=" + code + " target=" + arc.target);
+ if (!arc.flag(BIT_LAST_ARC)) {
+ if (arc.bytesPerArc == 0) {
+ // must scan
+ seekToNextNode(in);
} else {
- // Absolute
- arc.target = code;
- //System.out.println(" abs code=" + code);
+ in.setPosition(arc.posArcsStart);
+ in.skipBytes(arc.bytesPerArc * arc.numArcs);
}
- } else {
- arc.target = readUnpackedNodeTarget(in);
}
+ arc.target = in.getPosition();
+ } else {
+ arc.target = readUnpackedNodeTarget(in);
arc.nextArc = in.getPosition();
}
return arc;
@@ -1228,7 +1096,7 @@ public final class FST<T> implements Accountable {
return null;
}
- in.setPosition(getNodeAddress(follow.target));
+ in.setPosition(follow.target);
arc.node = follow.target;
@@ -1237,7 +1105,7 @@ public final class FST<T> implements Accountable {
if (in.readByte() == ARCS_AS_FIXED_ARRAY) {
// Arcs are full array; do binary search:
arc.numArcs = in.readVInt();
- if (packed || version >= VERSION_VINT_TARGET) {
+ if (version >= VERSION_VINT_TARGET) {
arc.bytesPerArc = in.readVInt();
} else {
arc.bytesPerArc = in.readInt();
@@ -1303,11 +1171,7 @@ public final class FST<T> implements Accountable {
}
if (!flag(flags, BIT_STOP_NODE) && !flag(flags, BIT_TARGET_NEXT)) {
- if (packed) {
- in.readVLong();
- } else {
- readUnpackedNodeTarget(in);
- }
+ readUnpackedNodeTarget(in);
}
if (flag(flags, BIT_LAST_ARC)) {
@@ -1340,18 +1204,10 @@ public final class FST<T> implements Accountable {
/** Returns a {@link BytesReader} for this FST, positioned at
* position 0. */
public BytesReader getBytesReader() {
- if (packed) {
- if (bytesArray != null) {
- return new ForwardBytesReader(bytesArray);
- } else {
- return bytes.getForwardReader();
- }
+ if (bytesArray != null) {
+ return new ReverseBytesReader(bytesArray);
} else {
- if (bytesArray != null) {
- return new ReverseBytesReader(bytesArray);
- } else {
- return bytes.getReverseReader();
- }
+ return bytes.getReverseReader();
}
}
@@ -1476,395 +1332,4 @@ public final class FST<T> implements Accountable {
}
*/
- // Creates a packed FST
- private FST(INPUT_TYPE inputType, Outputs<T> outputs, int bytesPageBits) {
- version = VERSION_CURRENT;
- packed = true;
- this.inputType = inputType;
- bytesArray = null;
- bytes = new BytesStore(bytesPageBits);
- this.outputs = outputs;
- }
-
- /** Expert: creates an FST by packing this one. This
- * process requires substantial additional RAM (currently
- * up to ~8 bytes per node depending on
- * <code>acceptableOverheadRatio</code>), but then should
- * produce a smaller FST.
- *
- * <p>The implementation of this method uses ideas from
- * <a target="_blank" href="http://www.cs.put.poznan.pl/dweiss/site/publications/download/fsacomp.pdf">Smaller Representation of Finite State Automata</a>,
- * which describes techniques to reduce the size of a FST.
- * However, this is not a strict implementation of the
- * algorithms described in this paper.
- */
- FST<T> pack(Builder<T> builder, int minInCountDeref, int maxDerefNodes, float acceptableOverheadRatio) throws IOException {
-
- // NOTE: maxDerefNodes is intentionally int: we cannot
- // support > 2.1B deref nodes
-
- // TODO: other things to try
- // - renumber the nodes to get more next / better locality?
- // - allow multiple input labels on an arc, so
- // singular chain of inputs can take one arc (on
- // wikipedia terms this could save another ~6%)
- // - in the ord case, the output '1' is presumably
- // very common (after NO_OUTPUT)... maybe use a bit
- // for it..?
- // - use spare bits in flags.... for top few labels /
- // outputs / targets
-
- if (nodeAddress == null) {
- throw new IllegalArgumentException("this FST was not built with willPackFST=true");
- }
-
- T NO_OUTPUT = outputs.getNoOutput();
-
- Arc<T> arc = new Arc<>();
-
- final BytesReader r = getBytesReader();
-
- final int topN = Math.min(maxDerefNodes, inCounts.size());
-
- // Find top nodes with highest number of incoming arcs:
- NodeQueue q = new NodeQueue(topN);
-
- // TODO: we could use more RAM efficient selection algo here...
- NodeAndInCount bottom = null;
- for(int node=0; node<inCounts.size(); node++) {
- if (inCounts.get(node) >= minInCountDeref) {
- if (bottom == null) {
- q.add(new NodeAndInCount(node, (int) inCounts.get(node)));
- if (q.size() == topN) {
- bottom = q.top();
- }
- } else if (inCounts.get(node) > bottom.count) {
- q.insertWithOverflow(new NodeAndInCount(node, (int) inCounts.get(node)));
- }
- }
- }
-
- // Free up RAM:
- inCounts = null;
-
- final Map<Integer,Integer> topNodeMap = new HashMap<>();
- for(int downTo=q.size()-1;downTo>=0;downTo--) {
- NodeAndInCount n = q.pop();
- topNodeMap.put(n.node, downTo);
- //System.out.println("map node=" + n.node + " inCount=" + n.count + " to newID=" + downTo);
- }
-
- // +1 because node ords start at 1 (0 is reserved as stop node):
- final GrowableWriter newNodeAddress = new GrowableWriter(
- PackedInts.bitsRequired(builder.bytes.getPosition()), (int) (1 + builder.nodeCount), acceptableOverheadRatio);
-
- // Fill initial coarse guess:
- for(int node=1;node<=builder.nodeCount;node++) {
- newNodeAddress.set(node, 1 + builder.bytes.getPosition() - nodeAddress.get(node));
- }
-
- int absCount;
- int deltaCount;
- int topCount;
- int nextCount;
-
- FST<T> fst;
-
- // Iterate until we converge:
- while(true) {
-
- //System.out.println("\nITER");
- boolean changed = false;
-
- // for assert:
- boolean negDelta = false;
-
- fst = new FST<>(inputType, outputs, builder.bytes.getBlockBits());
-
- final BytesStore writer = fst.bytes;
-
- // Skip 0 byte since 0 is reserved target:
- writer.writeByte((byte) 0);
-
- absCount = deltaCount = topCount = nextCount = 0;
-
- int changedCount = 0;
-
- long addressError = 0;
-
- //int totWasted = 0;
-
- // Since we re-reverse the bytes, we now write the
- // nodes backwards, so that BIT_TARGET_NEXT is
- // unchanged:
- for(int node=(int) builder.nodeCount;node>=1;node--) {
- final long address = writer.getPosition();
-
- //System.out.println(" node: " + node + " address=" + address);
- if (address != newNodeAddress.get(node)) {
- addressError = address - newNodeAddress.get(node);
- //System.out.println(" change: " + (address - newNodeAddress[node]));
- changed = true;
- newNodeAddress.set(node, address);
- changedCount++;
- }
-
- int nodeArcCount = 0;
- int bytesPerArc = 0;
-
- boolean retry = false;
-
- // for assert:
- boolean anyNegDelta = false;
-
- // Retry loop: possibly iterate more than once, if
- // this is an array'd node and bytesPerArc changes:
- writeNode:
- while(true) { // retry writing this node
-
- //System.out.println(" cycle: retry");
- readFirstRealTargetArc(node, arc, r);
-
- final boolean useArcArray = arc.bytesPerArc != 0;
- if (useArcArray) {
- // Write false first arc:
- if (bytesPerArc == 0) {
- bytesPerArc = arc.bytesPerArc;
- }
- writer.writeByte(ARCS_AS_FIXED_ARRAY);
- writer.writeVInt(arc.numArcs);
- writer.writeVInt(bytesPerArc);
- //System.out.println("node " + node + ": " + arc.numArcs + " arcs");
- }
-
- int maxBytesPerArc = 0;
- //int wasted = 0;
- while(true) { // iterate over all arcs for this node
- //System.out.println(" cycle next arc");
-
- final long arcStartPos = writer.getPosition();
- nodeArcCount++;
-
- byte flags = 0;
-
- if (arc.isLast()) {
- flags += BIT_LAST_ARC;
- }
- /*
- if (!useArcArray && nodeUpto < nodes.length-1 && arc.target == nodes[nodeUpto+1]) {
- flags += BIT_TARGET_NEXT;
- }
- */
- if (!useArcArray && node != 1 && arc.target == node-1) {
- flags += BIT_TARGET_NEXT;
- if (!retry) {
- nextCount++;
- }
- }
- if (arc.isFinal()) {
- flags += BIT_FINAL_ARC;
- if (arc.nextFinalOutput != NO_OUTPUT) {
- flags += BIT_ARC_HAS_FINAL_OUTPUT;
- }
- } else {
- assert arc.nextFinalOutput == NO_OUTPUT;
- }
- if (!targetHasArcs(arc)) {
- flags += BIT_STOP_NODE;
- }
-
- if (arc.output != NO_OUTPUT) {
- flags += BIT_ARC_HAS_OUTPUT;
- }
-
- final long absPtr;
- final boolean doWriteTarget = targetHasArcs(arc) && (flags & BIT_TARGET_NEXT) == 0;
- if (doWriteTarget) {
-
- final Integer ptr = topNodeMap.get(arc.target);
- if (ptr != null) {
- absPtr = ptr;
- } else {
- absPtr = topNodeMap.size() + newNodeAddress.get((int) arc.target) + addressError;
- }
-
- long delta = newNodeAddress.get((int) arc.target) + addressError - writer.getPosition() - 2;
- if (delta < 0) {
- //System.out.println("neg: " + delta);
- anyNegDelta = true;
- delta = 0;
- }
-
- if (delta < absPtr) {
- flags |= BIT_TARGET_DELTA;
- }
- } else {
- absPtr = 0;
- }
-
- assert flags != ARCS_AS_FIXED_ARRAY;
- writer.writeByte(flags);
-
- fst.writeLabel(writer, arc.label);
-
- if (arc.output != NO_OUTPUT) {
- outputs.write(arc.output, writer);
- }
- if (arc.nextFinalOutput != NO_OUTPUT) {
- outputs.writeFinalOutput(arc.nextFinalOutput, writer);
- }
-
- if (doWriteTarget) {
-
- long delta = newNodeAddress.get((int) arc.target) + addressError - writer.getPosition();
- if (delta < 0) {
- anyNegDelta = true;
- //System.out.println("neg: " + delta);
- delta = 0;
- }
-
- if (flag(flags, BIT_TARGET_DELTA)) {
- //System.out.println(" delta");
- writer.writeVLong(delta);
- if (!retry) {
- deltaCount++;
- }
- } else {
- /*
- if (ptr != null) {
- System.out.println(" deref");
- } else {
- System.out.println(" abs");
- }
- */
- writer.writeVLong(absPtr);
- if (!retry) {
- if (absPtr >= topNodeMap.size()) {
- absCount++;
- } else {
- topCount++;
- }
- }
- }
- }
-
- if (useArcArray) {
- final int arcBytes = (int) (writer.getPosition() - arcStartPos);
- //System.out.println(" " + arcBytes + " bytes");
- maxBytesPerArc = Math.max(maxBytesPerArc, arcBytes);
- // NOTE: this may in fact go "backwards", if
- // somehow (rarely, possibly never) we use
- // more bytesPerArc in this rewrite than the
- // incoming FST did... but in this case we
- // will retry (below) so it's OK to ovewrite
- // bytes:
- //wasted += bytesPerArc - arcBytes;
- writer.skipBytes((int) (arcStartPos + bytesPerArc - writer.getPosition()));
- }
-
- if (arc.isLast()) {
- break;
- }
-
- readNextRealArc(arc, r);
- }
-
- if (useArcArray) {
- if (maxBytesPerArc == bytesPerArc || (retry && maxBytesPerArc <= bytesPerArc)) {
- // converged
- //System.out.println(" bba=" + bytesPerArc + " wasted=" + wasted);
- //totWasted += wasted;
- break;
- }
- } else {
- break;
- }
-
- //System.out.println(" retry this node maxBytesPerArc=" + maxBytesPerArc + " vs " + bytesPerArc);
-
- // Retry:
- bytesPerArc = maxBytesPerArc;
- writer.truncate(address);
- nodeArcCount = 0;
- retry = true;
- anyNegDelta = false;
- }
-
- negDelta |= anyNegDelta;
- }
-
- if (!changed) {
- // We don't renumber the nodes (just reverse their
- // order) so nodes should only point forward to
- // other nodes because we only produce acyclic FSTs
- // w/ nodes only pointing "forwards":
- assert !negDelta;
- //System.out.println("TOT wasted=" + totWasted);
- // Converged!
- break;
- }
- }
-
- long maxAddress = 0;
- for (long key : topNodeMap.keySet()) {
- maxAddress = Math.max(maxAddress, newNodeAddress.get((int) key));
- }
-
- PackedInts.Mutable nodeRefToAddressIn = PackedInts.getMutable(topNodeMap.size(),
- PackedInts.bitsRequired(maxAddress), acceptableOverheadRatio);
- for(Map.Entry<Integer,Integer> ent : topNodeMap.entrySet()) {
- nodeRefToAddressIn.set(ent.getValue(), newNodeAddress.get(ent.getKey()));
- }
- fst.nodeRefToAddress = nodeRefToAddressIn;
-
- fst.startNode = newNodeAddress.get((int) startNode);
- //System.out.println("new startNode=" + fst.startNode + " old startNode=" + startNode);
-
- if (emptyOutput != null) {
- fst.setEmptyOutput(emptyOutput);
- }
-
- fst.bytes.finish();
- fst.cacheRootArcs();
-
- //final int size = fst.sizeInBytes();
- //System.out.println("nextCount=" + nextCount + " topCount=" + topCount + " deltaCount=" + deltaCount + " absCount=" + absCount);
-
- return fst;
- }
-
- private static class NodeAndInCount implements Comparable<NodeAndInCount> {
- final int node;
- final int count;
-
- public NodeAndInCount(int node, int count) {
- this.node = node;
- this.count = count;
- }
-
- @Override
- public int compareTo(NodeAndInCount other) {
- if (count > other.count) {
- return 1;
- } else if (count < other.count) {
- return -1;
- } else {
- // Tie-break: smaller node compares as greater than
- return other.node - node;
- }
- }
- }
-
- private static class NodeQueue extends PriorityQueue<NodeAndInCount> {
- public NodeQueue(int topN) {
- super(topN, false);
- }
-
- @Override
- public boolean lessThan(NodeAndInCount a, NodeAndInCount b) {
- final int cmp = a.compareTo(b);
- assert cmp != 0;
- return cmp < 0;
- }
- }
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c4c5e868/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java b/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java
index 41426f9..d984586 100644
--- a/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/util/fst/package-info.java
@@ -24,7 +24,6 @@
* <li>Fast and low memory overhead construction of the minimal FST
* (but inputs must be provided in sorted order)</li>
* <li>Low object overhead and quick deserialization (byte[] representation)</li>
- * <li>Optional two-pass compression: {@link org.apache.lucene.util.fst.FST#pack FST.pack()}</li>
* <li>{@link org.apache.lucene.util.fst.Util#getByOutput Lookup-by-output} when the
* outputs are in sorted order (e.g., ordinals or file pointers)</li>
* <li>Pluggable {@link org.apache.lucene.util.fst.Outputs Outputs} representation</li>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c4c5e868/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java b/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java
index bdec65c..a02bf8a 100644
--- a/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java
+++ b/lucene/core/src/test/org/apache/lucene/util/fst/Test2BFST.java
@@ -29,7 +29,6 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TimeUnits;
-import org.apache.lucene.util.packed.PackedInts;
import org.junit.Ignore;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
@@ -47,16 +46,14 @@ public class Test2BFST extends LuceneTestCase {
Directory dir = new MMapDirectory(createTempDir("2BFST"));
- for(int doPackIter=0;doPackIter<2;doPackIter++) {
- boolean doPack = doPackIter == 1;
-
+ for(int iter=0;iter<1;iter++) {
// Build FST w/ NoOutputs and stop when nodeCount > 2.2B
- if (!doPack) {
+ {
System.out.println("\nTEST: 3B nodes; doPack=false output=NO_OUTPUTS");
Outputs<Object> outputs = NoOutputs.getSingleton();
Object NO_OUTPUT = outputs.getNoOutput();
final Builder<Object> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
- doPack, PackedInts.COMPACT, true, 15);
+ true, 15);
int count = 0;
Random r = new Random(seed);
@@ -135,10 +132,10 @@ public class Test2BFST extends LuceneTestCase {
// Build FST w/ ByteSequenceOutputs and stop when FST
// size = 3GB
{
- System.out.println("\nTEST: 3 GB size; doPack=" + doPack + " outputs=bytes");
+ System.out.println("\nTEST: 3 GB size; outputs=bytes");
Outputs<BytesRef> outputs = ByteSequenceOutputs.getSingleton();
final Builder<BytesRef> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
- doPack, PackedInts.COMPACT, true, 15);
+ true, 15);
byte[] outputBytes = new byte[20];
BytesRef output = new BytesRef(outputBytes);
@@ -212,10 +209,10 @@ public class Test2BFST extends LuceneTestCase {
// Build FST w/ PositiveIntOutputs and stop when FST
// size = 3GB
{
- System.out.println("\nTEST: 3 GB size; doPack=" + doPack + " outputs=long");
+ System.out.println("\nTEST: 3 GB size; outputs=long");
Outputs<Long> outputs = PositiveIntOutputs.getSingleton();
final Builder<Long> b = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs,
- doPack, PackedInts.COMPACT, true, 15);
+ true, 15);
long output = 1;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c4c5e868/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
index 39b3282..6b218cf 100644
--- a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
+++ b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
@@ -76,7 +76,6 @@ import org.apache.lucene.util.fst.FST.Arc;
import org.apache.lucene.util.fst.FST.BytesReader;
import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.Util.Result;
-import org.apache.lucene.util.packed.PackedInts;
import static org.apache.lucene.util.fst.FSTTester.getRandomString;
import static org.apache.lucene.util.fst.FSTTester.simpleRandomString;
@@ -328,9 +327,7 @@ public class TestFSTs extends LuceneTestCase {
writer.close();
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
- final boolean doRewrite = random().nextBoolean();
-
- Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, doRewrite, PackedInts.DEFAULT, true, 15);
+ Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
boolean storeOrd = random().nextBoolean();
if (VERBOSE) {
@@ -464,16 +461,14 @@ public class TestFSTs extends LuceneTestCase {
private int inputMode;
private final Outputs<T> outputs;
private final Builder<T> builder;
- private final boolean doPack;
- public VisitTerms(Path dirOut, Path wordsFileIn, int inputMode, int prune, Outputs<T> outputs, boolean doPack, boolean noArcArrays) {
+ public VisitTerms(Path dirOut, Path wordsFileIn, int inputMode, int prune, Outputs<T> outputs, boolean noArcArrays) {
this.dirOut = dirOut;
this.wordsFileIn = wordsFileIn;
this.inputMode = inputMode;
this.outputs = outputs;
- this.doPack = doPack;
- builder = new Builder<>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, doPack, PackedInts.DEFAULT, !noArcArrays, 15);
+ builder = new Builder<>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4, 0, prune, prune == 0, true, Integer.MAX_VALUE, outputs, !noArcArrays, 15);
}
protected abstract T getOutput(IntsRef input, int ord) throws IOException;
@@ -622,7 +617,6 @@ public class TestFSTs extends LuceneTestCase {
boolean storeOrds = false;
boolean storeDocFreqs = false;
boolean verify = true;
- boolean doPack = false;
boolean noArcArrays = false;
Path wordsFileIn = null;
Path dirOut = null;
@@ -647,8 +641,6 @@ public class TestFSTs extends LuceneTestCase {
storeOrds = true;
} else if (args[idx].equals("-noverify")) {
verify = false;
- } else if (args[idx].equals("-pack")) {
- doPack = true;
} else if (args[idx].startsWith("-")) {
System.err.println("Unrecognized option: " + args[idx]);
System.exit(-1);
@@ -677,7 +669,7 @@ public class TestFSTs extends LuceneTestCase {
final PositiveIntOutputs o1 = PositiveIntOutputs.getSingleton();
final PositiveIntOutputs o2 = PositiveIntOutputs.getSingleton();
final PairOutputs<Long,Long> outputs = new PairOutputs<>(o1, o2);
- new VisitTerms<PairOutputs.Pair<Long,Long>>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
+ new VisitTerms<PairOutputs.Pair<Long,Long>>(dirOut, wordsFileIn, inputMode, prune, outputs, noArcArrays) {
Random rand;
@Override
public PairOutputs.Pair<Long,Long> getOutput(IntsRef input, int ord) {
@@ -691,7 +683,7 @@ public class TestFSTs extends LuceneTestCase {
} else if (storeOrds) {
// Store only ords
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
- new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
+ new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, noArcArrays) {
@Override
public Long getOutput(IntsRef input, int ord) {
return (long) ord;
@@ -700,7 +692,7 @@ public class TestFSTs extends LuceneTestCase {
} else if (storeDocFreqs) {
// Store only docFreq
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
- new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
+ new VisitTerms<Long>(dirOut, wordsFileIn, inputMode, prune, outputs, noArcArrays) {
Random rand;
@Override
public Long getOutput(IntsRef input, int ord) {
@@ -714,7 +706,7 @@ public class TestFSTs extends LuceneTestCase {
// Store nothing
final NoOutputs outputs = NoOutputs.getSingleton();
final Object NO_OUTPUT = outputs.getNoOutput();
- new VisitTerms<Object>(dirOut, wordsFileIn, inputMode, prune, outputs, doPack, noArcArrays) {
+ new VisitTerms<Object>(dirOut, wordsFileIn, inputMode, prune, outputs, noArcArrays) {
@Override
public Object getOutput(IntsRef input, int ord) {
return NO_OUTPUT;
@@ -1118,7 +1110,7 @@ public class TestFSTs extends LuceneTestCase {
public void testFinalOutputOnEndState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
- final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, random().nextBoolean(), PackedInts.DEFAULT, true, 15);
+ final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE4, 2, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
builder.add(Util.toUTF32("stat", new IntsRefBuilder()), 17L);
builder.add(Util.toUTF32("station", new IntsRefBuilder()), 10L);
final FST<Long> fst = builder.finish();
@@ -1132,8 +1124,7 @@ public class TestFSTs extends LuceneTestCase {
public void testInternalFinalState() throws Exception {
final PositiveIntOutputs outputs = PositiveIntOutputs.getSingleton();
- final boolean willRewrite = random().nextBoolean();
- final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, willRewrite, PackedInts.DEFAULT, true, 15);
+ final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15);
builder.add(Util.toIntsRef(new BytesRef("stat"), new IntsRefBuilder()), outputs.getNoOutput());
builder.add(Util.toIntsRef(new BytesRef("station"), new IntsRefBuilder()), outputs.getNoOutput());
final FST<Long> fst = builder.finish();
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c4c5e868/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java
index b49ea79..d83b915 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/codecs/idversion/VersionBlockTreeTermsWriter.java
@@ -50,7 +50,6 @@ import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.PairOutputs;
import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
-import org.apache.lucene.util.packed.PackedInts;
/*
TODO:
@@ -354,8 +353,7 @@ public final class VersionBlockTreeTermsWriter extends FieldsConsumer {
final Builder<Pair<BytesRef,Long>> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1,
0, 0, true, false, Integer.MAX_VALUE,
- FST_OUTPUTS, false,
- PackedInts.COMPACT, true, 15);
+ FST_OUTPUTS, true, 15);
//if (DEBUG) {
// System.out.println(" compile index for prefix=" + prefix);
//}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c4c5e868/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
index 3706724..3d20412 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTCompletionBuilder.java
@@ -26,7 +26,6 @@ import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.*;
-import org.apache.lucene.util.packed.PackedInts;
/**
* Finite state automata based implementation of "autocomplete" functionality.
@@ -237,8 +236,7 @@ public class FSTCompletionBuilder {
final Object empty = outputs.getNoOutput();
final Builder<Object> builder = new Builder<>(
FST.INPUT_TYPE.BYTE1, 0, 0, true, true,
- shareMaxTailLength, outputs, false,
- PackedInts.DEFAULT, true, 15);
+ shareMaxTailLength, outputs, true, 15);
BytesRefBuilder scratch = new BytesRefBuilder();
BytesRef entry;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c4c5e868/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java b/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java
index 11b1325..8e6a4ea 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/fst/FSTTester.java
@@ -40,7 +40,6 @@ import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.UnicodeUtil;
-import org.apache.lucene.util.packed.PackedInts;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -273,25 +272,14 @@ public class FSTTester<T> {
System.out.println("\nTEST: prune1=" + prune1 + " prune2=" + prune2);
}
- final boolean willRewrite = random.nextBoolean();
-
final Builder<T> builder = new Builder<>(inputMode == 0 ? FST.INPUT_TYPE.BYTE1 : FST.INPUT_TYPE.BYTE4,
prune1, prune2,
prune1==0 && prune2==0,
allowRandomSuffixSharing ? random.nextBoolean() : true,
allowRandomSuffixSharing ? TestUtil.nextInt(random, 1, 10) : Integer.MAX_VALUE,
outputs,
- willRewrite,
- PackedInts.DEFAULT,
true,
15);
- if (LuceneTestCase.VERBOSE) {
- if (willRewrite) {
- System.out.println("TEST: packed FST");
- } else {
- System.out.println("TEST: non-packed FST");
- }
- }
for(InputOutput<T> pair : pairs) {
if (pair.output instanceof List) {
@@ -306,7 +294,7 @@ public class FSTTester<T> {
}
FST<T> fst = builder.finish();
- if (random.nextBoolean() && fst != null && !willRewrite) {
+ if (random.nextBoolean() && fst != null) {
IOContext context = LuceneTestCase.newIOContext(random);
IndexOutput out = dir.createOutput("fst.bin", context);
fst.save(out);
[2/4] lucene-solr:master: LUCENE-5867: Add a BooleanSimilarity.
Posted by jp...@apache.org.
LUCENE-5867: Add a BooleanSimilarity.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/3e15233b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/3e15233b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/3e15233b
Branch: refs/heads/master
Commit: 3e15233b23197122e40a851edab7b7257ce63f02
Parents: c415bc8
Author: Adrien Grand <jp...@gmail.com>
Authored: Thu Nov 10 14:05:13 2016 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Thu Nov 10 15:01:49 2016 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 5 +-
.../search/similarities/BooleanSimilarity.java | 95 +++++++++++++++
.../similarities/TestBooleanSimilarity.java | 117 +++++++++++++++++++
.../search/similarities/RandomSimilarity.java | 1 +
.../util/TestRuleSetupAndRestoreClassEnv.java | 3 +-
5 files changed, 218 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e15233b/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 191c103..97e6939 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -55,7 +55,10 @@ Other
* LUCENE-7360: Remove Explanation.toHtml() (Alan Woodward)
======================= Lucene 6.4.0 =======================
-(No Changes)
+
+New features
+
+* LUCENE-5867: Added BooleanSimilarity. (Robert Muir, Adrien Grand)
Improvements
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e15233b/lucene/core/src/java/org/apache/lucene/search/similarities/BooleanSimilarity.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/BooleanSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/BooleanSimilarity.java
new file mode 100644
index 0000000..a7b7614
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/BooleanSimilarity.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.TermStatistics;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Simple similarity that gives terms a score that is equal to their query
+ * boost. This similarity is typically used with disabled norms since neither
+ * document statistics nor index statistics are used for scoring. That said,
+ * if norms are enabled, they will be computed the same way as
+ * {@link SimilarityBase} and {@link BM25Similarity} with
+ * {@link SimilarityBase#setDiscountOverlaps(boolean) discounted overlaps}
+ * so that the {@link Similarity} can be changed after the index has been
+ * created.
+ */
+public class BooleanSimilarity extends Similarity {
+
+ private static final Similarity BM25_SIM = new BM25Similarity();
+
+ /** Sole constructor */
+ public BooleanSimilarity() {}
+
+ @Override
+ public long computeNorm(FieldInvertState state) {
+ return BM25_SIM.computeNorm(state);
+ }
+
+ @Override
+ public SimWeight computeWeight(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
+ return new BooleanWeight(boost);
+ }
+
+ private static class BooleanWeight extends SimWeight {
+ final float boost;
+
+ BooleanWeight(float boost) {
+ this.boost = boost;
+ }
+ }
+
+ @Override
+ public SimScorer simScorer(SimWeight weight, LeafReaderContext context) throws IOException {
+ final float boost = ((BooleanWeight) weight).boost;
+
+ return new SimScorer() {
+
+ @Override
+ public float score(int doc, float freq) throws IOException {
+ return boost;
+ }
+
+ @Override
+ public Explanation explain(int doc, Explanation freq) throws IOException {
+ Explanation queryBoostExpl = Explanation.match(boost, "query boost");
+ return Explanation.match(
+ queryBoostExpl.getValue(),
+ "score(" + getClass().getSimpleName() + ", doc=" + doc + "), computed from:",
+ queryBoostExpl);
+ }
+
+ @Override
+ public float computeSlopFactor(int distance) {
+ return 1f;
+ }
+
+ @Override
+ public float computePayloadFactor(int doc, int start, int end, BytesRef payload) {
+ return 1f;
+ }
+ };
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e15233b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBooleanSimilarity.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/similarities/TestBooleanSimilarity.java b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBooleanSimilarity.java
new file mode 100644
index 0000000..15b1448
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/search/similarities/TestBooleanSimilarity.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search.similarities;
+
+import java.io.IOException;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BoostQuery;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+public class TestBooleanSimilarity extends LuceneTestCase {
+
+ public void testTermScoreIsEqualToBoost() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir,
+ newIndexWriterConfig());
+ Document doc = new Document();
+ doc.add(new StringField("foo", "bar", Store.NO));
+ doc.add(new StringField("foo", "baz", Store.NO));
+ w.addDocument(doc);
+ doc = new Document();
+ doc.add(new StringField("foo", "bar", Store.NO));
+ doc.add(new StringField("foo", "bar", Store.NO));
+ w.addDocument(doc);
+
+ DirectoryReader reader = w.getReader();
+ w.close();
+ IndexSearcher searcher = newSearcher(reader);
+ searcher.setSimilarity(new BooleanSimilarity());
+ TopDocs topDocs = searcher.search(new TermQuery(new Term("foo", "bar")), 2);
+ assertEquals(2, topDocs.totalHits);
+ assertEquals(1f, topDocs.scoreDocs[0].score, 0f);
+ assertEquals(1f, topDocs.scoreDocs[1].score, 0f);
+
+ topDocs = searcher.search(new TermQuery(new Term("foo", "baz")), 1);
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(1f, topDocs.scoreDocs[0].score, 0f);
+
+ topDocs = searcher.search(new BoostQuery(new TermQuery(new Term("foo", "baz")), 3f), 1);
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(3f, topDocs.scoreDocs[0].score, 0f);
+
+ reader.close();
+ dir.close();
+ }
+
+ public void testPhraseScoreIsEqualToBoost() throws IOException {
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(random(), dir,
+ newIndexWriterConfig().setSimilarity(new BooleanSimilarity()));
+ Document doc = new Document();
+ doc.add(new TextField("foo", "bar baz quux", Store.NO));
+ w.addDocument(doc);
+
+ DirectoryReader reader = w.getReader();
+ w.close();
+ IndexSearcher searcher = newSearcher(reader);
+ searcher.setSimilarity(new BooleanSimilarity());
+
+ PhraseQuery query = new PhraseQuery(2, "foo", "bar", "quux");
+
+ TopDocs topDocs = searcher.search(query, 2);
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(1f, topDocs.scoreDocs[0].score, 0f);
+
+ topDocs = searcher.search(new BoostQuery(query, 7), 2);
+ assertEquals(1, topDocs.totalHits);
+ assertEquals(7f, topDocs.scoreDocs[0].score, 0f);
+
+ reader.close();
+ dir.close();
+ }
+
+ public void testSameNormsAsBM25() {
+ BooleanSimilarity sim1 = new BooleanSimilarity();
+ BM25Similarity sim2 = new BM25Similarity();
+ sim2.setDiscountOverlaps(true);
+ for (int iter = 0; iter < 100; ++iter) {
+ final int length = TestUtil.nextInt(random(), 1, 100);
+ final int position = random().nextInt(length);
+ final int numOverlaps = random().nextInt(50);
+ final float boost = random().nextFloat() * 10;
+ FieldInvertState state = new FieldInvertState("foo", position, length, numOverlaps, 100, boost);
+ assertEquals(
+ sim2.computeNorm(state),
+ sim1.computeNorm(state),
+ 0f);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e15233b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java
index 136d7e5..43b6c3c 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/similarities/RandomSimilarity.java
@@ -91,6 +91,7 @@ public class RandomSimilarity extends PerFieldSimilarityWrapper {
allSims = new ArrayList<>();
allSims.add(new ClassicSimilarity());
allSims.add(new BM25Similarity());
+ allSims.add(new BooleanSimilarity());
for (BasicModel basicModel : BASIC_MODELS) {
for (AfterEffect afterEffect : AFTER_EFFECTS) {
for (Normalization normalization : NORMALIZATIONS) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e15233b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
index 0e4facc..96b4353 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/util/TestRuleSetupAndRestoreClassEnv.java
@@ -36,7 +36,6 @@ import org.apache.lucene.codecs.lucene70.Lucene70Codec;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.apache.lucene.index.RandomCodec;
-import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.RandomSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
@@ -213,7 +212,7 @@ final class TestRuleSetupAndRestoreClassEnv extends AbstractBeforeAfterRule {
TimeZone randomTimeZone = randomTimeZone(random());
timeZone = testTimeZone.equals("random") ? randomTimeZone : TimeZone.getTimeZone(testTimeZone);
TimeZone.setDefault(timeZone);
- similarity = random().nextBoolean() ? new ClassicSimilarity() : new RandomSimilarity(random());
+ similarity = new RandomSimilarity(random());
// Check codec restrictions once at class level.
try {
[4/4] lucene-solr:master: LUCENE-7524: More detailed explanation of
idf.
Posted by jp...@apache.org.
LUCENE-7524: More detailed explanation of idf.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/6b9f1131
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/6b9f1131
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/6b9f1131
Branch: refs/heads/master
Commit: 6b9f11311a2481743d8b2b42a875a11162a5e215
Parents: 3e15233
Author: Adrien Grand <jp...@gmail.com>
Authored: Thu Nov 10 14:08:26 2016 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Thu Nov 10 15:01:49 2016 +0100
----------------------------------------------------------------------
lucene/CHANGES.txt | 3 +++
.../search/similarities/BM25Similarity.java | 20 ++++++++++----------
.../search/similarities/ClassicSimilarity.java | 13 +++++++++++++
.../search/similarities/TFIDFSimilarity.java | 12 +++++-------
.../org/apache/lucene/search/CheckHits.java | 10 ++++++----
5 files changed, 37 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6b9f1131/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 97e6939..5b9fee7 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -77,6 +77,9 @@ Improvements
now throws IllegalArgumentException instead of a cryptic exception
that closes your IndexWriter (Steve Chen via Mike McCandless)
+* LUCENE-7524: Added more detailed explanation of how IDF is computed in
+ ClassicSimilarity and BM25Similarity. (Adrien Grand)
+
======================= Lucene 6.3.0 =======================
API Changes
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6b9f1131/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java
index ff390b3..6763118 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/BM25Similarity.java
@@ -175,7 +175,9 @@ public class BM25Similarity extends Similarity {
final long df = termStats.docFreq();
final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
final float idf = idf(df, docCount);
- return Explanation.match(idf, "idf(docFreq=" + df + ", docCount=" + docCount + ")");
+ return Explanation.match(idf, "idf, computed as log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5)) from:",
+ Explanation.match(df, "docFreq"),
+ Explanation.match(docCount, "docCount"));
}
/**
@@ -192,16 +194,14 @@ public class BM25Similarity extends Similarity {
* for each term.
*/
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
- final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
- float idf = 0.0f;
+ double idf = 0d; // sum into a double before casting into a float
List<Explanation> details = new ArrayList<>();
for (final TermStatistics stat : termStats ) {
- final long df = stat.docFreq();
- final float termIdf = idf(df, docCount);
- details.add(Explanation.match(termIdf, "idf(docFreq=" + df + ", docCount=" + docCount + ")"));
- idf += termIdf;
+ Explanation idfExplain = idfExplain(collectionStats, stat);
+ details.add(idfExplain);
+ idf += idfExplain.getValue();
}
- return Explanation.match(idf, "idf(), sum of:", details);
+ return Explanation.match((float) idf, "idf(), sum of:", details);
}
@Override
@@ -303,7 +303,7 @@ public class BM25Similarity extends Similarity {
subs.add(Explanation.match(0, "parameter b (norms omitted for field)"));
return Explanation.match(
(freq.getValue() * (k1 + 1)) / (freq.getValue() + k1),
- "tfNorm, computed from:", subs);
+ "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1) from:", subs);
} else {
byte norm;
if (norms.advanceExact(doc)) {
@@ -317,7 +317,7 @@ public class BM25Similarity extends Similarity {
subs.add(Explanation.match(doclen, "fieldLength"));
return Explanation.match(
(freq.getValue() * (k1 + 1)) / (freq.getValue() + k1 * (1 - b + b * doclen/stats.avgdl)),
- "tfNorm, computed from:", subs);
+ "tfNorm, computed as (freq * (k1 + 1)) / (freq + k1 * (1 - b + b * fieldLength / avgFieldLength)) from:", subs);
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6b9f1131/lucene/core/src/java/org/apache/lucene/search/similarities/ClassicSimilarity.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/ClassicSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/ClassicSimilarity.java
index fae85a0..5a1e237 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/ClassicSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/ClassicSimilarity.java
@@ -18,6 +18,9 @@ package org.apache.lucene.search.similarities;
import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.search.CollectionStatistics;
+import org.apache.lucene.search.Explanation;
+import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.SmallFloat;
@@ -121,6 +124,16 @@ public class ClassicSimilarity extends TFIDFSimilarity {
return 1;
}
+ @Override
+ public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats) {
+ final long df = termStats.docFreq();
+ final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
+ final float idf = idf(df, docCount);
+ return Explanation.match(idf, "idf, computed as log((docCount+1)/(docFreq+1)) + 1 from:",
+ Explanation.match(df, "docFreq"),
+ Explanation.match(docCount, "docCount"));
+ }
+
/** Implemented as <code>log((docCount+1)/(docFreq+1)) + 1</code>. */
@Override
public float idf(long docFreq, long docCount) {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6b9f1131/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
index cd8acd6..12ab1a2 100644
--- a/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
+++ b/lucene/core/src/java/org/apache/lucene/search/similarities/TFIDFSimilarity.java
@@ -484,16 +484,14 @@ public abstract class TFIDFSimilarity extends Similarity {
* for each term.
*/
public Explanation idfExplain(CollectionStatistics collectionStats, TermStatistics termStats[]) {
- final long docCount = collectionStats.docCount() == -1 ? collectionStats.maxDoc() : collectionStats.docCount();
- float idf = 0.0f;
+ double idf = 0d; // sum into a double before casting into a float
List<Explanation> subs = new ArrayList<>();
for (final TermStatistics stat : termStats ) {
- final long df = stat.docFreq();
- final float termIdf = idf(df, docCount);
- subs.add(Explanation.match(termIdf, "idf(docFreq=" + df + ", docCount=" + docCount + ")"));
- idf += termIdf;
+ Explanation idfExplain = idfExplain(collectionStats, stat);
+ subs.add(idfExplain);
+ idf += idfExplain.getValue();
}
- return Explanation.match(idf, "idf(), sum of:", subs);
+ return Explanation.match((float) idf, "idf(), sum of:", subs);
}
/** Computes a score factor based on a term's document frequency (the number
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6b9f1131/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java b/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
index 9a14b5d..dee7d84 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/CheckHits.java
@@ -368,8 +368,9 @@ public class CheckHits {
boolean productOf = descr.endsWith("product of:");
boolean sumOf = descr.endsWith("sum of:");
boolean maxOf = descr.endsWith("max of:");
+ boolean computedOf = descr.matches(".*, computed as .* from:");
boolean maxTimesOthers = false;
- if (!(productOf || sumOf || maxOf)) {
+ if (!(productOf || sumOf || maxOf || computedOf)) {
// maybe 'max plus x times others'
int k1 = descr.indexOf("max plus ");
if (k1>=0) {
@@ -387,9 +388,9 @@ public class CheckHits {
// TODO: this is a TERRIBLE assertion!!!!
Assert.assertTrue(
q+": multi valued explanation description=\""+descr
- +"\" must be 'max of plus x times others' or end with 'product of'"
+ +"\" must be 'max of plus x times others', 'computed as x from:' or end with 'product of'"
+" or 'sum of:' or 'max of:' - "+expl,
- productOf || sumOf || maxOf || maxTimesOthers);
+ productOf || sumOf || maxOf || computedOf || maxTimesOthers);
float sum = 0;
float product = 1;
float max = 0;
@@ -410,7 +411,8 @@ public class CheckHits {
} else if (maxTimesOthers) {
combined = max + x * (sum - max);
} else {
- Assert.assertTrue("should never get here!",false);
+ Assert.assertTrue("should never get here!", computedOf);
+ combined = value;
}
Assert.assertEquals(q+": actual subDetails combined=="+combined+
" != value="+value+" Explanation: "+expl,