You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@lucene.apache.org by to...@apache.org on 2018/12/11 13:26:41 UTC

[1/5] lucene-solr:master: Revert "Pre-commit fixes for LUCENE-8374 (JavaDoc + arguments)". LUCENE-8374 was committed without consensus and is expected to be superseded by LUCENE-8585.

Repository: lucene-solr
Updated Branches:
  refs/heads/master 05d728f57 -> 8a20705b8


Revert "Pre-commit fixes for LUCENE-8374 (JavaDoc + arguments)".
LUCENE-8374 was committed without consensus and is expected to be superseded by LUCENE-8585.

This reverts commit 6c111611118ceda0837f25a27e5b4549f2693457.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/870bb11c
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/870bb11c
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/870bb11c

Branch: refs/heads/master
Commit: 870bb11cc85c961f9ca9fb638143d8189a5abd6a
Parents: 05d728f
Author: Toke Eskildsen <to...@apache.org>
Authored: Tue Dec 11 14:13:17 2018 +0100
Committer: Toke Eskildsen <to...@apache.org>
Committed: Tue Dec 11 14:13:17 2018 +0100

----------------------------------------------------------------------
 .../lucene/codecs/lucene70/IndexedDISICache.java | 10 +---------
 .../codecs/lucene70/IndexedDISICacheFactory.java | 19 +------------------
 .../lucene/codecs/lucene70/LongCompressor.java   |  7 +------
 3 files changed, 3 insertions(+), 33 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/870bb11c/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java
index 80867a3..fdf93cf 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java
@@ -118,9 +118,7 @@ public class IndexedDISICache implements Accountable {
     this.name = "";
   }
 
-  /**
-   * Shared between all structures that are too small to meaningfully use jump-tables.
-   */
+  // Used to represent no caching.
   public static final IndexedDISICache EMPTY = new IndexedDISICache();
 
   /**
@@ -158,10 +156,6 @@ public class IndexedDISICache implements Accountable {
        return target >> RANK_BLOCK_BITS << RANK_BLOCK_BITS;
   }
 
-  /**
-   * Offsets stated the block starts.
-   * @return true if the cache has offsets.
-   */
   public boolean hasOffsets() {
     return blockCache != null;
   }
@@ -305,7 +299,6 @@ public class IndexedDISICache implements Accountable {
   }
 
   /**
-   * Creation stats intended for human inspection.
    * @return Human readable details from the creation of the cache instance.
    */
   public String getCreationStats() {
@@ -313,7 +306,6 @@ public class IndexedDISICache implements Accountable {
   }
 
   /**
-   * Cache name, as stated in the constructor.
    * @return Human-readable name for the cache instance.
    */
   public String getName() {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/870bb11c/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java
index f177210..610529a 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java
@@ -48,12 +48,6 @@ public class IndexedDISICacheFactory implements Accountable {
   private final Map<String, VaryingBPVJumpTable> vBPVPool = new HashMap<>();
 
   /**
-   * Creates a {@link IndexedDISICache} and {@link VaryingBPVJumpTable} holding factory,
-   * intended for shared use within a single segment.
-   */
-  public IndexedDISICacheFactory() { }
-
-  /**
    * Create a cached {@link IndexedDISI} instance.
    * @param data   persistent data containing the DISI-structure.
    * @param cost   cost as defined for IndexedDISI.
@@ -152,24 +146,13 @@ public class IndexedDISICacheFactory implements Accountable {
     return cache;
   }
 
-  /**
-   * Cache statistics intended for external inspection.
-   * @return the number of total blocks where jumps are accelerated by jump-tables.
-   */
+  // Statistics
   public long getDISIBlocksWithOffsetsCount() {
     return disiPool.values().stream().filter(IndexedDISICache::hasOffsets).count();
   }
-  /**
-   * Cache statistics intended for external inspection.
-   * @return the total number of DENSE blocks where index-counts are accelerated by rank.
-   */
   public long getDISIBlocksWithRankCount() {
     return disiPool.values().stream().filter(IndexedDISICache::hasRank).count();
   }
-  /**
-   * Cache statistics intended for external inspection.
-   * @return the number of numeric blocks where jumps are accelerated by jump-tables
-   */
   public long getVaryingBPVCount() {
     return vBPVPool.size();
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/870bb11c/lucene/core/src/java/org/apache/lucene/codecs/lucene70/LongCompressor.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/LongCompressor.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/LongCompressor.java
index 7b12cc6..1c030a7 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/LongCompressor.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/LongCompressor.java
@@ -48,11 +48,6 @@ public class LongCompressor {
   private static final double DEFAULT_MIN_ZERO_VALUES_FRACTION_FOR_SPARSE = 0.2; // 20% (just guessing of a value here)
 
   /**
-   * LongCompressor exclusively uses static methods and is never instantiated.
-   */
-  private LongCompressor() { }
-
-  /**
    * Create a compact version of the given values.
    * @param values PackedInts with no special constraints.
    * @return a compact version of the given values or the given values if compression did not improve on heap overhead.
@@ -68,7 +63,7 @@ public class LongCompressor {
    * @return a compact version of the given values or the given values if compression did not improve on heap overhead.
    */
   public static PackedInts.Reader compress(PackedInts.Reader values, int length) {
-    return compress(values, length, true);
+    return compress(values, values.size(), true);
   }
 
   /**

[4/5] lucene-solr:master: Revert "LUCENE-8374 part 2/4: Reduce reads for sparse DocValues". LUCENE-8374 was committed without consensus and is expected to be superseded by LUCENE-8585.

Posted by to...@apache.org.

Revert "LUCENE-8374 part 2/4: Reduce reads for sparse DocValues".
LUCENE-8374 was committed without consensus and is expected to be superseded by LUCENE-8585.

This reverts commit 7ad027627a179daa7d8d56be191d5b287dfec6f4.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/1da6d39b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/1da6d39b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/1da6d39b

Branch: refs/heads/master
Commit: 1da6d39b417148a3a5197931c92b6e8d409059bc
Parents: 6c5d87a
Author: Toke Eskildsen <to...@apache.org>
Authored: Tue Dec 11 14:14:22 2018 +0100
Committer: Toke Eskildsen <to...@apache.org>
Committed: Tue Dec 11 14:14:22 2018 +0100

----------------------------------------------------------------------
 .../codecs/lucene70/IndexedDISICache.java       | 102 +------
 .../lucene70/IndexedDISICacheFactory.java       |   5 -
 .../lucene/codecs/lucene70/LongCompressor.java  | 246 ---------------
 .../java/org/apache/lucene/util/RankBitSet.java | 302 -------------------
 .../codecs/lucene70/TestLongCompressor.java     |  87 ------
 .../org/apache/lucene/util/TestRankBitSet.java  | 107 -------
 6 files changed, 7 insertions(+), 842 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1da6d39b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java
index be0beb7..9a80689 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java
@@ -33,7 +33,7 @@ import static org.apache.lucene.codecs.lucene70.IndexedDISI.MAX_ARRAY_LENGTH;
 /**
  * Caching of IndexedDISI with two strategies:
  *
- * A lookup table for block blockCache and index, and a rank structure for DENSE block lookups.
+ * A lookup table for block blockCache and index.
  *
  * The lookup table is an array of {@code long}s with an entry for each block. It allows for
  * direct jumping to the block, as opposed to iteration from the current position and forward
@@ -53,30 +53,8 @@ import static org.apache.lucene.codecs.lucene70.IndexedDISI.MAX_ARRAY_LENGTH;
  * In the case of non-existing blocks, the entry in the lookup table has index equal to the
  * previous entry and offset equal to the next non-empty block.
  *
- *
- * The rank structure for DENSE blocks is an array of unsigned {@code short}s with an entry
- * for each sub-block of 512 bits out of the 65536 bits in the outer block.
- *
- * Each rank-entry states the number of set bits within the block up to the bit before the
- * bit positioned at the start of the sub-block.
- * Note that the rank entry of the first sub-block is always 0 and that the last entry can
- * at most be 65536-512 = 65024 and thus will always fit into an unsigned short.
- *
- * See https://en.wikipedia.org/wiki/Succinct_data_structure for details on rank structures.
- * The alternative to using the rank structure is iteration and summing of set bits for all
- * entries in the DENSE sub-block up until the wanted bit, with a worst-case of 1024 entries.
- * The rank cache overhead for a single DENSE block is 128 shorts (128*16 = 2048 bits) or
- * 1/32th.
- *
- * The ranks for the DENSE blocks are stored in a structure shared for the whole array of
- * blocks, DENSE or not. To avoid overhead that structure is itself sparse. See
- * {@link LongCompressor} for details on DENSE structure sparseness.
- *
- *
- * The performance overhead for creating a cache instance is equivalent to accessing all
- * DocValues values for the given field, i.e. it scales linearly with field size. On modern
- * hardware it is in the ballpark of 1ms for 5M values. Caveat lector:
- * At the point of writing, performance points are only available for 2 real-world setups.
+ * The performance overhead for creating a cache instance is equivalent to visiting every 65536th
+ * doc value for the given field, i.e. it scales linearly with field size.
  */
 public class IndexedDISICache implements Accountable {
   private static final int BLOCK = 65536;   // The number of docIDs that a single block represents
@@ -85,12 +63,6 @@ public class IndexedDISICache implements Accountable {
   private static final long BLOCK_INDEX_MASK = ~0L << BLOCK_INDEX_SHIFT; // The index bits in a lookup entry
   private static final long BLOCK_LOOKUP_MASK = ~BLOCK_INDEX_MASK; // The offset bits in a lookup entry
 
-  private static final int RANK_BLOCK = 512; // The number of docIDs/bits in each rank-sub-block within a DENSE block
-  static final int RANK_BLOCK_LONGS = 512/Long.SIZE; // The number of longs making up a rank-block (8)
-  private static final int RANK_BLOCK_BITS = 9;
-  private static final int RANKS_PER_BLOCK = BLOCK/RANK_BLOCK;
-
-  private PackedInts.Reader rank;   // One every 512 docs, sparsely represented as not all blocks are DENSE
   private long[] blockCache = null; // One every 65536 docs, contains index & slice position
   private String creationStats = "";
   private final String name; // Identifier for debug, log & inspection
@@ -114,7 +86,6 @@ public class IndexedDISICache implements Accountable {
 
   private IndexedDISICache() {
     this.blockCache = null;
-    this.rank = null;
     this.name = "";
   }
 
@@ -146,45 +117,10 @@ public class IndexedDISICache implements Accountable {
         -1 : (int)(blockCache[targetBlock] >>> BLOCK_INDEX_SHIFT);
   }
 
-  /**
-   * Given a target (docID), this method returns the first docID in the entry containing the target.
-   * @param target the docID for which an index is wanted.
-   * @return the docID where the rank is known. This will be lte target.
-   */
-  // TODO: This method requires a lot of knowledge of the intrinsics of the cache. Usage should be simplified
-  int denseRankPosition(int target) {
-       return target >> RANK_BLOCK_BITS << RANK_BLOCK_BITS;
-  }
-
   public boolean hasOffsets() {
     return blockCache != null;
   }
 
-  boolean hasRank() {
-    return rank != null;
-  }
-  
-  /**
-   * Get the rank (index) for all set bits up to just before the given rankPosition in the block.
-   * The caller is responsible for deriving the count of bits up to the docID target from the rankPosition.
-   * The caller is also responsible for keeping track of set bits up to the current block.
-   * Important: This only accepts rankPositions that align to {@link #RANK_BLOCK} boundaries.
-   * Note 1: Use {@link #denseRankPosition(int)} to obtain a valid rankPosition for a wanted docID.
-   * Note 2: The caller should seek to the rankPosition in the underlying slice to keep everything in sync.
-   * @param rankPosition a docID target that aligns to {@link #RANK_BLOCK}.
-   * @return the rank (index / set bits count) up to just before the given rankPosition.
-   *         If rank is disabled, -1 is returned.
-   */
-  // TODO: This method requires a lot of knowledge of the intrinsics of the cache. Usage should be simplified
-  int getRankInBlock(int rankPosition) {
-    if (rank == null) {
-      return -1;
-    }
-    assert rankPosition == denseRankPosition(rankPosition);
-    int rankIndex = rankPosition >> RANK_BLOCK_BITS;
-    return rankIndex >= rank.size() ? -1 : (int) rank.get(rankIndex);
-  }
-
   private void updateCaches(IndexInput slice) throws IOException {
     final long startOffset = slice.getFilePointer();
 
@@ -199,19 +135,17 @@ public class IndexedDISICache implements Accountable {
 
     slice.seek(startOffset); // Leave it as we found it
     creationStats = String.format(Locale.ENGLISH,
-        "name=%s, blocks=%d (ALL=%d, DENSE=%d, SPARSE=%d, EMPTY=%d), time=%dms, block=%d bytes, rank=%d bytes",
+        "name=%s, blocks=%d (ALL=%d, DENSE=%d, SPARSE=%d, EMPTY=%d), time=%dms, block=%d bytes",
         name,
         largestBlock+1, statBlockALL.get(), statBlockDENSE.get(), statBlockSPARSE.get(),
         (largestBlock+1-statBlockALL.get()-statBlockDENSE.get()-statBlockSPARSE.get()),
         (System.nanoTime()-startTime)/1000000,
-        blockCache == null ? 0 : blockCache.length*Long.BYTES,
-        rank == null ? 0 : rank.ramBytesUsed());
+        blockCache == null ? 0 : blockCache.length*Long.BYTES);
   }
 
   private int fillCache(
       IndexInput slice, AtomicInteger statBlockALL, AtomicInteger statBlockDENSE, AtomicInteger statBlockSPARSE)
       throws IOException {
-    char[] buildRank = new char[256];
     int largestBlock = -1;
     long index = 0;
     int rankIndex = -1;
@@ -246,27 +180,7 @@ public class IndexedDISICache implements Accountable {
       // The block is DENSE
       statBlockDENSE.incrementAndGet();
       long nextBlockOffset = slice.getFilePointer() + (1 << 13);
-      int setBits = 0;
-      int rankOrigo = blockIndex << 16 >> 9; // Double shift for clarity: The compiler will simplify it
-      for (int rankDelta = 0 ; rankDelta < RANKS_PER_BLOCK ; rankDelta++) { // 128 rank-entries in a block
-        rankIndex = rankOrigo + rankDelta;
-        buildRank = ArrayUtil.grow(buildRank, rankIndex+1);
-        buildRank[rankIndex] = (char)setBits;
-        for (int i = 0 ; i < 512/64 ; i++) { // 8 longs for each rank-entry
-          setBits += Long.bitCount(slice.readLong());
-        }
-      }
-      assert slice.getFilePointer() == nextBlockOffset;
-    }
-    // Compress the buildRank as it is potentially very sparse
-    if (rankIndex < 0) {
-      rank = null;
-    } else {
-      PackedInts.Mutable ranks = PackedInts.getMutable(rankIndex, 16, PackedInts.DEFAULT); // Char = 16 bit
-      for (int i = 0 ; i < rankIndex ; i++) {
-        ranks.set(i, buildRank[i]);
-      }
-      rank = LongCompressor.compress(ranks);
+      slice.seek(nextBlockOffset);
     }
 
     return largestBlock;
@@ -275,7 +189,6 @@ public class IndexedDISICache implements Accountable {
   private void freezeCaches(int largestBlock) {
     if (largestBlock == -1) { // No set bit: Disable the caches
       blockCache = null;
-      rank = null;
       return;
     }
 
@@ -315,8 +228,7 @@ public class IndexedDISICache implements Accountable {
   @Override
   public long ramBytesUsed() {
     return (blockCache == null ? 0 : RamUsageEstimator.sizeOf(blockCache)) +
-        (rank == null ? 0 : rank.ramBytesUsed()) +
         RamUsageEstimator.NUM_BYTES_OBJECT_REF*3 +
         RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + creationStats.length()*2;
   }
-}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1da6d39b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java
index 7a85727..6cf8af1 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java
@@ -118,8 +118,6 @@ public class IndexedDISICacheFactory implements Accountable {
     IndexedDISICache cache = disiPool.get(key);
     if (cache == null) {
       // TODO: Avoid overlapping builds of the same cache
-      // Both BLOCK & DENSE caches are created as they might be requested later for the field,
-      // regardless of whether they are requested now
       cache = new IndexedDISICache(slice, name);
       disiPool.put(key, cache);
     }
@@ -130,9 +128,6 @@ public class IndexedDISICacheFactory implements Accountable {
   public long getDISIBlocksWithOffsetsCount() {
     return disiPool.values().stream().filter(IndexedDISICache::hasOffsets).count();
   }
-  public long getDISIBlocksWithRankCount() {
-    return disiPool.values().stream().filter(IndexedDISICache::hasRank).count();
-  }
 
   @Override
   public long ramBytesUsed() {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1da6d39b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/LongCompressor.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/LongCompressor.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/LongCompressor.java
deleted file mode 100644
index 1c030a7..0000000
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/LongCompressor.java
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.codecs.lucene70;
-
-import org.apache.lucene.util.MathUtil;
-import org.apache.lucene.util.RankBitSet;
-import org.apache.lucene.util.packed.PackedInts;
-
-/**
- * Utility class for generating compressed read-only in-memory representations of longs.
- * The representation is optimized towards random access primarily and space secondarily.
- *
- * The representation always applies delta-to-minvalue and greatest-common-divisor compression.
- *
- * Depending on the number of 0-entries and the length of the array, a sparse representation is
- * used, using rank to improve access speed. Sparseness introduces an O(1) access time overhead.
- * Sparseness can be turned off.
- */
-public class LongCompressor {
-
-  /**
-   * The minimum amount of total values in the data set for sparse to be active.
-   */
-  private static final int DEFAULT_MIN_TOTAL_VALUES_FOR_SPARSE = 10_000;
-
-  /**
-   * The minimum total amount of zero values in the data set for sparse to be active.
-   */
-  private static final int DEFAULT_MIN_ZERO_VALUES_FOR_SPARSE = 500;
-
-  /**
-   * The minimum fraction of the data set that must be zero for sparse to be active.
-   */
-  private static final double DEFAULT_MIN_ZERO_VALUES_FRACTION_FOR_SPARSE = 0.2; // 20% (just guessing of a value here)
-
-  /**
-   * Create a compact version of the given values.
-   * @param values PackedInts with no special constraints.
-   * @return a compact version of the given values or the given values if compression did not improve on heap overhead.
-   */
-  public static PackedInts.Reader compress(PackedInts.Reader values) {
-    return compress(values, values.size());
-  }
-
-  /**
-   * Create a compact version of the given values from index 0 to length-1.
-   * @param values PackedInts with no special constraints.
-   * @param length the number of values to compress.
-   * @return a compact version of the given values or the given values if compression did not improve on heap overhead.
-   */
-  public static PackedInts.Reader compress(PackedInts.Reader values, int length) {
-    return compress(values, values.size(), true);
-  }
-
-  /**
-   * Create a compact version of the given values from index 0 to length-1.
-   * @param values PackedInts with no special constraints.
-   * @param length the number of values to compress.
-   * @param allowSparse if true and the default limits match the input, a sparse representation will be created.
-   * @return a compact version of the given values or the given values if compression did not improve on heap overhead.
-   */
-  public static PackedInts.Reader compress(PackedInts.Reader values, int length, boolean allowSparse) {
-    return compress(values, length, allowSparse,
-        DEFAULT_MIN_TOTAL_VALUES_FOR_SPARSE,
-        DEFAULT_MIN_ZERO_VALUES_FOR_SPARSE,
-        DEFAULT_MIN_ZERO_VALUES_FRACTION_FOR_SPARSE);
-  }
-
-  /**
-   * Create a compact version of the given values from index 0 to length-1.
-   * @param values PackedInts with no special constraints.
-   * @param length the number of values to compress.
-   * @param allowSparse if true and the default limits match the input, a sparse representation will be created.
-   * @param minTotalSparse the minimum absolute number of 0-entries needed for a sparse representation.
-   *                       0-entries are counted after minValue compression: {@code 3, 5, 3, 7, 16} has two 0-entries.
-   * @return a compact version of the given values or the given values if compression did not improve on heap overhead.
-   */
-  public static PackedInts.Reader compress(
-      PackedInts.Reader values, int length, boolean allowSparse,
-      int minTotalSparse, int minZeroSparse, double minZeroFractionSparse) {
-    if (length == 0) {
-      return PackedInts.getMutable(0, 1, PackedInts.DEFAULT);
-    }
-
-    final long min = getMin(values, length);
-    final long gcd = getGCD(values, length, min);
-    final long maxCompressed = getMax(values, length, min, gcd);
-
-    int zeroCount;
-    if (!isPossiblySparseCandidate(length, allowSparse, minTotalSparse) ||
-        !isSparseCandidate(values, length, true, minTotalSparse,
-            (zeroCount = countZeroes(values, length, min, gcd)), minZeroSparse, minZeroFractionSparse)) {
-      // TODO: Add abort-early if it becomes apparent that no space saving is possible
-      PackedInts.Mutable inner =
-          PackedInts.getMutable(length, PackedInts.bitsRequired(maxCompressed), PackedInts.DEFAULT);
-      for (int i = 0 ; i < length ; i++) {
-        inner.set(i, (values.get(i)-min)/gcd);
-      }
-      PackedInts.Reader comp = new CompressedReader(inner, min, gcd);
-      // Sanity check that compression worked and if not, return the original input
-      return comp.ramBytesUsed() < values.ramBytesUsed() ? comp : values;
-    }
-
-    // Sparsify
-    RankBitSet rank = new RankBitSet(length);
-    PackedInts.Mutable inner =
-        PackedInts.getMutable(values.size()-zeroCount, PackedInts.bitsRequired(maxCompressed), PackedInts.DEFAULT);
-    int valueIndex = 0;
-    for (int i = 0 ; i < length ; i++) {
-      long value = (values.get(i)-min)/gcd;
-      if (value != 0) {
-        rank.set(i);
-        inner.set(valueIndex++, value);
-      }
-    }
-    rank.buildRankCache();
-    PackedInts.Reader comp = new CompressedReader(inner, min, gcd, rank);
-    // Sanity check that compression worked and if not, return the original input
-    return comp.ramBytesUsed() < values.ramBytesUsed() ? comp : values;
-  }
-
-  // Fast check
-  private static boolean isPossiblySparseCandidate(int length, boolean allowSparse, int minTotalSparse) {
-    return allowSparse && minTotalSparse <= length;
-  }
-
-  // Also fast, but requires zeroCount which is slow to calculate
-  private static boolean isSparseCandidate(
-      PackedInts.Reader values, int length, boolean allowSparse, int minTotalSparse,
-      int zeroCount, int minZeroSparse, double minZeroFractionSparse) {
-    return allowSparse && minTotalSparse <= length &&
-        minZeroSparse < zeroCount && minZeroFractionSparse < 1.0 * zeroCount / length;
-  }
-
-  // Not very fast as it requires #length divisions.
-  private static int countZeroes(PackedInts.Reader values, int length, long min, final long gcd) {
-    int zeroCount = 0;
-    for (int i = 0 ; i < length ; i++) {
-      if ((values.get(i)-min)/gcd == 0) { // Hope the case where gcd==1 gets JITted. We could add a switch to be sure?
-        zeroCount++;
-      }
-    }
-    return zeroCount;
-  }
-
-  static class CompressedReader extends PackedInts.Reader {
-    private final PackedInts.Reader inner;
-    final long min;
-    final long gcd;
-    final RankBitSet rank;
-
-    CompressedReader(PackedInts.Reader inner, long min, long gcd) {
-      this(inner, min, gcd, null);
-    }
-
-    CompressedReader(PackedInts.Reader inner, long min, long gcd, RankBitSet rank) {
-      this.inner = inner;
-      this.min = min;
-      this.gcd = gcd;
-      this.rank = rank;
-    }
-
-    @Override
-    public int size() {
-      return rank == null ? inner.size() : rank.length();
-    }
-
-    @Override
-    public long get(int docID) {
-      // No rank: The value at the index * gcd + min
-      // Rank but no set bit: min
-      // Rank and set bit: The value at the rank * gcd + min
-      return (rank == null ? inner.get(docID) : rank.get(docID) ? inner.get(rank.rank(docID)) : 0) * gcd + min;
-    }
-
-    @Override
-    public long ramBytesUsed() {
-      return inner.ramBytesUsed() + (rank == null ? 0 : rank.ramBytesUsed());
-    }
-  }
-
-  private static long getMin(PackedInts.Reader values, int length) {
-    long min = Long.MAX_VALUE;
-    for (int i = 0 ; i < length ; i++) {
-      if (min > values.get(i)) {
-        min = values.get(i);
-      }
-    }
-    return min;
-  }
-
-  // GCD-code taken & adjusted from Lucene70DocValuesConsumer
-  private static long getGCD(final PackedInts.Reader values, final int length, final long min) {
-    long gcd = -1;
-
-    for (int i = 0 ; i < length ; i++) {
-      long value = values.get(i)-min;
-      if (value == 0) {
-        continue;
-      }
-      if (gcd == -1) {
-        gcd = value;
-        continue;
-      }
-
-      if (value < Long.MIN_VALUE / 2 || value > Long.MAX_VALUE / 2) {
-        // in that case v - minValue might overflow and make the GCD computation return
-        // wrong results. Since these extreme values are unlikely, we just discard
-        // GCD computation for them
-        gcd = 1;
-      } else { // minValue needs to be set first
-        gcd = MathUtil.gcd(gcd, value);
-      }
-
-      if (gcd == 1) {
-        break;
-      }
-    }
-    return gcd == -1 ? 1 : gcd;
-  }
-
-  private static long getMax(final PackedInts.Reader values, final int length, final long min, final long gcd) {
-    long rawMax = Long.MIN_VALUE;
-    for (int i = 0 ; i < length ; i++) {
-      long value = values.get(i);
-      if (value > rawMax) {
-        rawMax = value;
-      }
-    }
-    return (rawMax-min)/gcd;
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1da6d39b/lucene/core/src/java/org/apache/lucene/util/RankBitSet.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/RankBitSet.java b/lucene/core/src/java/org/apache/lucene/util/RankBitSet.java
deleted file mode 100644
index 3702c63..0000000
--- a/lucene/core/src/java/org/apache/lucene/util/RankBitSet.java
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.util;
-
-import java.io.IOException;
-import java.util.Collection;
-
-import org.apache.lucene.search.DocIdSetIterator;
-
-/**
- * Wrapper for FixedBitSet which creates and exposes a rank cache. The rank-cache scales to 2 billion bits.
- *
- * The rankCache has a long for every 2048 bits and thus has an overhead of 3.125% relative to the given bit set.
- * Performance is O(1):
- * 1 lookup in cache,
- * a maximum of 3 sums,
- * a maximum of 8 Long.bitCounts.
- *
- * Creation performance is equivalent to a full count of all set bits in the bit set O(n).
- *
- * Note: {@link #buildRankCache()} must be called once after the bit set has been created or changed and before
- * calling {@link #rank(long)}.
- *
- * The design is based heavily on the article
- * Space-Efficient, High-Performance Rank and Select Structures on Uncompressed Bit Sequences
- * by Dong Zhou, David G. Andersen, Michael Kaminsky, Carnegie Mellon University, Intel Labs
- * http://www.cs.cmu.edu/~dga/papers/zhou-sea2013.pdf
- */
-// Note: If the total number of set bits is <= 65535, a faster rank cache would be a short for every 512 bits
-// This is not available in the current implementation.
-// Extending the rank beyond 2 billion bits could be done by dividing the bitmap into blocks of 2b bits and
-// introducing yet another table with a rank-origo for each 2b-block
-public class RankBitSet extends BitSet {
-  public static final int  LOWER_BITS = 32; // Must be capable of addressing full Java array
-  public static final long LOWER_MASK = ~(~1L << (LOWER_BITS-1));
-  public static final int LOWER_OVER_BITS = 11;
-  public static final long LOWER_OVER_MASK = ~(~1L << (LOWER_OVER_BITS-1));
-  public static final int LOWER_OVER_SIZE = 2048; // Overflow bits handled by a lower block
-
-  public static final int BASIC_BITS = 10; // Needs to hold counts from 0-512 (513-1023 are never used)
-  public static final long BASIC_MASK = ~(~1L << (BASIC_BITS-1));
-  public static final int BASIC_OVER_BITS = 9;
-  public static final long BASIC_OVER_MASK = ~(~1L << (BASIC_OVER_BITS-1));
-  public static final int BASIC_OVER_SIZE = 512; // Overflow bits handled by a basic block
-  public static final int BASIC_WORDS = BASIC_OVER_SIZE /Long.SIZE; // word == long
-  /**
-   * Each entry is made up of 1 long<br/>
-   * Bits 63-32: 32 bit first-level absolute index.<br/>
-   * Bits 30+31 are unused. These could be used to signal all-set or all-unset for the block to spare a few cycles?
-   * Bits 29-0: 3 * 10 bit (0-1023) second-level relative index. Only numbers 0-512 are used.
-   */
-  private long[] rankCache = null;
-  private final FixedBitSet inner;
-  private final int wlen; // Number of words (longs with FixedBitSet) in inner
-
-  public RankBitSet(int numBits) {
-    this(new FixedBitSet(numBits));
-  }
-
-  public RankBitSet(FixedBitSet inner) {
-    this.inner = inner;
-    wlen = inner.getBits().length;
-  }
-
-  /**
-   * Must be called after the bits have changed and before {@link #rank} is called.
-   */
-  public void buildRankCache() {
-    rankCache = new long[(length() >>> LOWER_OVER_BITS)+1];
-    long total = 0;
-    int lower = 0 ;
-    while (lower * LOWER_OVER_SIZE < length()) { // Full lower block processing
-      final int origoWordIndex = (lower * LOWER_OVER_SIZE) >>> 6;
-    // TODO: Some conditionals could be spared by checking once if all basic blocks are within size
-      final long basic1 = origoWordIndex + BASIC_WORDS <= wlen ?
-          BitUtil.pop_array(inner.getBits(), origoWordIndex, BASIC_WORDS) : 0;
-      final long basic2 =  origoWordIndex + BASIC_WORDS*2 <= wlen ?
-          BitUtil.pop_array(inner.getBits(), origoWordIndex + BASIC_WORDS, BASIC_WORDS) : 0;
-      final long basic3 =  origoWordIndex + BASIC_WORDS*3 <= wlen ?
-          BitUtil.pop_array(inner.getBits(), origoWordIndex + BASIC_WORDS *2, BASIC_WORDS) : 0;
-      final long basic4 =  origoWordIndex + BASIC_WORDS*4 <= wlen ?
-          BitUtil.pop_array(inner.getBits(), origoWordIndex + BASIC_WORDS *3, BASIC_WORDS) : 0;
-      rankCache[lower] = total << (Long.SIZE-LOWER_BITS) |
-           basic1 << (BASIC_BITS *2) |
-           basic2 << BASIC_BITS |
-           basic3;
-      total += basic1 + basic2 + basic3 + basic4;
-      lower++;
-    }
-  }
-
-  /**
-   * Get the rank (number of set bits up to right before the index) for the given index in O(1).
-   * @param index offset in the originating bit set.
-   * @return the rank for the index.
-   */
-  public int rank(long index) {
-    final long cache = rankCache[((int) (index >>> LOWER_OVER_BITS))];
-    // lower cache (absolute)
-    long rank = cache >>> (Long.SIZE-LOWER_BITS);
-    int startBitIndex = (int) (index & ~LOWER_OVER_MASK);
-    // basic blocks (relative)
-    if (startBitIndex < index-BASIC_OVER_SIZE) {
-      rank += (cache >>> (BASIC_BITS*2)) & BASIC_MASK;
-      startBitIndex += BASIC_OVER_SIZE;
-      if (startBitIndex < index-BASIC_OVER_SIZE) {
-        rank += (cache >>> BASIC_BITS) & BASIC_MASK;
-        startBitIndex += BASIC_OVER_SIZE;
-        if (startBitIndex < index-BASIC_OVER_SIZE) {
-          rank += cache & BASIC_MASK;
-          startBitIndex += BASIC_OVER_SIZE;
-        }
-      }
-    }
-    // long.bitcount (relative)
-    while(startBitIndex < index-Long.SIZE) {
-      rank += Long.bitCount(inner.getBits()[startBitIndex >>> 6]);
-      startBitIndex += Long.SIZE;
-    }
-    // Single bits (relative)
-    if (startBitIndex < index) {
-/*      System.out.println(String.format(Locale.ENGLISH,
-          "startBitIndex=%d, index=%d, getBits()[startBitIndex>>>6=%d]=%s, index-startBitIndex=%d, mask=%s",
-          startBitIndex, index, startBitIndex>>>6, Long.toBinaryString(getBits()[startBitIndex>>>6]),
-          index-startBitIndex, Long.toBinaryString(~(~1L << (index-startBitIndex-1)))));*/
-      rank += Long.bitCount(inner.getBits()[startBitIndex >>> 6] & ~(~1L << (index-startBitIndex-1)));
-    }
-//    for (int i = startBitIndex ; i < index ; i++) {
-//      rank += fastGet(i) ? 1 : 0;
-//    }
-    return (int) rank;
-  }
-
-  @Override
-  public long ramBytesUsed() {
-    return RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_REF*2 +
-        Integer.BYTES + Long.BYTES) +
-        inner.ramBytesUsed() +
-        (rankCache == null ? 0 :
-            RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + Long.BYTES*rankCache.length);
-  }
-
-  /* Delegations to inner bit set below */
-
-  public static FixedBitSet ensureCapacity(FixedBitSet bits, int numBits) {
-    return FixedBitSet.ensureCapacity(bits, numBits);
-  }
-
-  public static int bits2words(int numBits) {
-    return FixedBitSet.bits2words(numBits);
-  }
-
-  public static long intersectionCount(FixedBitSet a, FixedBitSet b) {
-    return FixedBitSet.intersectionCount(a, b);
-  }
-
-  public static long unionCount(FixedBitSet a, FixedBitSet b) {
-    return FixedBitSet.unionCount(a, b);
-  }
-
-  public static long andNotCount(FixedBitSet a, FixedBitSet b) {
-    return FixedBitSet.andNotCount(a, b);
-  }
-
-  @Override
-  public int length() {
-    return inner.length();
-  }
-
-  public long[] getBits() {
-    return inner.getBits();
-  }
-
-  @Override
-  public int cardinality() {
-    return inner.cardinality();
-  }
-
-  @Override
-  public boolean get(int index) {
-    return inner.get(index);
-  }
-
-  @Override
-  public void set(int index) {
-    inner.set(index);
-  }
-
-  public boolean getAndSet(int index) {
-    return inner.getAndSet(index);
-  }
-
-  @Override
-  public void clear(int index) {
-    inner.clear(index);
-  }
-
-  public boolean getAndClear(int index) {
-    return inner.getAndClear(index);
-  }
-
-  @Override
-  public int nextSetBit(int index) {
-    return inner.nextSetBit(index);
-  }
-
-  @Override
-  public int prevSetBit(int index) {
-    return inner.prevSetBit(index);
-  }
-
-  @Override
-  public void or(DocIdSetIterator iter) throws IOException {
-    inner.or(iter);
-  }
-
-  public void or(FixedBitSet other) {
-    inner.or(other);
-  }
-
-  public void xor(FixedBitSet other) {
-    inner.xor(other);
-  }
-
-  public void xor(DocIdSetIterator iter) throws IOException {
-    inner.xor(iter);
-  }
-
-  public boolean intersects(FixedBitSet other) {
-    return inner.intersects(other);
-  }
-
-  public void and(FixedBitSet other) {
-    inner.and(other);
-  }
-
-  public void andNot(FixedBitSet other) {
-    inner.andNot(other);
-  }
-
-  public boolean scanIsEmpty() {
-    return inner.scanIsEmpty();
-  }
-
-  public void flip(int startIndex, int endIndex) {
-    inner.flip(startIndex, endIndex);
-  }
-
-  public void flip(int index) {
-    inner.flip(index);
-  }
-
-  public void set(int startIndex, int endIndex) {
-    inner.set(startIndex, endIndex);
-  }
-
-  @Override
-  public void clear(int startIndex, int endIndex) {
-    inner.clear(startIndex, endIndex);
-  }
-
-  @Override
-  public int hashCode() {
-    return inner.hashCode();
-  }
-
-  public static FixedBitSet copyOf(Bits bits) {
-    return FixedBitSet.copyOf(bits);
-  }
-
-  public Bits asReadOnlyBits() {
-    return inner.asReadOnlyBits();
-  }
-
-  public static BitSet of(DocIdSetIterator it, int maxDoc) throws IOException {
-    return BitSet.of(it, maxDoc);
-  }
-
-  @Override
-  public int approximateCardinality() {
-    return inner.approximateCardinality();
-  }
-
-  @Override
-  public Collection<Accountable> getChildResources() {
-    return inner.getChildResources();
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1da6d39b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLongCompressor.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLongCompressor.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLongCompressor.java
deleted file mode 100644
index bb87dc4..0000000
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLongCompressor.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.codecs.lucene70;
-
-import java.util.Locale;
-import java.util.Random;
-
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.packed.PackedInts;
-
-public class TestLongCompressor extends LuceneTestCase {
-
-  // Simple monkey test that builds a collection of random longs, compresses them with LongCompressor and
-  // checks if all values from the compressed version are equal to the source
-  public void testLongCompressorMonkey() {
-    final int RUNS = 10;
-    final int[] MAX_SIZES = new int[]{0, 1, 10, 1000, 100_000};
-    final int[] MAX_VALUE = new int[]{0, 1, 10, 1000, 100_000};
-    for (int run = 0 ; run < RUNS ; run++) {
-      for (int maxSize: MAX_SIZES) {
-        int size = maxSize == 0 ? 0 : random().nextInt(maxSize);
-        for (int maxValue: MAX_VALUE) {
-          int minValue = maxValue == 0 ? 0 : random().nextInt(maxValue);
-          double minChance = random().nextDouble();
-          longCompressorMonkeyTest(run, size, minValue, maxValue, minChance, random().nextLong());
-        }
-      }
-    }
-  }
-
-  public void testVerySparse() {
-    final int SIZE = 674932;
-    final int EVERY = SIZE/896;
-    PackedInts.Mutable ranks = PackedInts.getMutable(674932, 16, PackedInts.DEFAULT);
-    for (int i = 0 ; i < SIZE; i+=EVERY) {
-      ranks.set(i, random().nextInt(65535));
-    }
-    PackedInts.Reader sparsed = LongCompressor.compress(ranks);
-    assertFalse("The input and the sparsed should not be the same", ranks == sparsed);
-  }
-
-  private void longCompressorMonkeyTest(
-      int run, int size, int minValue, int maxValue, double minChance, long randomSeed) {
-    final String description = String.format(Locale.ENGLISH,
-        "run=%d, size=%d, minValue=%d, maxValue=%d, minChance=%1.2f, seed=%d",
-        run, size, minValue, maxValue, minChance, randomSeed);
-    Random innerRandom = new Random(randomSeed);
-    PackedInts.Mutable expected = PackedInts.getMutable(size, PackedInts.bitsRequired(maxValue), PackedInts.DEFAULT);
-    for (int i = 0 ; i < size ; i++) {
-      if (innerRandom.nextDouble() <= minChance) {
-        continue;
-      }
-      expected.set(i, maxValue-minValue == 0 ? minValue : innerRandom.nextInt(maxValue-minValue)+minValue);
-    }
-    assertSparseValid(description, expected);
-  }
-
-  private void assertSparseValid(String description, PackedInts.Reader values) {
-    try {
-      PackedInts.Reader sparsed = LongCompressor.compress(values, values.size());
-
-      for (int i = 0; i < values.size(); i++) {
-        assertEquals("The value at index " + i + " should be as expected for " + description,
-            values.get(i), sparsed.get(i));
-      }
-    } catch (Exception e) {
-      throw new RuntimeException("Unexpected Exception for " + description, e);
-    }
-  }
-
-
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1da6d39b/lucene/core/src/test/org/apache/lucene/util/TestRankBitSet.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestRankBitSet.java b/lucene/core/src/test/org/apache/lucene/util/TestRankBitSet.java
deleted file mode 100644
index 677cc22..0000000
--- a/lucene/core/src/test/org/apache/lucene/util/TestRankBitSet.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.util;
-
-import java.util.Locale;
-import java.util.Random;
-
-public class TestRankBitSet extends LuceneTestCase {
-
-  public void testSingleWord() {
-    RankBitSet rank = new RankBitSet(60);
-    rank.set(20);
-    rank.buildRankCache();
-
-    assertEquals("The rank at index 20 should be correct", 0, rank.rank(20));
-    assertEquals("The rank at index 21 should be correct", 1, rank.rank(21));
-  }
-
-  public void testSecondWord() {
-    RankBitSet rank = new RankBitSet(100);
-    rank.set(70);
-    rank.buildRankCache();
-
-    assertEquals("The rank at index 70 should be correct", 0, rank.rank(70));
-    assertEquals("The rank at index 71 should be correct", 1, rank.rank(71));
-  }
-
-  public void testThirdWord() {
-    RankBitSet rank = new RankBitSet(200);
-    rank.set(130);
-    rank.buildRankCache();
-
-    assertEquals("The rank at index 130 should be correct", 0, rank.rank(130));
-    assertEquals("The rank at index 131 should be correct", 1, rank.rank(131));
-  }
-
-  public void testSecondLower() {
-    RankBitSet rank = new RankBitSet(3000);
-    rank.set(2500);
-    rank.buildRankCache();
-
-    assertEquals("The rank at index 2500 should be correct", 0, rank.rank(2500));
-    assertEquals("The rank at index 2500 should be correct", 1, rank.rank(2501));
-  }
-
-  public void testSpecific282() {
-    RankBitSet rank = new RankBitSet(448);
-    rank.set(282);
-    rank.buildRankCache();
-
-    assertEquals("The rank at index 288 should be correct", 1, rank.rank(288));
-  }
-
-  public void testSpecific1031() {
-    RankBitSet rank = new RankBitSet(1446);
-    rank.set(1031);
-    rank.buildRankCache();
-
-    assertEquals("The rank at index 1057 should be correct", 1, rank.rank(1057));
-  }
-
-  public void testMonkeys() {
-    monkey(20, 8000, 40);
-  }
-
-  @Slow
-  public void testManyMonkeys() {
-    monkey(20, 100000, 400);
-  }
-
-  public void monkey(int runs, int sizeMax, int setMax) {
-    Random random = random();
-    //Random random = new Random(87);
-    for (int run = 0 ; run < runs ; run++) {
-      final int size = random.nextInt(sizeMax-1)+1;
-      RankBitSet rank = new RankBitSet(size);
-      int doSet = random.nextInt(setMax);
-      for (int s = 0 ; s < doSet ; s++) {
-        int index = random.nextInt(size);
-        rank.set(index);
-      }
-      rank.buildRankCache();
-      int setbits = 0;
-      for (int i = 0 ; i < size ; i++) {
-        assertEquals(String.format(Locale.ENGLISH, "run=%d, index=%d/%d, setbits=%d", run, i, size, setbits),
-            setbits, rank.rank(i));
-        if (rank.get(i)) {
-          setbits++;
-        }
-      }
-    }
-  }
-}

[3/5] lucene-solr:master: Revert "LUCENE-8374 part 3/4: Reduce reads for sparse DocValues". LUCENE-8374 was committed without consensus and is expected to be superseded by LUCENE-8585.

Posted by to...@apache.org.

Revert "LUCENE-8374 part 3/4: Reduce reads for sparse DocValues".
LUCENE-8374 was committed without consensus and is expected to be superseded by LUCENE-8585.

This reverts commit 7949b98f802c9ab3a588a33cdb1771b83c9fcafb.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/6c5d87a5
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/6c5d87a5
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/6c5d87a5

Branch: refs/heads/master
Commit: 6c5d87a505e4ed5803c41c900a374791bb033a0b
Parents: 3158d0c
Author: Toke Eskildsen <to...@apache.org>
Authored: Tue Dec 11 14:14:07 2018 +0100
Committer: Toke Eskildsen <to...@apache.org>
Committed: Tue Dec 11 14:14:07 2018 +0100

----------------------------------------------------------------------
 .../lucene/codecs/lucene70/IndexedDISI.java     |  49 ------
 .../codecs/lucene70/IndexedDISICache.java       |   2 +-
 .../lucene70/IndexedDISICacheFactory.java       |  90 +---------
 .../lucene70/Lucene70DocValuesProducer.java     | 165 +++++++++++--------
 .../lucene/codecs/lucene70/TestIndexedDISI.java |  36 ++--
 .../org/apache/lucene/index/TestDocValues.java  |   4 +-
 6 files changed, 114 insertions(+), 232 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c5d87a5/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java
index 5eaefbc..114710e 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java
@@ -303,9 +303,6 @@ final class IndexedDISI extends DocIdSetIterator {
         final int targetInBlock = target & 0xFFFF;
         final int targetWordIndex = targetInBlock >>> 6;
 
-        // If possible, skip ahead using the rank cache
-        disi.rankSkip(disi, target);
-
         for (int i = disi.wordIndex + 1; i <= targetWordIndex; ++i) {
           disi.word = disi.slice.readLong();
           disi.numberOfOnes += Long.bitCount(disi.word);
@@ -340,9 +337,6 @@ final class IndexedDISI extends DocIdSetIterator {
         final int targetInBlock = target & 0xFFFF;
         final int targetWordIndex = targetInBlock >>> 6;
 
-        // If possible, skip ahead using the rank cache
-        disi.rankSkip(disi, target);
-
         for (int i = disi.wordIndex + 1; i <= targetWordIndex; ++i) {
           disi.word = disi.slice.readLong();
           disi.numberOfOnes += Long.bitCount(disi.word);
@@ -379,47 +373,4 @@ final class IndexedDISI extends DocIdSetIterator {
     abstract boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException;
   }
 
-  /**
-   * If the distance between the current position and the target is >= 8 words, the rank cache will
-   * be used to guarantee a worst-case of 1 rank-lookup and 7 word-read-and-count-bits operations.
-   * Note: This does not guarantee a skip up to target, only up to nearest rank boundary. It is the
-   * responsibility of the caller to iterate further to reach target.
-   * @param disi standard DISI.
-   * @param target the wanted docID for which to calculate set-flag and index.
-   * @throws IOException if a disi seek failed.
-   */
-  private void rankSkip(IndexedDISI disi, int target) throws IOException {
-    final int targetInBlock = target & 0xFFFF;
-    final int targetWordIndex = targetInBlock >>> 6;
-
-    // If the distance between the current position and the target is >= 8
-    // then it pays to use the rank to jump
-    if (!(disi.cache.hasRank() && targetWordIndex - disi.wordIndex >= IndexedDISICache.RANK_BLOCK_LONGS)) {
-      return;
-    }
-
-    int rankPos = disi.cache.denseRankPosition(target);
-    if (rankPos == -1) {
-      return;
-    }
-    int rank = disi.cache.getRankInBlock(rankPos);
-    if (rank == -1) {
-      return;
-    }
-    int rankIndex = disi.denseOrigoIndex + rank;
-    int rankWordIndex = (rankPos & 0xFFFF) >> 6;
-    long rankOffset = disi.blockStart + 4 + (rankWordIndex * 8);
-
-    long mark = disi.slice.getFilePointer();
-    disi.slice.seek(rankOffset);
-    long rankWord = disi.slice.readLong();
-    int rankNOO = rankIndex + Long.bitCount(rankWord);
-    rankOffset += Long.BYTES;
-
-
-    //disi.slice.seek(mark);
-    disi.wordIndex = rankWordIndex;
-    disi.word = rankWord;
-    disi.numberOfOnes = rankNOO;
-  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c5d87a5/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java
index fdf93cf..be0beb7 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java
@@ -319,4 +319,4 @@ public class IndexedDISICache implements Accountable {
         RamUsageEstimator.NUM_BYTES_OBJECT_REF*3 +
         RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + creationStats.length()*2;
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c5d87a5/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java
index 610529a..7a85727 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java
@@ -44,8 +44,6 @@ public class IndexedDISICacheFactory implements Accountable {
 
   // jump-table and rank for DISI blocks
   private final Map<Long, IndexedDISICache> disiPool = new HashMap<>();
-  // jump-table for numerics with variable bits per value (dates, longs...)
-  private final Map<String, VaryingBPVJumpTable> vBPVPool = new HashMap<>();
 
   /**
    * Create a cached {@link IndexedDISI} instance.
@@ -77,24 +75,6 @@ public class IndexedDISICacheFactory implements Accountable {
   }
 
   /**
-   * Creates a cache (jump table) for variable bits per value numerics and returns it.
-   * If the cache has previously been created, the old cache is returned.
-   * @param name the name for the cache, typically the field name. Used as key for later retrieval.
-   * @param slice the long values with varying bits per value.
-   * @param valuesLength the length in bytes of the slice.
-   * @return a jump table for the longs in the given slice or null if the structure is not suitable for caching.
-   */
-  VaryingBPVJumpTable getVBPVJumpTable(String name, RandomAccessInput slice, long valuesLength) throws IOException {
-    VaryingBPVJumpTable jumpTable = vBPVPool.get(name);
-    if (jumpTable == null) {
-      // TODO: Avoid overlapping builds of the same jump table for performance reasons
-      jumpTable = new VaryingBPVJumpTable(slice, name, valuesLength);
-      vBPVPool.put(name, jumpTable);
-    }
-    return jumpTable;
-  }
-
-  /**
    * Creates a cache (jump table) for {@link IndexedDISI}.
    * If the cache has previously been created, the old cache is returned.
    * @param data   the slice to create a cache for.
@@ -153,26 +133,16 @@ public class IndexedDISICacheFactory implements Accountable {
   public long getDISIBlocksWithRankCount() {
     return disiPool.values().stream().filter(IndexedDISICache::hasRank).count();
   }
-  public long getVaryingBPVCount() {
-    return vBPVPool.size();
-  }
 
   @Override
   public long ramBytesUsed() {
     long mem = RamUsageEstimator.shallowSizeOf(this) +
-        RamUsageEstimator.shallowSizeOf(disiPool) +
-        RamUsageEstimator.shallowSizeOf(vBPVPool);
+        RamUsageEstimator.shallowSizeOf(disiPool);
     for (Map.Entry<Long, IndexedDISICache> cacheEntry: disiPool.entrySet()) {
       mem += RamUsageEstimator.shallowSizeOf(cacheEntry);
       mem += RamUsageEstimator.sizeOf(cacheEntry.getKey());
       mem += cacheEntry.getValue().ramBytesUsed();
     }
-    for (Map.Entry<String, VaryingBPVJumpTable> cacheEntry: vBPVPool.entrySet()) {
-      String key = cacheEntry.getKey();
-      mem += RamUsageEstimator.shallowSizeOf(cacheEntry);
-      mem += RamUsageEstimator.shallowSizeOf(key)+key.length()*2;
-      mem += cacheEntry.getValue().ramBytesUsed();
-    }
     return mem;
   }
 
@@ -181,63 +151,5 @@ public class IndexedDISICacheFactory implements Accountable {
    */
   void releaseAll() {
     disiPool.clear();
-    vBPVPool.clear();
-  }
-
-  /**
-   * Jump table used by Lucene70DocValuesProducer.VaryingBPVReader to avoid iterating all blocks from
-   * current to wanted index for numerics with variable bits per value. The jump table holds offsets for all blocks.
-   */
-  public static class VaryingBPVJumpTable implements Accountable {
-    // TODO: It is way overkill to use longs here for practically all indexes. Maybe a PackedInts representation?
-    long[] offsets = new long[10];
-    final String creationStats;
-
-    VaryingBPVJumpTable(RandomAccessInput slice, String name, long valuesLength) throws IOException {
-      final long startTime = System.nanoTime();
-
-      int block = -1;
-      long offset;
-      long blockEndOffset = 0;
-
-      int bitsPerValue;
-      do {
-        offset = blockEndOffset;
-
-        offsets = ArrayUtil.grow(offsets, block+2); // No-op if large enough
-        offsets[block+1] = offset;
-
-        bitsPerValue = slice.readByte(offset++);
-        offset += Long.BYTES; // Skip over delta as we do not resolve the values themselves at this point
-        if (bitsPerValue == 0) {
-          blockEndOffset = offset;
-        } else {
-          final int length = slice.readInt(offset);
-          offset += Integer.BYTES;
-          blockEndOffset = offset + length;
-        }
-        block++;
-      } while (blockEndOffset < valuesLength-Byte.BYTES);
-      offsets = ArrayUtil.copyOfSubArray(offsets, 0, block+1);
-      creationStats = String.format(Locale.ENGLISH,
-          "name=%s, blocks=%d, time=%dms",
-          name, offsets.length, (System.nanoTime()-startTime)/1000000);
-    }
-
-    /**
-     * @param block the logical block in the vBPV structure ( valueindex/16384 ).
-     * @return the index slice offset for the vBPV block (1 block = 16384 values) or -1 if not available.
-     */
-    long getBlockOffset(long block) {
-      // Technically a limitation in caching vs. VaryingBPVReader to limit to 2b blocks of 16K values
-      return offsets[(int) block];
-    }
-
-    @Override
-    public long ramBytesUsed() {
-      return RamUsageEstimator.shallowSizeOf(this) +
-          (offsets == null ? 0 : RamUsageEstimator.sizeOf(offsets)) + // offsets
-          RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + creationStats.length()*2;  // creationStats
-    }
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c5d87a5/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
index 9b2b9e0..812caba 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
@@ -465,18 +465,44 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
           }
         };
       } else {
+        final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
         if (entry.blockShift >= 0) {
           // dense but split into blocks of different bits per value
+          final int shift = entry.blockShift;
+          final long mul = entry.gcd;
+          final int mask = (1 << shift) - 1;
           return new DenseNumericDocValues(maxDoc) {
-            final VaryingBPVReader vBPVReader = new VaryingBPVReader(entry);
+            int block = -1;
+            long delta;
+            long offset;
+            long blockEndOffset;
+            LongValues values;
 
             @Override
             public long longValue() throws IOException {
-              return vBPVReader.getLongValue(doc);
+              final int block = doc >>> shift;
+              if (this.block != block) {
+                int bitsPerValue;
+                do {
+                  offset = blockEndOffset;
+                  bitsPerValue = slice.readByte(offset++);
+                  delta = slice.readLong(offset);
+                  offset += Long.BYTES;
+                  if (bitsPerValue == 0) {
+                    blockEndOffset = offset;
+                  } else {
+                    final int length = slice.readInt(offset);
+                    offset += Integer.BYTES;
+                    blockEndOffset = offset + length;
+                  }
+                  this.block ++;
+                } while (this.block != block);
+                values = bitsPerValue == 0 ? LongValues.ZEROES : DirectReader.getInstance(slice, bitsPerValue, offset);
+              }
+              return mul * values.get(doc & mask) + delta;
             }
           };
         } else {
-          final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
           final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
           if (entry.table != null) {
             final long[] table = entry.table;
@@ -510,19 +536,45 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
           }
         };
       } else {
+        final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
         if (entry.blockShift >= 0) {
           // sparse and split into blocks of different bits per value
+          final int shift = entry.blockShift;
+          final long mul = entry.gcd;
+          final int mask = (1 << shift) - 1;
           return new SparseNumericDocValues(disi) {
-            final VaryingBPVReader vBPVReader = new VaryingBPVReader(entry);
+            int block = -1;
+            long delta;
+            long offset;
+            long blockEndOffset;
+            LongValues values;
 
             @Override
             public long longValue() throws IOException {
               final int index = disi.index();
-              return vBPVReader.getLongValue(index);
+              final int block = index >>> shift;
+              if (this.block != block) {
+                int bitsPerValue;
+                do {
+                  offset = blockEndOffset;
+                  bitsPerValue = slice.readByte(offset++);
+                  delta = slice.readLong(offset);
+                  offset += Long.BYTES;
+                  if (bitsPerValue == 0) {
+                    blockEndOffset = offset;
+                  } else {
+                    final int length = slice.readInt(offset);
+                    offset += Integer.BYTES;
+                    blockEndOffset = offset + length;
+                  }
+                  this.block ++;
+                } while (this.block != block);
+                values = bitsPerValue == 0 ? LongValues.ZEROES : DirectReader.getInstance(slice, bitsPerValue, offset);
+              }
+              return mul * values.get(index & mask) + delta;
             }
           };
         } else {
-          final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
           final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
           if (entry.table != null) {
             final long[] table = entry.table;
@@ -556,20 +608,47 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
         }
       };
     } else {
+      final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
       if (entry.blockShift >= 0) {
+        final int shift = entry.blockShift;
+        final long mul = entry.gcd;
+        final long mask = (1L << shift) - 1;
         return new LongValues() {
-          final VaryingBPVReader vBPVReader = new VaryingBPVReader(entry);
-          @Override
+          long block = -1;
+          long delta;
+          long offset;
+          long blockEndOffset;
+          LongValues values;
+
           public long get(long index) {
-            try {
-              return vBPVReader.getLongValue(index);
-            } catch (IOException e) {
-              throw new RuntimeException(e);
+            final long block = index >>> shift;
+            if (this.block != block) {
+              assert block > this.block : "Reading backwards is illegal: " + this.block + " < " + block;
+              int bitsPerValue;
+              do {
+                offset = blockEndOffset;
+                try {
+                  bitsPerValue = slice.readByte(offset++);
+                  delta = slice.readLong(offset);
+                  offset += Long.BYTES;
+                  if (bitsPerValue == 0) {
+                    blockEndOffset = offset;
+                  } else {
+                    final int length = slice.readInt(offset);
+                    offset += Integer.BYTES;
+                    blockEndOffset = offset + length;
+                  }
+                } catch (IOException e) {
+                  throw new RuntimeException(e);
+                }
+                this.block ++;
+              } while (this.block != block);
+              values = bitsPerValue == 0 ? LongValues.ZEROES : DirectReader.getInstance(slice, bitsPerValue, offset);
             }
+            return mul * values.get(index & mask) + delta;
           }
         };
       } else {
-        final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
         final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
         if (entry.table != null) {
           final long[] table = entry.table;
@@ -1378,64 +1457,4 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
     CodecUtil.checksumEntireFile(data);
   }
 
-  /**
-   * Reader for longs split into blocks of different bits per values.
-   * The longs are requested by index and must be accessed in monotonically increasing order.
-   */
-  // Note: The order requirement could be removed as the jump-tables allow for backwards iteration.
-  private class VaryingBPVReader {
-    final RandomAccessInput slice;
-    final NumericEntry entry;
-    final int shift;
-    final long mul;
-    final int mask;
-
-    long block = -1;
-    long delta;
-    long offset;
-    long blockEndOffset;
-    LongValues values;
-
-    VaryingBPVReader(NumericEntry entry) throws IOException {
-      this.entry = entry;
-      slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
-      shift = entry.blockShift;
-      mul = entry.gcd;
-      mask = (1 << shift) - 1;
-    }
-
-    long getLongValue(long index) throws IOException {
-      final long block = index >>> shift;
-      if (this.block != block) {
-        int bitsPerValue;
-        do {
-          // If the needed block is the one directly following the current block, it is cheaper to avoid the cache
-          if (block != this.block+1) {
-            IndexedDISICacheFactory.VaryingBPVJumpTable cache;
-            if ((cache = disiCacheFactory.getVBPVJumpTable(entry.name, slice, entry.valuesLength)) != null) {
-              long candidateOffset;
-              if ((candidateOffset = cache.getBlockOffset(block)) != -1) {
-                blockEndOffset = candidateOffset;
-                this.block = block - 1;
-              }
-            }
-          }
-          offset = blockEndOffset;
-          bitsPerValue = slice.readByte(offset++);
-          delta = slice.readLong(offset);
-          offset += Long.BYTES;
-          if (bitsPerValue == 0) {
-            blockEndOffset = offset;
-          } else {
-            final int length = slice.readInt(offset);
-            offset += Integer.BYTES;
-            blockEndOffset = offset + length;
-          }
-          this.block++;
-        } while (this.block != block);
-        values = bitsPerValue == 0 ? LongValues.ZEROES : DirectReader.getInstance(slice, bitsPerValue, offset);
-      }
-      return mul * values.get(index & mask) + delta;
-    }
-  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c5d87a5/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestIndexedDISI.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestIndexedDISI.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestIndexedDISI.java
index 5201aad..aae3a7f 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestIndexedDISI.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestIndexedDISI.java
@@ -246,26 +246,26 @@ public class TestIndexedDISI extends LuceneTestCase {
 
   private void assertAdvanceExactRandomized(IndexedDISI disi, BitSetIterator disi2, int disi2length, int step)
       throws IOException {
-    int index = -1;
+        int index = -1;
     for (int target = 0; target < disi2length; ) {
-      target += TestUtil.nextInt(random(), 0, step);
-      int doc = disi2.docID();
-      while (doc < target) {
-        doc = disi2.nextDoc();
-        index++;
-      }
+          target += TestUtil.nextInt(random(), 0, step);
+          int doc = disi2.docID();
+          while (doc < target) {
+            doc = disi2.nextDoc();
+            index++;
+          }
 
-      boolean exists = disi.advanceExact(target);
-      assertEquals(doc == target, exists);
-      if (exists) {
-        assertEquals(index, disi.index());
-      } else if (random().nextBoolean()) {
-        assertEquals(doc, disi.nextDoc());
-        assertEquals(index, disi.index());
-        target = doc;
+          boolean exists = disi.advanceExact(target);
+          assertEquals(doc == target, exists);
+          if (exists) {
+            assertEquals(index, disi.index());
+          } else if (random().nextBoolean()) {
+            assertEquals(doc, disi.nextDoc());
+            assertEquals(index, disi.index());
+            target = doc;
+          }
+        }
       }
-    }
-  }
 
   private void assertSingleStepEquality(IndexedDISI disi, BitSetIterator disi2) throws IOException {
     int i = 0;
@@ -274,7 +274,7 @@ public class TestIndexedDISI extends LuceneTestCase {
       assertEquals(i++, disi.index());
     }
     assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc());
-  }
+    }
 
   private void assertAdvanceEquality(IndexedDISI disi, BitSetIterator disi2, int step) throws IOException {
     int index = -1;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c5d87a5/lucene/core/src/test/org/apache/lucene/index/TestDocValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocValues.java b/lucene/core/src/test/org/apache/lucene/index/TestDocValues.java
index 98b7d9e..ebc045a 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestDocValues.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDocValues.java
@@ -228,8 +228,8 @@ public class TestDocValues extends LuceneTestCase {
     }
     dir.close();
   }
-
-  /**
+  
+  /** 
    * field with binary docvalues
    */
   public void testBinaryField() throws Exception {

[5/5] lucene-solr:master: Revert "LUCENE-8374 part 1/4: Reduce reads for sparse DocValues". LUCENE-8374 was committed without consensus and is expected to be superseded by LUCENE-8585.

Posted by to...@apache.org.

Revert "LUCENE-8374 part 1/4: Reduce reads for sparse DocValues".
LUCENE-8374 was committed without consensus and is expected to be superseded by LUCENE-8585.

This reverts commit 58a7a8ada5cebeb261060c56cd6d0a9446478bf6.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/8a20705b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/8a20705b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/8a20705b

Branch: refs/heads/master
Commit: 8a20705b82272352ffcef8a18a7e8f96b2c05a7b
Parents: 1da6d39
Author: Toke Eskildsen <to...@apache.org>
Authored: Tue Dec 11 14:17:57 2018 +0100
Committer: Toke Eskildsen <to...@apache.org>
Committed: Tue Dec 11 14:17:57 2018 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   3 -
 .../lucene/codecs/lucene70/IndexedDISI.java     |  65 +-----
 .../codecs/lucene70/IndexedDISICache.java       | 234 -------------------
 .../lucene70/IndexedDISICacheFactory.java       | 150 ------------
 .../lucene70/Lucene70DocValuesProducer.java     |  85 ++-----
 .../lucene/codecs/lucene70/TestIndexedDISI.java |  91 ++------
 .../org/apache/lucene/index/TestDocValues.java  | 105 ---------
 7 files changed, 49 insertions(+), 684 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8a20705b/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 22e3e61..212607f 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -220,9 +220,6 @@ Optimizations
   to early terminate the iterator if the minimum score is greater than the constant
   score. (Christophe Bismuth via Jim Ferenczi)
 
-* LUCENE-8374: Reduce reads for sparse DocValues and whole number numeric DocValues.
-  (Toke Eskildsen)
-
 ======================= Lucene 7.7.0 =======================
 
 Build

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8a20705b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java
index 114710e..6138896 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISI.java
@@ -50,9 +50,6 @@ import org.apache.lucene.util.RoaringDocIdSet;
 final class IndexedDISI extends DocIdSetIterator {
 
   static final int MAX_ARRAY_LENGTH = (1 << 12) - 1;
-  static final String NO_NAME = "n/a";
-
-  public final String name;
 
   private static void flush(int block, FixedBitSet buffer, int cardinality, IndexOutput out) throws IOException {
     assert block >= 0 && block < 65536;
@@ -101,49 +98,19 @@ final class IndexedDISI extends DocIdSetIterator {
   /** The slice that stores the {@link DocIdSetIterator}. */
   private final IndexInput slice;
   private final long cost;
-  private final IndexedDISICache cache;
 
   IndexedDISI(IndexInput in, long offset, long length, long cost) throws IOException {
-    this(in, offset, length, cost, NO_NAME);
-  }
-
-  IndexedDISI(IndexInput in, long offset, long length, long cost, String name) throws IOException {
-    this(in, offset, length, cost, null, name);
-  }
-
-  IndexedDISI(IndexInput in, long offset, long length, long cost, IndexedDISICache cache) throws IOException {
-    this(in, offset, length, cost, cache, NO_NAME);
-  }
-
-  IndexedDISI(IndexInput in, long offset, long length, long cost, IndexedDISICache cache, String name) throws IOException {
-    this(in.slice("docs", offset, length), cost, cache, name);
-  }
-
-  IndexedDISI(IndexInput slice, long cost) throws IOException {
-    this(slice, cost, NO_NAME);
-  }
-  // This constructor allows to pass the slice directly in case it helps reuse
-  // see eg. Lucene70 norms producer's merge instance
-  IndexedDISI(IndexInput slice, long cost, String name) throws IOException {
-    this(slice, cost, null, name);
-//    IndexedDISICacheFactory.debug(
-//        "Non-cached direct slice IndexedDISI with length " + slice.length() + ": " + slice.toString());
+    this(in.slice("docs", offset, length), cost);
   }
 
-  IndexedDISI(IndexInput slice, long cost, IndexedDISICache cache) throws IOException {
-    this(slice, cost, cache, NO_NAME);
-  }
   // This constructor allows to pass the slice directly in case it helps reuse
   // see eg. Lucene70 norms producer's merge instance
-  IndexedDISI(IndexInput slice, long cost, IndexedDISICache cache, String name) {
-    this.name = name;
+  IndexedDISI(IndexInput slice, long cost) throws IOException {
     this.slice = slice;
     this.cost = cost;
-    this.cache = cache == null ? IndexedDISICache.EMPTY : cache;
   }
 
   private int block = -1;
-  private long blockStart; // Used with the DENSE cache
   private long blockEnd;
   private int nextBlockIndex = -1;
   Method method;
@@ -159,8 +126,6 @@ final class IndexedDISI extends DocIdSetIterator {
   private int wordIndex = -1;
   // number of one bits encountered so far, including those of `word`
   private int numberOfOnes;
-  // Used with rank for jumps inside of DENSE
-  private int denseOrigoIndex;
 
   // ALL variables
   private int gap;
@@ -173,7 +138,6 @@ final class IndexedDISI extends DocIdSetIterator {
   @Override
   public int advance(int target) throws IOException {
     final int targetBlock = target & 0xFFFF0000;
-    // Note: The cache makes it easy to add support for random access. This has not been done as the API forbids it
     if (block < targetBlock) {
       advanceBlock(targetBlock);
     }
@@ -199,20 +163,6 @@ final class IndexedDISI extends DocIdSetIterator {
   }
 
   private void advanceBlock(int targetBlock) throws IOException {
-    if (targetBlock >= block+2) { // 1 block skip is (slightly) faster to do without block jump table
-      long offset = cache.getFilePointerForBlock(targetBlock >> IndexedDISICache.BLOCK_BITS);
-      if (offset != -1 && offset > slice.getFilePointer()) {
-        int origo = cache.getIndexForBlock(targetBlock >> IndexedDISICache.BLOCK_BITS);
-        if (origo != -1) {
-          this.nextBlockIndex = origo - 1; // -1 to compensate for the always-added 1 in readBlockHeader
-          slice.seek(offset);
-          readBlockHeader();
-          return;
-        }
-      }
-    }
-
-    // Fallback to non-cached
     do {
       slice.seek(blockEnd);
       readBlockHeader();
@@ -220,7 +170,6 @@ final class IndexedDISI extends DocIdSetIterator {
   }
 
   private void readBlockHeader() throws IOException {
-    blockStart = slice.getFilePointer();
     block = Short.toUnsignedInt(slice.readShort()) << 16;
     assert block >= 0;
     final int numValues = 1 + Short.toUnsignedInt(slice.readShort());
@@ -238,7 +187,6 @@ final class IndexedDISI extends DocIdSetIterator {
       blockEnd = slice.getFilePointer() + (1 << 13);
       wordIndex = -1;
       numberOfOnes = index + 1;
-      denseOrigoIndex = numberOfOnes;
     }
   }
 
@@ -302,7 +250,6 @@ final class IndexedDISI extends DocIdSetIterator {
       boolean advanceWithinBlock(IndexedDISI disi, int target) throws IOException {
         final int targetInBlock = target & 0xFFFF;
         final int targetWordIndex = targetInBlock >>> 6;
-
         for (int i = disi.wordIndex + 1; i <= targetWordIndex; ++i) {
           disi.word = disi.slice.readLong();
           disi.numberOfOnes += Long.bitCount(disi.word);
@@ -316,10 +263,7 @@ final class IndexedDISI extends DocIdSetIterator {
           return true;
         }
 
-        // There were no set bits at the wanted position. Move forward until one is reached
         while (++disi.wordIndex < 1024) {
-          // This could use the rank cache to skip empty spaces >= 512 bits, but it seems unrealistic
-          // that such blocks would be DENSE
           disi.word = disi.slice.readLong();
           if (disi.word != 0) {
             disi.index = disi.numberOfOnes;
@@ -328,15 +272,12 @@ final class IndexedDISI extends DocIdSetIterator {
             return true;
           }
         }
-        // No set bits in the block at or after the wanted position.
         return false;
       }
-
       @Override
       boolean advanceExactWithinBlock(IndexedDISI disi, int target) throws IOException {
         final int targetInBlock = target & 0xFFFF;
         final int targetWordIndex = targetInBlock >>> 6;
-
         for (int i = disi.wordIndex + 1; i <= targetWordIndex; ++i) {
           disi.word = disi.slice.readLong();
           disi.numberOfOnes += Long.bitCount(disi.word);
@@ -347,8 +288,6 @@ final class IndexedDISI extends DocIdSetIterator {
         disi.index = disi.numberOfOnes - Long.bitCount(leftBits);
         return (leftBits & 1L) != 0;
       }
-
-
     },
     ALL {
       @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8a20705b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java
deleted file mode 100644
index 9a80689..0000000
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICache.java
+++ /dev/null
@@ -1,234 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.codecs.lucene70;
-
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Locale;
-import java.util.concurrent.atomic.AtomicInteger;
-
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.Accountable;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.packed.PackedInts;
-
-import static org.apache.lucene.codecs.lucene70.IndexedDISI.MAX_ARRAY_LENGTH;
-
-/**
- * Caching of IndexedDISI with two strategies:
- *
- * A lookup table for block blockCache and index.
- *
- * The lookup table is an array of {@code long}s with an entry for each block. It allows for
- * direct jumping to the block, as opposed to iteration from the current position and forward
- * one block at a time.
- *
- * Each long entry consists of 2 logical parts:
- *
- * The first 31 bits holds the index (number of set bits in the blocks) up to just before the
- * wanted block. The next 33 bits holds the offset into the underlying slice.
- * As there is a maximum of 2^16 blocks, it follows that the maximum size of any block must
- * not exceed 2^17 bits to avoid overflow. This is currently the case, with the largest
- * block being DENSE and using 2^16 + 32 bits, and is likely to continue to hold as using
- * more than double the amount of bits is unlikely to be an efficient representation.
- * The cache overhead is numDocs/1024 bytes.
- *
- * Note: There are 4 types of blocks: ALL, DENSE, SPARSE and non-existing (0 set bits).
- * In the case of non-existing blocks, the entry in the lookup table has index equal to the
- * previous entry and offset equal to the next non-empty block.
- *
- * The performance overhead for creating a cache instance is equivalent to visiting every 65536th
- * doc value for the given field, i.e. it scales lineary to field size.
- */
-public class IndexedDISICache implements Accountable {
-  private static final int BLOCK = 65536;   // The number of docIDs that a single block represents
-  static final int BLOCK_BITS = 16;
-  private static final long BLOCK_INDEX_SHIFT = 33; // Number of bits to shift a lookup entry to get the index
-  private static final long BLOCK_INDEX_MASK = ~0L << BLOCK_INDEX_SHIFT; // The index bits in a lookup entry
-  private static final long BLOCK_LOOKUP_MASK = ~BLOCK_INDEX_MASK; // The offset bits in a lookup entry
-
-  private long[] blockCache = null; // One every 65536 docs, contains index & slice position
-  private String creationStats = "";
-  private final String name; // Identifier for debug, log & inspection
-
-  // Flags for not-yet-defined-values used during building
-  private static final long BLOCK_EMPTY_INDEX = ~0L << BLOCK_INDEX_SHIFT;
-  private static final long BLOCK_EMPTY_LOOKUP = BLOCK_LOOKUP_MASK;
-  private static final long BLOCK_EMPTY = BLOCK_EMPTY_INDEX | BLOCK_EMPTY_LOOKUP;
-
-  /**
-   * Builds the stated caches for the given IndexInput.
-   *
-   * @param in positioned at the start of the logical underlying bitmap.
-   */
-  IndexedDISICache(IndexInput in, String name) throws IOException {
-    blockCache = new long[16];    // Will be extended when needed
-    Arrays.fill(blockCache, BLOCK_EMPTY);
-    this.name = name;
-    updateCaches(in);
-  }
-
-  private IndexedDISICache() {
-    this.blockCache = null;
-    this.name = "";
-  }
-
-  // Used to represent no caching.
-  public static final IndexedDISICache EMPTY = new IndexedDISICache();
-
-  /**
-   * If available, returns a position within the underlying {@link IndexInput} for the start of the block
-   * containing the wanted bit (the target) or the next non-EMPTY block, if the block representing the bit is empty.
-   * @param targetBlock the index for the block to resolve (docID / 65536).
-   * @return the offset for the block for target or -1 if it cannot be resolved.
-   */
-  long getFilePointerForBlock(int targetBlock) {
-    long offset = blockCache == null || blockCache.length <= targetBlock ?
-        -1 : blockCache[targetBlock] & BLOCK_LOOKUP_MASK;
-    return offset == BLOCK_EMPTY_LOOKUP ? -1 : offset;
-  }
-
-  /**
-   * If available, returns the index; number of set bits before the wanted block.
-   * @param targetBlock the block to resolve (docID / 65536).
-   * @return the index for the block or -1 if it cannot be resolved.
-   */
-  int getIndexForBlock(int targetBlock) {
-    if (blockCache == null || blockCache.length <= targetBlock) {
-      return -1;
-    }
-    return (blockCache[targetBlock] & BLOCK_INDEX_MASK) == BLOCK_EMPTY_INDEX ?
-        -1 : (int)(blockCache[targetBlock] >>> BLOCK_INDEX_SHIFT);
-  }
-
-  public boolean hasOffsets() {
-    return blockCache != null;
-  }
-
-  private void updateCaches(IndexInput slice) throws IOException {
-    final long startOffset = slice.getFilePointer();
-
-    final long startTime = System.nanoTime();
-    AtomicInteger statBlockALL = new AtomicInteger(0);
-    AtomicInteger statBlockDENSE = new AtomicInteger(0);
-    AtomicInteger statBlockSPARSE = new AtomicInteger(0);
-
-    // Fill phase
-    int largestBlock = fillCache(slice, statBlockALL, statBlockDENSE, statBlockSPARSE);
-    freezeCaches(largestBlock);
-
-    slice.seek(startOffset); // Leave it as we found it
-    creationStats = String.format(Locale.ENGLISH,
-        "name=%s, blocks=%d (ALL=%d, DENSE=%d, SPARSE=%d, EMPTY=%d), time=%dms, block=%d bytes",
-        name,
-        largestBlock+1, statBlockALL.get(), statBlockDENSE.get(), statBlockSPARSE.get(),
-        (largestBlock+1-statBlockALL.get()-statBlockDENSE.get()-statBlockSPARSE.get()),
-        (System.nanoTime()-startTime)/1000000,
-        blockCache == null ? 0 : blockCache.length*Long.BYTES);
-  }
-
-  private int fillCache(
-      IndexInput slice, AtomicInteger statBlockALL, AtomicInteger statBlockDENSE, AtomicInteger statBlockSPARSE)
-      throws IOException {
-    int largestBlock = -1;
-    long index = 0;
-    int rankIndex = -1;
-    while (slice.getFilePointer() < slice.length()) {
-      final long startFilePointer = slice.getFilePointer();
-
-      final int blockIndex = Short.toUnsignedInt(slice.readShort());
-      final int numValues = 1 + Short.toUnsignedInt(slice.readShort());
-
-      assert blockIndex > largestBlock;
-      if (blockIndex == DocIdSetIterator.NO_MORE_DOCS >>> 16) { // End reached
-        assert Short.toUnsignedInt(slice.readShort()) == (DocIdSetIterator.NO_MORE_DOCS & 0xFFFF);
-        break;
-      }
-      largestBlock = blockIndex;
-
-      blockCache = ArrayUtil.grow(blockCache, blockIndex+1); // No-op if large enough
-      blockCache[blockIndex] = (index << BLOCK_INDEX_SHIFT) | startFilePointer;
-      index += numValues;
-
-      if (numValues <= MAX_ARRAY_LENGTH) { // SPARSE
-        statBlockSPARSE.incrementAndGet();
-        slice.seek(slice.getFilePointer() + (numValues << 1));
-        continue;
-      }
-      if (numValues == 65536) { // ALL
-        statBlockALL.incrementAndGet();
-        // Already at next block offset
-        continue;
-      }
-
-      // The block is DENSE
-      statBlockDENSE.incrementAndGet();
-      long nextBlockOffset = slice.getFilePointer() + (1 << 13);
-      slice.seek(nextBlockOffset);
-    }
-
-    return largestBlock;
-  }
-
-  private void freezeCaches(int largestBlock) {
-    if (largestBlock == -1) { // No set bit: Disable the caches
-      blockCache = null;
-      return;
-    }
-
-    // Reduce size to minimum
-    if (blockCache.length-1 > largestBlock) {
-      long[] newBC = new long[Math.max(largestBlock - 1, 1)];
-      System.arraycopy(blockCache, 0, newBC, 0, newBC.length);
-      blockCache = newBC;
-    }
-
-    // Set non-defined blockCache entries (caused by blocks with 0 set bits) to the subsequently defined one
-    long latest = BLOCK_EMPTY;
-    for (int i = blockCache.length-1; i >= 0 ; i--) {
-      long current = blockCache[i];
-      if (current == BLOCK_EMPTY) {
-        blockCache[i] = latest;
-      } else {
-        latest = current;
-      }
-    }
-  }
-
-  /**
-   * @return Human readable details from the creation of the cache instance.
-   */
-  public String getCreationStats() {
-    return creationStats;
-  }
-
-  /**
-   * @return Human-readable name for the cache instance.
-   */
-  public String getName() {
-    return name;
-  }
-
-  @Override
-  public long ramBytesUsed() {
-    return (blockCache == null ? 0 : RamUsageEstimator.sizeOf(blockCache)) +
-        RamUsageEstimator.NUM_BYTES_OBJECT_REF*3 +
-        RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + creationStats.length()*2;
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8a20705b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java
deleted file mode 100644
index 6cf8af1..0000000
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/IndexedDISICacheFactory.java
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.codecs.lucene70;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Locale;
-import java.util.Map;
-
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.RandomAccessInput;
-import org.apache.lucene.util.Accountable;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.RamUsageEstimator;
-
-/**
- * Creates and stores caches for {@link IndexedDISI} and {@link Lucene70DocValuesProducer}.
- * The caches are stored in maps, where the key is made up from offset and length of a slice
- * in an underlying segment. Each segment uses their own IndexedDISICacheFactory.
- *
- * See {@link IndexedDISICache} for details on the caching.
- */
-public class IndexedDISICacheFactory implements Accountable {
-
-  /**
-   * If the slice with the DISI-data is less than this number of bytes, don't create a cache.
-   * This is a very low number as the DISI-structure very efficiently represents EMPTY and ALL blocks.
-   */
-  private static int MIN_LENGTH_FOR_CACHING = 50; // Set this very low: Could be 9 EMPTY followed by a SPARSE
-
-  // jump-table and rank for DISI blocks
-  private final Map<Long, IndexedDISICache> disiPool = new HashMap<>();
-
-  /**
-   * Create a cached {@link IndexedDISI} instance.
-   * @param data   persistent data containing the DISI-structure.
-   * @param cost   cost as defined for IndexedDISI.
-   * @param name   identifier for the DISI-structure for debug purposes.
-   * @return a cached IndexedDISI or a plain IndexedDISI, if caching is not applicable.
-   * @throws IOException if the DISI-structure could not be accessed.
-   */
-  IndexedDISI createCachedIndexedDISI(IndexInput data, long key, int cost, String name) throws IOException {
-    IndexedDISICache cache = getCache(data, key, name);
-    return new IndexedDISI(data, cost, cache, name);
-  }
-
-  /**
-   * Create a cached {@link IndexedDISI} instance.
-   * @param data   persistent data containing the DISI-structure.
-   * @param offset same as the offset that will also be used for creating an {@link IndexedDISI}.
-   * @param length same af the length that will also be used for creating an {@link IndexedDISI}.
-   * @param cost   cost as defined for IndexedDISI.
-   * @param name   identifier for the DISI-structure for debug purposes.
-   * @return a cached IndexedDISI or a plain IndexedDISI, if caching is not applicable.
-   * @throws IOException if the DISI-structure could not be accessed.
-   */
-  IndexedDISI createCachedIndexedDISI(IndexInput data, long offset, long length, long cost, String name)
-      throws IOException {
-    IndexedDISICache cache = getCache(data, offset, length, name);
-    return new IndexedDISI(data, offset, length, cost, cache, name);
-  }
-
-  /**
-   * Creates a cache (jump table) for {@link IndexedDISI}.
-   * If the cache has previously been created, the old cache is returned.
-   * @param data   the slice to create a cache for.
-   * @param offset same as the offset that will also be used for creating an {@link IndexedDISI}.
-   * @param length same af the length that will also be used for creating an {@link IndexedDISI}.
-   * @param name human readable designation, typically a field name. Used for debug, log and inspection.
-   * @return a cache for the given slice+offset+length or null if not suitable for caching.
-   */
-  public IndexedDISICache getCache(IndexInput data, long offset, long length, String name) throws IOException {
-    if (length < MIN_LENGTH_FOR_CACHING) {
-      return null;
-    }
-
-    long key = offset + length;
-    IndexedDISICache cache = disiPool.get(key);
-    if (cache == null) {
-      // TODO: Avoid overlapping builds of the same cache for performance reason
-      cache = new IndexedDISICache(data.slice("docs", offset, length), name);
-      disiPool.put(key, cache);
-    }
-    return cache;
-  }
-
-  /**
-   * Creates a cache (jump table) for {@link IndexedDISI}.
-   * If the cache has previously been created, the old cache is returned.
-   * @param slice the input slice.
-   * @param key identifier for the cache, unique within the segment that originated the slice.
-   *            Recommendation is offset+length for the slice, relative to the data mapping the segment.
-   *            Warning: Do not use slice.getFilePointer and slice.length as they are not guaranteed
-   *            to be unique within the segment (slice.getFilePointer is 0 when a sub-slice is created).
-   * @param name human readable designation, typically a field name. Used for debug, log and inspection.
-   * @return a cache for the given slice+offset+length or null if not suitable for caching.
-   */
-  public IndexedDISICache getCache(IndexInput slice, long key, String name) throws IOException {
-    final long length = slice.length();
-    if (length < MIN_LENGTH_FOR_CACHING) {
-      return null;
-    }
-
-    IndexedDISICache cache = disiPool.get(key);
-    if (cache == null) {
-      // TODO: Avoid overlapping builds of the same cache
-      cache = new IndexedDISICache(slice, name);
-      disiPool.put(key, cache);
-    }
-    return cache;
-  }
-
-  // Statistics
-  public long getDISIBlocksWithOffsetsCount() {
-    return disiPool.values().stream().filter(IndexedDISICache::hasOffsets).count();
-  }
-
-  @Override
-  public long ramBytesUsed() {
-    long mem = RamUsageEstimator.shallowSizeOf(this) +
-        RamUsageEstimator.shallowSizeOf(disiPool);
-    for (Map.Entry<Long, IndexedDISICache> cacheEntry: disiPool.entrySet()) {
-      mem += RamUsageEstimator.shallowSizeOf(cacheEntry);
-      mem += RamUsageEstimator.sizeOf(cacheEntry.getKey());
-      mem += cacheEntry.getValue().ramBytesUsed();
-    }
-    return mem;
-  }
-
-  /**
-   * Releases all caches.
-   */
-  void releaseAll() {
-    disiPool.clear();
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8a20705b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
index 812caba..b0f6e84 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
@@ -57,7 +57,6 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
   private final Map<String,SortedNumericEntry> sortedNumerics = new HashMap<>();
   private long ramBytesUsed;
   private final IndexInput data;
-  private final IndexedDISICacheFactory disiCacheFactory = new IndexedDISICacheFactory();
   private final int maxDoc;
 
   /** expert: instantiates a new reader */
@@ -120,23 +119,23 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
       }
       byte type = meta.readByte();
       if (type == Lucene70DocValuesFormat.NUMERIC) {
-        numerics.put(info.name, readNumeric(meta, info.name));
+        numerics.put(info.name, readNumeric(meta));
       } else if (type == Lucene70DocValuesFormat.BINARY) {
-        binaries.put(info.name, readBinary(meta, info.name));
+        binaries.put(info.name, readBinary(meta));
       } else if (type == Lucene70DocValuesFormat.SORTED) {
-        sorted.put(info.name, readSorted(meta, info.name));
+        sorted.put(info.name, readSorted(meta));
       } else if (type == Lucene70DocValuesFormat.SORTED_SET) {
-        sortedSets.put(info.name, readSortedSet(meta, info.name));
+        sortedSets.put(info.name, readSortedSet(meta));
       } else if (type == Lucene70DocValuesFormat.SORTED_NUMERIC) {
-        sortedNumerics.put(info.name, readSortedNumeric(meta, info.name));
+        sortedNumerics.put(info.name, readSortedNumeric(meta));
       } else {
         throw new CorruptIndexException("invalid type: " + type, meta);
       }
     }
   }
 
-  private NumericEntry readNumeric(ChecksumIndexInput meta, String name) throws IOException {
-    NumericEntry entry = new NumericEntry(name);
+  private NumericEntry readNumeric(ChecksumIndexInput meta) throws IOException {
+    NumericEntry entry = new NumericEntry();
     readNumeric(meta, entry);
     return entry;
   }
@@ -168,8 +167,8 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
     entry.valuesLength = meta.readLong();
   }
 
-  private BinaryEntry readBinary(ChecksumIndexInput meta, String name) throws IOException {
-    BinaryEntry entry = new BinaryEntry(name);
+  private BinaryEntry readBinary(ChecksumIndexInput meta) throws IOException {
+    BinaryEntry entry = new BinaryEntry();
     entry.dataOffset = meta.readLong();
     entry.dataLength = meta.readLong();
     entry.docsWithFieldOffset = meta.readLong();
@@ -187,8 +186,8 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
     return entry;
   }
 
-  private SortedEntry readSorted(ChecksumIndexInput meta, String name) throws IOException {
-    SortedEntry entry = new SortedEntry(name);
+  private SortedEntry readSorted(ChecksumIndexInput meta) throws IOException {
+    SortedEntry entry = new SortedEntry();
     entry.docsWithFieldOffset = meta.readLong();
     entry.docsWithFieldLength = meta.readLong();
     entry.numDocsWithField = meta.readInt();
@@ -199,12 +198,12 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
     return entry;
   }
 
-  private SortedSetEntry readSortedSet(ChecksumIndexInput meta, String name) throws IOException {
-    SortedSetEntry entry = new SortedSetEntry(name);
+  private SortedSetEntry readSortedSet(ChecksumIndexInput meta) throws IOException {
+    SortedSetEntry entry = new SortedSetEntry();
     byte multiValued = meta.readByte();
     switch (multiValued) {
       case 0: // singlevalued
-        entry.singleValueEntry = readSorted(meta, name);
+        entry.singleValueEntry = readSorted(meta);
         return entry;
       case 1: // multivalued
         break;
@@ -246,8 +245,8 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
     entry.termsIndexAddressesLength = meta.readLong();
   }
 
-  private SortedNumericEntry readSortedNumeric(ChecksumIndexInput meta, String name) throws IOException {
-    SortedNumericEntry entry = new SortedNumericEntry(name);
+  private SortedNumericEntry readSortedNumeric(ChecksumIndexInput meta) throws IOException {
+    SortedNumericEntry entry = new SortedNumericEntry();
     readNumeric(meta, entry);
     entry.numDocsWithField = meta.readInt();
     if (entry.numDocsWithField != entry.numValues) {
@@ -263,23 +262,9 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
   @Override
   public void close() throws IOException {
     data.close();
-    disiCacheFactory.releaseAll();
   }
 
-  // Highly debatable if this is a sane construct as the name is only used for debug/logging/inspection purposes
-  // This was introduced in LUCENE-8374
-  private static class EntryImpl {
-    final String name;
-
-    public EntryImpl(String name) {
-      this.name = name;
-    }
-  }
-
-  private static class NumericEntry extends EntryImpl {
-    public NumericEntry(String name) {
-      super(name);
-    }
+  private static class NumericEntry {
     long[] table;
     int blockShift;
     byte bitsPerValue;
@@ -292,10 +277,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
     long valuesLength;
   }
 
-  private static class BinaryEntry extends EntryImpl {
-    public BinaryEntry(String name) {
-      super(name);
-    }
+  private static class BinaryEntry {
     long dataOffset;
     long dataLength;
     long docsWithFieldOffset;
@@ -308,10 +290,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
     DirectMonotonicReader.Meta addressesMeta;
   }
 
-  private static class TermsDictEntry extends EntryImpl {
-    public TermsDictEntry(String name) {
-      super(name);
-    }
+  private static class TermsDictEntry {
     long termsDictSize;
     int termsDictBlockShift;
     DirectMonotonicReader.Meta termsAddressesMeta;
@@ -329,9 +308,6 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
   }
 
   private static class SortedEntry extends TermsDictEntry {
-    public SortedEntry(String name) {
-      super(name);
-    }
     long docsWithFieldOffset;
     long docsWithFieldLength;
     int numDocsWithField;
@@ -341,9 +317,6 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
   }
 
   private static class SortedSetEntry extends TermsDictEntry {
-    public SortedSetEntry(String name) {
-      super(name);
-    }
     SortedEntry singleValueEntry;
     long docsWithFieldOffset;
     long docsWithFieldLength;
@@ -357,9 +330,6 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
   }
 
   private static class SortedNumericEntry extends NumericEntry {
-    public SortedNumericEntry(String name) {
-      super(name);
-    }
     int numDocsWithField;
     DirectMonotonicReader.Meta addressesMeta;
     long addressesOffset;
@@ -368,7 +338,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
 
   @Override
   public long ramBytesUsed() {
-    return ramBytesUsed + disiCacheFactory.ramBytesUsed();
+    return ramBytesUsed;
   }
 
   @Override
@@ -526,8 +496,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
       }
     } else {
       // sparse
-      final IndexedDISI disi = disiCacheFactory.createCachedIndexedDISI(
-          data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numValues, entry.name);
+      final IndexedDISI disi = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numValues);
       if (entry.bitsPerValue == 0) {
         return new SparseNumericDocValues(disi) {
           @Override
@@ -798,8 +767,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
       }
     } else {
       // sparse
-      final IndexedDISI disi = disiCacheFactory.createCachedIndexedDISI(
-          data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField, entry.name);
+      final IndexedDISI disi = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField);
       if (entry.minLength == entry.maxLength) {
         // fixed length
         final int length = entry.maxLength;
@@ -900,8 +868,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
       };
     } else {
       // sparse
-      final IndexedDISI disi = disiCacheFactory.createCachedIndexedDISI(
-          data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField, entry.name);
+      final IndexedDISI disi = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField);
       return new BaseSortedDocValues(entry, data) {
 
         @Override
@@ -1269,8 +1236,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
       };
     } else {
       // sparse
-      final IndexedDISI disi = disiCacheFactory.createCachedIndexedDISI(
-          data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField, entry.name);
+      final IndexedDISI disi = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField);
       return new SortedNumericDocValues() {
 
         boolean set;
@@ -1396,8 +1362,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
       };
     } else {
       // sparse
-      final IndexedDISI disi = disiCacheFactory.createCachedIndexedDISI(
-          data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField, entry.name);
+      final IndexedDISI disi = new IndexedDISI(data, entry.docsWithFieldOffset, entry.docsWithFieldLength, entry.numDocsWithField);
       return new BaseSortedSetDocValues(entry, data) {
 
         boolean set;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8a20705b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestIndexedDISI.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestIndexedDISI.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestIndexedDISI.java
index aae3a7f..64bfbd5 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestIndexedDISI.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestIndexedDISI.java
@@ -150,39 +150,6 @@ public class TestIndexedDISI extends LuceneTestCase {
       }
     }
   }
-  public void testDenseMultiBlock() throws IOException {
-    try (Directory dir = newDirectory()) {
-      int maxDoc = 10 * 65536; // 10 blocks
-      FixedBitSet set = new FixedBitSet(maxDoc);
-      for (int i = 0; i < maxDoc; i += 2) { // Set every other to ensure dense
-        set.set(i);
-      }
-      doTest(set, dir);
-    }
-  }
-
-  public void testOneDocMissingFixed() throws IOException {
-    int maxDoc = 9699;
-    FixedBitSet set = new FixedBitSet(maxDoc);
-    set.set(0, maxDoc);
-    set.clear(1345);
-    try (Directory dir = newDirectory()) {
-
-      final int cardinality = set.cardinality();
-      long length;
-      try (IndexOutput out = dir.createOutput("foo", IOContext.DEFAULT)) {
-        IndexedDISI.writeBitSet(new BitSetIterator(set, cardinality), out);
-        length = out.getFilePointer();
-      }
-
-      int step = 16000;
-      try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
-        IndexedDISI disi = new IndexedDISI(in, 0L, length, cardinality);
-        BitSetIterator disi2 = new BitSetIterator(set, cardinality);
-        assertAdvanceEquality(disi, disi2, step);
-      }
-    }
-  }
 
   public void testRandom() throws IOException {
     try (Directory dir = newDirectory()) {
@@ -221,14 +188,32 @@ public class TestIndexedDISI extends LuceneTestCase {
     try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
       IndexedDISI disi = new IndexedDISI(in, 0L, length, cardinality);
       BitSetIterator disi2 = new BitSetIterator(set, cardinality);
-      assertSingleStepEquality(disi, disi2);
+      int i = 0;
+      for (int doc = disi2.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi2.nextDoc()) {
+        assertEquals(doc, disi.nextDoc());
+        assertEquals(i++, disi.index());
+      }
+      assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc());
     }
 
     for (int step : new int[] {1, 10, 100, 1000, 10000, 100000}) {
       try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
         IndexedDISI disi = new IndexedDISI(in, 0L, length, cardinality);
         BitSetIterator disi2 = new BitSetIterator(set, cardinality);
-        assertAdvanceEquality(disi, disi2, step);
+        int index = -1;
+        while (true) {
+          int target = disi2.docID() + step;
+          int doc;
+          do {
+            doc = disi2.nextDoc();
+            index++;
+          } while (doc < target);
+          assertEquals(doc, disi.advance(target));
+          if (doc == DocIdSetIterator.NO_MORE_DOCS) {
+            break;
+          }
+          assertEquals(index, disi.index());
+        }
       }
     }
 
@@ -236,18 +221,8 @@ public class TestIndexedDISI extends LuceneTestCase {
       try (IndexInput in = dir.openInput("foo", IOContext.DEFAULT)) {
         IndexedDISI disi = new IndexedDISI(in, 0L, length, cardinality);
         BitSetIterator disi2 = new BitSetIterator(set, cardinality);
-        int disi2length = set.length();
-        assertAdvanceExactRandomized(disi, disi2, disi2length, step);
-      }
-    }
-
-    dir.deleteFile("foo");
-  }
-
-  private void assertAdvanceExactRandomized(IndexedDISI disi, BitSetIterator disi2, int disi2length, int step)
-      throws IOException {
         int index = -1;
-    for (int target = 0; target < disi2length; ) {
+        for (int target = 0; target < set.length(); ) {
           target += TestUtil.nextInt(random(), 0, step);
           int doc = disi2.docID();
           while (doc < target) {
@@ -266,31 +241,9 @@ public class TestIndexedDISI extends LuceneTestCase {
           }
         }
       }
-
-  private void assertSingleStepEquality(IndexedDISI disi, BitSetIterator disi2) throws IOException {
-    int i = 0;
-    for (int doc = disi2.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = disi2.nextDoc()) {
-      assertEquals(doc, disi.nextDoc());
-      assertEquals(i++, disi.index());
-    }
-    assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc());
     }
 
-  private void assertAdvanceEquality(IndexedDISI disi, BitSetIterator disi2, int step) throws IOException {
-    int index = -1;
-    while (true) {
-      int target = disi2.docID() + step;
-      int doc;
-      do {
-        doc = disi2.nextDoc();
-        index++;
-      } while (doc < target);
-      assertEquals(doc, disi.advance(target));
-      if (doc == DocIdSetIterator.NO_MORE_DOCS) {
-        break;
-      }
-      assertEquals("Expected equality using step " + step + " at docID " + doc, index, disi.index());
-    }
+    dir.deleteFile("foo");
   }
 
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8a20705b/lucene/core/src/test/org/apache/lucene/index/TestDocValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestDocValues.java b/lucene/core/src/test/org/apache/lucene/index/TestDocValues.java
index ebc045a..0214e54 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestDocValues.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestDocValues.java
@@ -18,13 +18,7 @@ package org.apache.lucene.index;
 
 
 import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.List;
 
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.BinaryDocValuesField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -33,9 +27,7 @@ import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.SortedNumericDocValuesField;
 import org.apache.lucene.document.SortedSetDocValuesField;
 import org.apache.lucene.document.StringField;
-import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.MMapDirectory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
@@ -131,103 +123,6 @@ public class TestDocValues extends LuceneTestCase {
     iw.close();
     dir.close();
   }
-
-  /**
-   * Triggers varying bits per value codec representation for numeric.
-   */
-  public void testNumericFieldVaryingBPV() throws Exception {
-    Directory dir = newDirectory();
-    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
-    long generatedSum = 0;
-    for (int bpv = 2 ; bpv < 24 ; bpv+=3) {
-      for (int i = 0 ; i < 66000 ; i++) {
-        Document doc = new Document();
-        int max = 1 << (bpv - 1);
-        int value =  random().nextInt(max) | max;
-        generatedSum += value;
-        //System.out.println("--- " + value);
-        doc.add(new NumericDocValuesField("foo", value));
-        iw.addDocument(doc);
-      }
-    }
-    iw.flush();
-    iw.forceMerge(1, true);
-    iw.commit();
-    DirectoryReader dr = DirectoryReader.open(iw);
-    LeafReader r = getOnlyLeafReader(dr);
-
-    // ok
-    NumericDocValues numDV = DocValues.getNumeric(r, "foo");
-
-    assertNotNull(numDV);
-    long sum = 0;
-    while (numDV.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
-      sum += numDV.longValue();
-    }
-    assertEquals("The sum of retrieved values should match the input", generatedSum, sum);
-
-//    assertNotNull(DocValues.getSortedNumeric(r, "foo"));
-
-    dr.close();
-    iw.close();
-    dir.close();
-  }
-
-  // LUCENE-8374 had a bug where a vBPV-block with BPV==0 as the very end of the numeric DocValues made it fail
-  public void testNumericEntryZeroesLastBlock() throws IOException {
-    List<Long> docValues = new ArrayList<>(2*16384);
-    for (int id = 0 ; id < 2*16384 ; id++) { // 2 vBPV-blocks for the dv-field
-      if (id < 16384) { // First vBPV-block just has semi-ramdom values
-        docValues.add((long) (id % 1000));
-      } else {          // Second block is all zeroes, resulting in an extreme "1-byte for the while block"-representation
-        docValues.add(0L);
-      }
-    }
-    assertRandomAccessDV("Last block BPV=0", docValues);
-  }
-
-  private void assertRandomAccessDV(String designation, List<Long> docValues) throws IOException {
-    // Create corpus
-    Path zeroPath = Paths.get(System.getProperty("java.io.tmpdir"),"plain_" + random().nextInt());
-    Directory zeroDir = new MMapDirectory(zeroPath);
-    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
-    //iwc.setCodec(Codec.forName("Lucene70"));
-    IndexWriter iw = new IndexWriter(zeroDir, iwc);
-
-    for (int id = 0 ; id < docValues.size() ; id++) {
-      Document doc = new Document();
-      doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
-      doc.add(new NumericDocValuesField("dv", docValues.get(id)));
-      iw.addDocument(doc);
-    }
-    iw.flush();
-    iw.commit();
-    iw.forceMerge(1, true);
-    iw.close();
-
-    DirectoryReader dr = DirectoryReader.open(zeroDir);
-    for (int id = 0 ; id < docValues.size() ; id++) {
-      int readerIndex = dr.readerIndex(id);
-      // We create a new reader each time as we want to test vBPV-skipping and not sequential iteration
-      NumericDocValues numDV = dr.leaves().get(readerIndex).reader().getNumericDocValues("dv");
-      assertTrue(designation + ": There should be a value for docID " + id, numDV.advanceExact(id));
-      assertEquals(designation + ": The value for docID " + id + " should be as expected",
-          docValues.get(id), Long.valueOf(numDV.longValue()));
-    }
-    dr.close();
-
-    // Clean up
-    deleteAndClose(zeroDir);
-    Files.delete(zeroPath);
-  }
-
-  private void deleteAndClose(Directory dir) throws IOException {
-    String[] files = dir.listAll();
-    for (String file: files) {
-      dir.deleteFile(file);
-    }
-    dir.close();
-  }
   
   /** 
    * field with binary docvalues

[2/5] lucene-solr:master: Revert "LUCENE-8374 part 4/4: Reduce reads for sparse DocValues". LUCENE-8374 was committed without consensus and is expected to be superseded by LUCENE-8585.

Posted by to...@apache.org.

Revert "LUCENE-8374 part 4/4: Reduce reads for sparse DocValues".
LUCENE-8374 was committed without consensus and is expected to be superseded by LUCENE-8585.

This reverts commit e356d793caf2a899f23261baba922d4a08b362ed.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/3158d0c4
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/3158d0c4
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/3158d0c4

Branch: refs/heads/master
Commit: 3158d0c485449400d35a0095db15acdce9f8db5c
Parents: 870bb11
Author: Toke Eskildsen <to...@apache.org>
Authored: Tue Dec 11 14:13:37 2018 +0100
Committer: Toke Eskildsen <to...@apache.org>
Committed: Tue Dec 11 14:13:37 2018 +0100

----------------------------------------------------------------------
 .../apache/lucene/codecs/lucene70/Lucene70NormsProducer.java | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3158d0c4/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java
index 321fcff..c7310e8 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70NormsProducer.java
@@ -45,7 +45,6 @@ final class Lucene70NormsProducer extends NormsProducer implements Cloneable {
   private final Map<Integer,NormsEntry> norms = new HashMap<>();
   private final int maxDoc;
   private IndexInput data;
-  private final IndexedDISICacheFactory disiCacheFactory = new IndexedDISICacheFactory();
   private boolean merging;
   private Map<Integer, IndexInput> disiInputs;
   private Map<Integer, RandomAccessInput> dataInputs;
@@ -294,9 +293,7 @@ final class Lucene70NormsProducer extends NormsProducer implements Cloneable {
     } else {
       // sparse
       final IndexInput disiInput = getDisiInput(field, entry);
-      final IndexedDISI disi = disiCacheFactory.createCachedIndexedDISI(
-          disiInput, entry.docsWithFieldOffset + entry.docsWithFieldLength, entry.numDocsWithField, field.name);
-
+      final IndexedDISI disi = new IndexedDISI(disiInput, entry.numDocsWithField);
       if (entry.bytesPerNorm == 0) {
         return new SparseNormsIterator(disi) {
           @Override
@@ -345,12 +342,11 @@ final class Lucene70NormsProducer extends NormsProducer implements Cloneable {
   @Override
   public void close() throws IOException {
     data.close();
-    disiCacheFactory.releaseAll();
   }
 
   @Override
   public long ramBytesUsed() {
-    return 64L * norms.size() + disiCacheFactory.ramBytesUsed(); // good enough
+    return 64L * norms.size(); // good enough
   }
 
   @Override