You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by iv...@apache.org on 2020/06/10 06:21:27 UTC

[lucene-solr] branch branch_8x updated: LUCENE-9398: Always keep BKD index off-heap. BKD reader does not implement Accountable any more (#1558)

This is an automated email from the ASF dual-hosted git repository.

ivera pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new c143082  LUCENE-9398: Always keep BKD index off-heap. BKD reader does not implement Accountable any more (#1558)
c143082 is described below

commit c143082159912a3bab11c3c1daddf20ce7ef9abe
Author: Ignacio Vera <iv...@apache.org>
AuthorDate: Wed Jun 10 08:13:12 2020 +0200

    LUCENE-9398: Always keep BKD index off-heap. BKD reader does not implement Accountable any more (#1558)
---
 lucene/CHANGES.txt                                 |   2 +
 .../codecs/lucene60/Lucene60PointsReader.java      |  22 +--
 .../codecs/lucene86/Lucene86PointsReader.java      |  22 +--
 .../java/org/apache/lucene/util/bkd/BKDReader.java | 193 ++++-----------------
 .../test/org/apache/lucene/util/bkd/TestBKD.java   |  18 +-
 5 files changed, 43 insertions(+), 214 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 70e0b16..f370774 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -157,6 +157,8 @@ Other
 
 * LUCENE-9232: Fix or suppress 13 resource leak precommit warnings in lucene/replicator (Andras Salamon via Erick Erickson)
 
+* LUCENE-9398: Always keep BKD index off-heap. BKD reader does not implement Accountable any more. (Ignacio Vera)
+
 Build
 
 * Upgrade forbiddenapis to version 3.0.1.  (Uwe Schindler)
diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsReader.java
index e24a33c..482c5f2 100644
--- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsReader.java
+++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsReader.java
@@ -19,11 +19,7 @@ package org.apache.lucene.codecs.lucene60;
 
 import java.io.Closeable;
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 
 import org.apache.lucene.codecs.CodecUtil;
@@ -34,8 +30,6 @@ import org.apache.lucene.index.PointValues;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.Accountable;
-import org.apache.lucene.util.Accountables;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.bkd.BKDReader;
 
@@ -133,21 +127,7 @@ public class Lucene60PointsReader extends PointsReader implements Closeable {
 
   @Override
   public long ramBytesUsed() {
-    long sizeInBytes = 0;
-    for(BKDReader reader : readers.values()) {
-      sizeInBytes += reader.ramBytesUsed();
-    }
-    return sizeInBytes;
-  }
-
-  @Override
-  public Collection<Accountable> getChildResources() {
-    List<Accountable> resources = new ArrayList<>();
-    for(Map.Entry<Integer,BKDReader> ent : readers.entrySet()) {
-      resources.add(Accountables.namedAccountable(readState.fieldInfos.fieldInfo(ent.getKey()).name,
-                                                  ent.getValue()));
-    }
-    return Collections.unmodifiableList(resources);
+    return 0L;
   }
 
   @Override
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java
index 8ba2e4a..9aabc97 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene86/Lucene86PointsReader.java
@@ -19,11 +19,7 @@ package org.apache.lucene.codecs.lucene86;
 
 import java.io.Closeable;
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
 import java.util.HashMap;
-import java.util.List;
 import java.util.Map;
 
 import org.apache.lucene.codecs.CodecUtil;
@@ -35,8 +31,6 @@ import org.apache.lucene.index.PointValues;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.Accountable;
-import org.apache.lucene.util.Accountables;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.bkd.BKDReader;
 
@@ -133,21 +127,7 @@ public class Lucene86PointsReader extends PointsReader implements Closeable {
 
   @Override
   public long ramBytesUsed() {
-    long sizeInBytes = 0;
-    for(BKDReader reader : readers.values()) {
-      sizeInBytes += reader.ramBytesUsed();
-    }
-    return sizeInBytes;
-  }
-
-  @Override
-  public Collection<Accountable> getChildResources() {
-    List<Accountable> resources = new ArrayList<>();
-    for(Map.Entry<Integer,BKDReader> ent : readers.entrySet()) {
-      resources.add(Accountables.namedAccountable(readState.fieldInfos.fieldInfo(ent.getKey()).name,
-                                                  ent.getValue()));
-    }
-    return Collections.unmodifiableList(resources);
+    return 0L;
   }
 
   @Override
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
index 459c97a..05a1aa6 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
@@ -23,11 +23,7 @@ import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.PointValues;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.store.ByteArrayDataInput;
-import org.apache.lucene.store.ByteBufferIndexInput;
-import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FutureArrays;
 import org.apache.lucene.util.MathUtil;
@@ -36,103 +32,7 @@ import org.apache.lucene.util.MathUtil;
  *
  * @lucene.experimental */
 
-public final class BKDReader extends PointValues implements Accountable {
-
-  private static abstract class BKDInput extends DataInput implements Cloneable {
-    abstract long ramBytesUsed();
-
-    abstract int getPosition();
-    abstract void setPosition(int pos) throws IOException;
-
-    @Override
-    public BKDInput clone() {
-      return (BKDInput)super.clone();
-    }
-  }
-
-  private static class BKDOffHeapInput extends BKDInput implements Cloneable {
-
-    private final IndexInput packedIndex;
-
-    BKDOffHeapInput(IndexInput packedIndex) {
-      this.packedIndex = packedIndex;
-    }
-
-    @Override
-    public BKDOffHeapInput clone() {
-        return new BKDOffHeapInput(packedIndex.clone());
-    }
-
-    @Override
-    long ramBytesUsed() {
-      return 0;
-    }
-
-    @Override
-    int getPosition() {
-      return (int)packedIndex.getFilePointer();
-    }
-
-    @Override
-    void setPosition(int pos) throws IOException {
-        packedIndex.seek(pos);
-    }
-
-    @Override
-    public byte readByte() throws IOException {
-      return packedIndex.readByte();
-    }
-
-    @Override
-    public void readBytes(byte[] b, int offset, int len) throws IOException {
-      packedIndex.readBytes(b, offset, len);
-    }
-  }
-
-  private static class BKDOnHeapInput extends BKDInput implements Cloneable {
-
-    private final ByteArrayDataInput packedIndex;
-
-    BKDOnHeapInput(IndexInput packedIndex, int numBytes) throws IOException {
-      byte[] packedBytes = new byte[numBytes];
-      packedIndex.readBytes(packedBytes, 0, numBytes);
-      this.packedIndex = new ByteArrayDataInput(packedBytes);
-    }
-
-    private BKDOnHeapInput(ByteArrayDataInput packedIndex) {
-      this.packedIndex = packedIndex;
-    }
-
-    @Override
-    public BKDOnHeapInput clone() {
-      return new BKDOnHeapInput((ByteArrayDataInput)packedIndex.clone());
-    }
-
-    @Override
-    long ramBytesUsed() {
-      return packedIndex.length();
-    }
-
-    @Override
-    int getPosition() {
-      return packedIndex.getPosition();
-    }
-
-    @Override
-    void setPosition(int pos) {
-      packedIndex.setPosition(pos);
-    }
-
-    @Override
-    public byte readByte() throws IOException {
-      return packedIndex.readByte();
-    }
-
-    @Override
-    public void readBytes(byte[] b, int offset, int len) throws IOException {
-      packedIndex.readBytes(b, offset, len);
-    }
-  }
+public final class BKDReader extends PointValues {
 
   // Packed array of byte[] holding all split values in the full binary tree:
   final int leafNodeOffset;
@@ -151,18 +51,11 @@ public final class BKDReader extends PointValues implements Accountable {
   protected final int packedIndexBytesLength;
   final long minLeafBlockFP;
 
-  final BKDInput packedIndex;
+  final IndexInput packedIndex;
 
-  /** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned */
+  /** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned.
+   * BKD tree is always stored off-heap. */
   public BKDReader(IndexInput metaIn, IndexInput indexIn, IndexInput dataIn) throws IOException {
-    this(metaIn, indexIn, dataIn, indexIn instanceof ByteBufferIndexInput);
-  }
-
-  /**
-   * Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned
-   * and specify {@code true} to store BKD off-heap ({@code false} otherwise)
-   */
-  public BKDReader(IndexInput metaIn, IndexInput indexIn, IndexInput dataIn, boolean offHeap) throws IOException {
     version = CodecUtil.checkHeader(metaIn, BKDWriter.CODEC_NAME, BKDWriter.VERSION_START, BKDWriter.VERSION_CURRENT);
     numDataDims = metaIn.readVInt();
     if (version >= BKDWriter.VERSION_SELECTIVE_INDEXING) {
@@ -205,13 +98,7 @@ public final class BKDReader extends PointValues implements Accountable {
       minLeafBlockFP = indexIn.readVLong();
       indexIn.seek(indexStartPointer);
     }
-    IndexInput slice = indexIn.slice("packedIndex", indexStartPointer, numIndexBytes);
-    if (offHeap) {
-      packedIndex = new BKDOffHeapInput(slice);
-    } else {
-      packedIndex = new BKDOnHeapInput(slice, numIndexBytes);
-    }
-
+    this.packedIndex = indexIn.slice("packedIndex", indexStartPointer, numIndexBytes);
     this.in = dataIn;
   }
 
@@ -219,7 +106,7 @@ public final class BKDReader extends PointValues implements Accountable {
     return minLeafBlockFP;
   }
 
-  /** Used to walk the in-heap index. The format takes advantage of the limited
+  /** Used to walk the off-heap index. The format takes advantage of the limited
    *  access pattern to the BKD tree at search time, i.e. starting at the root
    *  node and recursing downwards one child at a time.
    *  @lucene.internal */
@@ -229,13 +116,11 @@ public final class BKDReader extends PointValues implements Accountable {
     private int level;
     private int splitDim;
     private final byte[][] splitPackedValueStack;
-    // used to read the packed byte[]
-    private final BKDInput in;
+    // used to read the packed tree off-heap
+    private final IndexInput in;
     // holds the minimum (left most) leaf block file pointer for each level we've recursed to:
     private final long[] leafBlockFPStack;
-    // holds the address, in the packed byte[] index, of the left-node of each level:
-    private final int[] leftNodePositions;
-    // holds the address, in the packed byte[] index, of the right-node of each level:
+    // holds the address, in the off-heap index, of the right-node of each level:
     private final int[] rightNodePositions;
     // holds the splitDim for each level:
     private final int[] splitDims;
@@ -249,52 +134,41 @@ public final class BKDReader extends PointValues implements Accountable {
     private final BytesRef scratch;
 
     IndexTree() {
+      this(packedIndex.clone(), 1, 1);
+      // read root node
+      readNodeData(false);
+    }
+
+    private IndexTree(IndexInput in, int nodeID, int level) {
       int treeDepth = getTreeDepth();
       splitPackedValueStack = new byte[treeDepth+1][];
-      nodeID = 1;
-      level = 1;
+      this.nodeID = nodeID;
+      this.level = level;
       splitPackedValueStack[level] = new byte[packedIndexBytesLength];
       leafBlockFPStack = new long[treeDepth+1];
-      leftNodePositions = new int[treeDepth+1];
       rightNodePositions = new int[treeDepth+1];
       splitValuesStack = new byte[treeDepth+1][];
       splitDims = new int[treeDepth+1];
       negativeDeltas = new boolean[numIndexDims*(treeDepth+1)];
-
-      in = packedIndex.clone();
+      this.in = in;
       splitValuesStack[0] = new byte[packedIndexBytesLength];
-      readNodeData(false);
       scratch = new BytesRef();
       scratch.length = bytesPerDim;
     }      
 
     public void pushLeft() {
-      int nodePosition = leftNodePositions[level];
       nodeID *= 2;
       level++;
-      if (splitPackedValueStack[level] == null) {
-        splitPackedValueStack[level] = new byte[packedIndexBytesLength];
-      }
-      System.arraycopy(negativeDeltas, (level-1)*numIndexDims, negativeDeltas, level*numIndexDims, numIndexDims);
-      assert splitDim != -1;
-      negativeDeltas[level*numIndexDims+splitDim] = true;
-      try {
-        in.setPosition(nodePosition);
-      } catch (IOException e) {
-        throw new UncheckedIOException(e);
-      }
       readNodeData(true);
     }
     
     /** Clone, but you are not allowed to pop up past the point where the clone happened. */
     @Override
     public IndexTree clone() {
-      IndexTree index = new IndexTree();
-      index.nodeID = nodeID;
-      index.level = level;
+      IndexTree index = new IndexTree(in.clone(), nodeID, level);
+      // copy node data
       index.splitDim = splitDim;
       index.leafBlockFPStack[level] = leafBlockFPStack[level];
-      index.leftNodePositions[level] = leftNodePositions[level];
       index.rightNodePositions[level] = rightNodePositions[level];
       index.splitValuesStack[index.level] = splitValuesStack[index.level].clone();
       System.arraycopy(negativeDeltas, level*numIndexDims, index.negativeDeltas, level*numIndexDims, numIndexDims);
@@ -303,17 +177,12 @@ public final class BKDReader extends PointValues implements Accountable {
     }
     
     public void pushRight() {
-      int nodePosition = rightNodePositions[level];
+      final int nodePosition = rightNodePositions[level];
+      assert nodePosition >= in.getFilePointer() : "nodePosition = " + nodePosition + " < currentPosition=" + in.getFilePointer();
       nodeID = nodeID * 2 + 1;
       level++;
-      if (splitPackedValueStack[level] == null) {
-        splitPackedValueStack[level] = new byte[packedIndexBytesLength];
-      }
-      System.arraycopy(negativeDeltas, (level-1)*numIndexDims, negativeDeltas, level*numIndexDims, numIndexDims);
-      assert splitDim != -1;
-      negativeDeltas[level*numIndexDims+splitDim] = false;
       try {
-        in.setPosition(nodePosition);
+        in.seek(nodePosition);
       } catch (IOException e) {
         throw new UncheckedIOException(e);
       }
@@ -401,6 +270,13 @@ public final class BKDReader extends PointValues implements Accountable {
     }
 
     private void readNodeData(boolean isLeft) {
+      if (splitPackedValueStack[level] == null) {
+        splitPackedValueStack[level] = new byte[packedIndexBytesLength];
+      }
+      System.arraycopy(negativeDeltas, (level-1)*numIndexDims, negativeDeltas, level*numIndexDims, numIndexDims);
+      assert splitDim != -1;
+      negativeDeltas[level*numIndexDims+splitDim] = isLeft;
+
       try {
         leafBlockFPStack[level] = leafBlockFPStack[level - 1];
 
@@ -443,9 +319,7 @@ public final class BKDReader extends PointValues implements Accountable {
           } else {
             leftNumBytes = 0;
           }
-
-          leftNodePositions[level] = in.getPosition();
-          rightNodePositions[level] = leftNodePositions[level] + leftNumBytes;
+          rightNodePositions[level] = Math.toIntExact(in.getFilePointer()) + leftNumBytes;
         }
       } catch (IOException e) {
         throw new UncheckedIOException(e);
@@ -870,11 +744,6 @@ public final class BKDReader extends PointValues implements Accountable {
   }
 
   @Override
-  public long ramBytesUsed() {
-    return packedIndex.ramBytesUsed();
-  }
-
-  @Override
   public byte[] getMinPackedValue() {
     return minPackedValue.clone();
   }
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
index 3518398..f7e1b55 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
@@ -47,8 +47,6 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.util.TestUtil;
 
-import static com.carrotsearch.randomizedtesting.RandomizedTest.randomBoolean;
-
 public class TestBKD extends LuceneTestCase {
 
   public void testBasicInts1D() throws Exception {
@@ -69,7 +67,7 @@ public class TestBKD extends LuceneTestCase {
 
       try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
         in.seek(indexFP);
-        BKDReader r = new BKDReader(in, in, in, false);//randomBoolean());
+        BKDReader r = new BKDReader(in, in, in);
 
         // Simple 1D range query:
         final int queryMin = 42;
@@ -173,7 +171,7 @@ public class TestBKD extends LuceneTestCase {
 
       try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
         in.seek(indexFP);
-        BKDReader r = new BKDReader(in, in, in, randomBoolean());
+        BKDReader r = new BKDReader(in, in, in);
 
         byte[] minPackedValue = r.getMinPackedValue();
         byte[] maxPackedValue = r.getMaxPackedValue();
@@ -303,7 +301,7 @@ public class TestBKD extends LuceneTestCase {
 
       try (IndexInput in = dir.openInput("bkd", IOContext.DEFAULT)) {
         in.seek(indexFP);
-        BKDReader r = new BKDReader(in, in, in, randomBoolean());
+        BKDReader r = new BKDReader(in, in, in);
 
         int iters = atLeast(100);
         for(int iter=0;iter<iters;iter++) {
@@ -799,7 +797,7 @@ public class TestBKD extends LuceneTestCase {
         List<BKDReader> readers = new ArrayList<>();
         for(long fp : toMerge) {
           in.seek(fp);
-          readers.add(new BKDReader(in, in, in, randomBoolean()));
+          readers.add(new BKDReader(in, in, in));
         }
         out = dir.createOutput("bkd2", IOContext.DEFAULT);
         Runnable finalizer = w.merge(out, out, out, docMaps, readers);
@@ -817,7 +815,7 @@ public class TestBKD extends LuceneTestCase {
       }
 
       in.seek(indexFP);
-      BKDReader r = new BKDReader(in, in, in, randomBoolean());
+      BKDReader r = new BKDReader(in, in, in);
 
       int iters = atLeast(100);
       for(int iter=0;iter<iters;iter++) {
@@ -1093,7 +1091,7 @@ public class TestBKD extends LuceneTestCase {
 
       IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
       in.seek(fp);
-      BKDReader r = new BKDReader(in, in, in, randomBoolean());
+      BKDReader r = new BKDReader(in, in, in);
       r.intersect(new IntersectVisitor() {
           int lastDocID = -1;
 
@@ -1211,7 +1209,7 @@ public class TestBKD extends LuceneTestCase {
 
       IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
       in.seek(fp);
-      BKDReader r = new BKDReader(in, in, in, randomBoolean());
+      BKDReader r = new BKDReader(in, in, in);
       int[] count = new int[1];
       r.intersect(new IntersectVisitor() {
 
@@ -1269,7 +1267,7 @@ public class TestBKD extends LuceneTestCase {
 
     IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
     in.seek(fp);
-    BKDReader r = new BKDReader(in, in, in, randomBoolean());
+    BKDReader r = new BKDReader(in, in, in);
     int[] count = new int[1];
     r.intersect(new IntersectVisitor() {