You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/08/11 22:33:50 UTC

svn commit: r1695370 [2/2] - in /lucene/dev/branches/lucene6699/lucene: sandbox/src/java/org/apache/lucene/bkdtree/ spatial3d/src/java/org/apache/lucene/bkdtree3d/ spatial3d/src/test/org/apache/lucene/bkdtree3d/

Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/BKD3DTreeWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/BKD3DTreeWriter.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/BKD3DTreeWriter.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/BKD3DTreeWriter.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,954 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.Comparator;
+
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.InPlaceMergeSorter;
+import org.apache.lucene.util.LongBitSet;
+import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
+import org.apache.lucene.util.OfflineSorter;
+import org.apache.lucene.util.RamUsageEstimator;
+
+// TODO
+//   - we could also index "auto-prefix terms" here, and use better compression, and maybe only use for the "fully contained" case so we'd
+//     only index docIDs
+//   - the index could be efficiently encoded as an FST, so we don't have wasteful
+//     (monotonic) long[] leafBlockFPs; or we could use MonotonicLongValues ... but then
+//     the index is already plenty small: 60M OSM points --> 1.1 MB with 128 points
+//     per leaf, and you can reduce that by putting more points per leaf
+//   - we can quantize the split values to 2 bytes (short): http://people.csail.mit.edu/tmertens/papers/qkdtree.pdf
+//   - we could use threads while building; the higher nodes are very parallelizable
+//   - generalize to N dimenions? i think there are reasonable use cases here, e.g.
+//     2 dimensional points to store houses, plus e.g. 3rd dimension for "household income"
+
+/** Recursively builds a BKD tree to assign all incoming points to smaller
+ *  and smaller rectangles until the number of points in a given
+ *  rectangle is &lt= the <code>maxPointsInLeafNode</code>.  The tree is
+ *  fully balanced, which means the leaf nodes will have between 50% and 100% of
+ *  the requested <code>maxPointsInLeafNode</code>, except for the adversarial case
+ *  of indexing exactly the same point many times.
+ *
+ *  <p>
+ *  See <a href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a> for details.
+ *
+ *  <p>This consumes heap during writing: it allocates a <code>LongBitSet(numPoints)</code>, 
+ *  and for any nodes with fewer than <code>maxPointsSortInHeap</code>, it holds
+ *  the points in memory as simple java arrays.
+ *
+ *  <p>
+ *  <b>NOTE</b>: This can write at most Integer.MAX_VALUE * <code>maxPointsInLeafNode</code> total points.
+ *
+ * @lucene.experimental */
+
+class BKD3DTreeWriter {
+
+  // x (int), y (int), z (int) + ord (long) + docID (int)
+  static final int BYTES_PER_DOC = RamUsageEstimator.NUM_BYTES_LONG + 4 * RamUsageEstimator.NUM_BYTES_INT;
+
+  // nocommit removeme
+  static final boolean DEBUG = false;
+
+  public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 1024;
+
+  /** This works out to max of ~10 MB peak heap tied up during writing: */
+  public static final int DEFAULT_MAX_POINTS_SORT_IN_HEAP = 128*1024;;
+
+  private final byte[] scratchBytes = new byte[BYTES_PER_DOC];
+  private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
+
+  private OfflineSorter.ByteSequencesWriter writer;
+  private GrowingHeapWriter heapWriter;
+
+  private Path tempInput;
+  private Path tempDir;
+  private final int maxPointsInLeafNode;
+  private final int maxPointsSortInHeap;
+
+  private long pointCount;
+  private int globalMinX = Integer.MAX_VALUE;
+  private int globalMaxX = Integer.MIN_VALUE;
+  private int globalMinY = Integer.MAX_VALUE;
+  private int globalMaxY = Integer.MIN_VALUE;
+  private int globalMinZ = Integer.MAX_VALUE;
+  private int globalMaxZ = Integer.MIN_VALUE;
+
+  public BKD3DTreeWriter() throws IOException {
+    this(DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_POINTS_SORT_IN_HEAP);
+  }
+
+  // TODO: instead of maxPointsSortInHeap, change to maxMBHeap ... the mapping is non-obvious:
+  public BKD3DTreeWriter(int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
+    verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
+    this.maxPointsInLeafNode = maxPointsInLeafNode;
+    this.maxPointsSortInHeap = maxPointsSortInHeap;
+
+    // We write first maxPointsSortInHeap in heap, then cutover to offline for additional points:
+    heapWriter = new GrowingHeapWriter(maxPointsSortInHeap);
+  }
+
+  public static void verifyParams(int maxPointsInLeafNode, int maxPointsSortInHeap) {
+    if (maxPointsInLeafNode <= 0) {
+      throw new IllegalArgumentException("maxPointsInLeafNode must be > 0; got " + maxPointsInLeafNode);
+    }
+    if (maxPointsInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) {
+      throw new IllegalArgumentException("maxPointsInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsInLeafNode);
+    }
+    if (maxPointsSortInHeap < maxPointsInLeafNode) {
+      throw new IllegalArgumentException("maxPointsSortInHeap must be >= maxPointsInLeafNode; got " + maxPointsSortInHeap + " vs maxPointsInLeafNode="+ maxPointsInLeafNode);
+    }
+    if (maxPointsSortInHeap > ArrayUtil.MAX_ARRAY_LENGTH) {
+      throw new IllegalArgumentException("maxPointsSortInHeap must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsSortInHeap);
+    }
+  }
+
+  /** If the current segment has too many points then we switchover to temp files / offline sort. */
+  private void switchToOffline() throws IOException {
+
+    // OfflineSorter isn't thread safe, but our own private tempDir works around this:
+    tempDir = Files.createTempDirectory(OfflineSorter.defaultTempDir(), BKD3DTreeWriter.class.getSimpleName());
+
+    // For each .add we just append to this input file, then in .finish we sort this input and resursively build the tree:
+    tempInput = tempDir.resolve("in");
+    writer = new OfflineSorter.ByteSequencesWriter(tempInput);
+    for(int i=0;i<pointCount;i++) {
+      scratchBytesOutput.reset(scratchBytes);
+      scratchBytesOutput.writeInt(heapWriter.xs[i]);
+      scratchBytesOutput.writeInt(heapWriter.ys[i]);
+      scratchBytesOutput.writeInt(heapWriter.zs[i]);
+      scratchBytesOutput.writeVInt(heapWriter.docIDs[i]);
+      scratchBytesOutput.writeVLong(i);
+      // TODO: can/should OfflineSorter optimize the fixed-width case?
+      writer.write(scratchBytes, 0, scratchBytes.length);
+    }
+
+    heapWriter = null;
+  }
+
+  public void add(int x, int y, int z, int docID) throws IOException {
+
+    if (pointCount >= maxPointsSortInHeap) {
+      if (writer == null) {
+        switchToOffline();
+      }
+      scratchBytesOutput.reset(scratchBytes);
+      scratchBytesOutput.writeInt(x);
+      scratchBytesOutput.writeInt(y);
+      scratchBytesOutput.writeInt(z);
+      scratchBytesOutput.writeVInt(docID);
+      scratchBytesOutput.writeVLong(pointCount);
+      writer.write(scratchBytes, 0, scratchBytes.length);
+    } else {
+      // Not too many points added yet, continue using heap:
+      heapWriter.append(x, y, z, pointCount, docID);
+    }
+
+    pointCount++;
+    globalMinX = Math.min(globalMinX, x);
+    globalMaxX = Math.max(globalMaxX, x);
+    globalMinY = Math.min(globalMinY, y);
+    globalMaxY = Math.max(globalMaxY, y);
+    globalMinZ = Math.min(globalMinZ, z);
+    globalMaxZ = Math.max(globalMaxZ, z);
+  }
+
+  /** Changes incoming {@link ByteSequencesWriter} file to to fixed-width-per-entry file, because we need to be able to slice
+   *  as we recurse in {@link #build}. */
+  private Writer convertToFixedWidth(Path in) throws IOException {
+    BytesRefBuilder scratch = new BytesRefBuilder();
+    scratch.grow(BYTES_PER_DOC);
+    BytesRef bytes = scratch.get();
+    ByteArrayDataInput dataReader = new ByteArrayDataInput();
+
+    OfflineSorter.ByteSequencesReader reader = null;
+    Writer sortedWriter = null;
+    boolean success = false;
+    try {
+      reader = new OfflineSorter.ByteSequencesReader(in);
+      sortedWriter = getWriter(pointCount);
+      for (long i=0;i<pointCount;i++) {
+        boolean result = reader.read(scratch);
+        assert result;
+        dataReader.reset(bytes.bytes, bytes.offset, bytes.length);
+        int x = dataReader.readInt();
+        int y = dataReader.readInt();
+        int z = dataReader.readInt();
+        int docID = dataReader.readVInt();
+        long ord = dataReader.readVLong();
+        assert docID >= 0: "docID=" + docID;
+        sortedWriter.append(x, y, z, ord, docID);
+      }
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(sortedWriter, reader);
+      } else {
+        IOUtils.closeWhileHandlingException(reader);
+        try {
+          sortedWriter.destroy();
+        } catch (Throwable t) {
+          // Suppress to keep throwing original exc
+        }
+      }
+    }
+
+    return sortedWriter;
+  }
+
+  /** dim: 0=x, 1=y, 2=z */
+  private Writer sort(int dim) throws IOException {
+    if (heapWriter != null) {
+
+      assert pointCount < Integer.MAX_VALUE;
+
+      // All buffered points are still in heap
+      new InPlaceMergeSorter() {
+        @Override
+        protected void swap(int i, int j) {
+          int docID = heapWriter.docIDs[i];
+          heapWriter.docIDs[i] = heapWriter.docIDs[j];
+          heapWriter.docIDs[j] = docID;
+
+          long ord = heapWriter.ords[i];
+          heapWriter.ords[i] = heapWriter.ords[j];
+          heapWriter.ords[j] = ord;
+
+          int x = heapWriter.xs[i];
+          heapWriter.xs[i] = heapWriter.xs[j];
+          heapWriter.xs[j] = x;
+
+          int y = heapWriter.ys[i];
+          heapWriter.ys[i] = heapWriter.ys[j];
+          heapWriter.ys[j] = y;
+
+          int z = heapWriter.zs[i];
+          heapWriter.zs[i] = heapWriter.zs[j];
+          heapWriter.zs[j] = z;
+        }
+
+        @Override
+        protected int compare(int i, int j) {
+          int cmp;
+          if (dim == 0) {
+            cmp = Integer.compare(heapWriter.xs[i], heapWriter.xs[j]);
+          } else if (dim == 1) {
+            cmp = Integer.compare(heapWriter.ys[i], heapWriter.ys[j]);
+          } else {
+            cmp = Integer.compare(heapWriter.zs[i], heapWriter.zs[j]);
+          }
+          if (cmp != 0) {
+            return cmp;
+          }
+
+          // Tie-break
+          cmp = Integer.compare(heapWriter.docIDs[i], heapWriter.docIDs[j]);
+          if (cmp != 0) {
+            return cmp;
+          }
+
+          return Long.compare(heapWriter.ords[i], heapWriter.ords[j]);
+        }
+      }.sort(0, (int) pointCount);
+
+      HeapWriter sorted = new HeapWriter((int) pointCount);
+      //System.out.println("sorted dim=" + dim);
+      for(int i=0;i<pointCount;i++) {
+        /*
+        System.out.println("  docID=" + heapWriter.docIDs[i] + 
+                           " x=" + heapWriter.xs[i] +
+                           " y=" + heapWriter.ys[i] +
+                           " z=" + heapWriter.zs[i]);
+        */
+        sorted.append(heapWriter.xs[i],
+                      heapWriter.ys[i],
+                      heapWriter.zs[i],
+                      heapWriter.ords[i],
+                      heapWriter.docIDs[i]);
+      }
+
+      return sorted;
+    } else {
+
+      // Offline sort:
+      assert tempDir != null;
+
+      final ByteArrayDataInput reader = new ByteArrayDataInput();
+      Comparator<BytesRef> cmp = new Comparator<BytesRef>() {
+        private final ByteArrayDataInput readerB = new ByteArrayDataInput();
+
+        @Override
+        public int compare(BytesRef a, BytesRef b) {
+          reader.reset(a.bytes, a.offset, a.length);
+          final int xa = reader.readInt();
+          final int ya = reader.readInt();
+          final int za = reader.readInt();
+          final int docIDA = reader.readVInt();
+          final long ordA = reader.readVLong();
+
+          reader.reset(b.bytes, b.offset, b.length);
+          final int xb = reader.readInt();
+          final int yb = reader.readInt();
+          final int zb = reader.readInt();
+          final int docIDB = reader.readVInt();
+          final long ordB = reader.readVLong();
+
+          int cmp;
+          if (dim == 0) {
+            cmp = Integer.compare(xa, xb);
+          } else if (dim == 1) {
+            cmp = Integer.compare(ya, yb);
+          } else {
+            cmp = Integer.compare(za, zb);
+          }
+          if (cmp != 0) {
+            return cmp;
+          }
+
+          // Tie-break
+          cmp = Integer.compare(docIDA, docIDB);
+          if (cmp != 0) {
+            return cmp;
+          }
+
+          return Long.compare(ordA, ordB);
+        }
+      };
+
+      Path sorted = tempDir.resolve("sorted");
+      boolean success = false;
+      try {
+        OfflineSorter sorter = new OfflineSorter(cmp, OfflineSorter.BufferSize.automatic(), tempDir, OfflineSorter.MAX_TEMPFILES);
+        sorter.sort(tempInput, sorted);
+        Writer writer = convertToFixedWidth(sorted);
+        success = true;
+        return writer;
+      } finally {
+        if (success) {
+          IOUtils.rm(sorted);
+        } else {
+          IOUtils.deleteFilesIgnoringExceptions(sorted);
+        }
+      }
+    }
+  }
+
+  /** Writes the BKD tree to the provided {@link IndexOutput} and returns the file offset where index was written. */
+  public long finish(IndexOutput out) throws IOException {
+    //System.out.println("\nBKDTreeWriter.finish pointCount=" + pointCount + " out=" + out + " heapWriter=" + heapWriter);
+
+    if (writer != null) {
+      writer.close();
+    }
+
+    LongBitSet bitSet = new LongBitSet(pointCount);
+
+    long countPerLeaf = pointCount;
+    long innerNodeCount = 1;
+
+    while (countPerLeaf > maxPointsInLeafNode) {
+      countPerLeaf /= 2;
+      innerNodeCount *= 2;
+    }
+
+    //System.out.println("innerNodeCount=" + innerNodeCount);
+
+    if (1+2*innerNodeCount >= Integer.MAX_VALUE) {
+      throw new IllegalStateException("too many nodes; increase maxPointsInLeafNode (currently " + maxPointsInLeafNode + ") and reindex");
+    }
+
+    innerNodeCount--;
+
+    int numLeaves = (int) (innerNodeCount+1);
+    //System.out.println("  numLeaves=" + numLeaves);
+
+    // Indexed by nodeID, but first (root) nodeID is 1
+    int[] splitValues = new int[numLeaves];
+
+    // +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g. 7)
+    long[] leafBlockFPs = new long[numLeaves];
+
+    // Make sure the math above "worked":
+    assert pointCount / splitValues.length <= maxPointsInLeafNode: "pointCount=" + pointCount + " splitValues.length=" + splitValues.length + " maxPointsInLeafNode=" + maxPointsInLeafNode;
+    //System.out.println("  avg pointsPerLeaf=" + (pointCount/splitValues.length));
+
+    // Sort all docs once by x, once by y, once by z:
+    Writer xSortedWriter = null;
+    Writer ySortedWriter = null;
+    Writer zSortedWriter = null;
+
+    boolean success = false;
+    try {
+      xSortedWriter = sort(0);
+      ySortedWriter = sort(1);
+      zSortedWriter = sort(2);
+      heapWriter = null;
+
+      build(1, numLeaves,
+            new PathSlice(xSortedWriter, 0, pointCount),
+            new PathSlice(ySortedWriter, 0, pointCount),
+            new PathSlice(zSortedWriter, 0, pointCount),
+            bitSet, out,
+            globalMinX, globalMaxX,
+            globalMinY, globalMaxY,
+            globalMinZ, globalMaxZ,
+            splitValues,
+            leafBlockFPs);
+      success = true;
+    } finally {
+      if (success) {
+        xSortedWriter.destroy();
+        ySortedWriter.destroy();
+        zSortedWriter.destroy();
+        IOUtils.rm(tempInput);
+      } else {
+        try {
+          xSortedWriter.destroy();
+        } catch (Throwable t) {
+          // Suppress to keep throwing original exc
+        }
+        try {
+          ySortedWriter.destroy();
+        } catch (Throwable t) {
+          // Suppress to keep throwing original exc
+        }
+        try {
+          zSortedWriter.destroy();
+        } catch (Throwable t) {
+          // Suppress to keep throwing original exc
+        }
+        IOUtils.deleteFilesIgnoringExceptions(tempInput);
+      }
+    }
+
+    //System.out.println("Total nodes: " + innerNodeCount);
+
+    // Write index:
+    long indexFP = out.getFilePointer();
+    //System.out.println("indexFP=" + indexFP);
+    out.writeInt(globalMinX);
+    out.writeInt(globalMaxX);
+    out.writeInt(globalMinY);
+    out.writeInt(globalMaxY);
+    out.writeInt(globalMinZ);
+    out.writeInt(globalMaxZ);
+    out.writeVInt(numLeaves);
+
+    // NOTE: splitValues[0] is unused, because nodeID is 1-based:
+    for (int i=0;i<splitValues.length;i++) {
+      out.writeInt(splitValues[i]);
+    }
+    for (int i=0;i<leafBlockFPs.length;i++) {
+      out.writeVLong(leafBlockFPs[i]);
+    }
+
+    if (tempDir != null) {
+      // If we had to go offline, we should have removed all temp files we wrote:
+      assert directoryIsEmpty(tempDir);
+      IOUtils.rm(tempDir);
+    }
+
+    return indexFP;
+  }
+
+  // Called only from assert
+  private boolean directoryIsEmpty(Path in) {
+    try (DirectoryStream<Path> dir = Files.newDirectoryStream(in)) {
+      for (Path path : dir) {
+        assert false: "dir=" + in + " still has file=" + path;
+        return false;
+      }
+    } catch (IOException ioe) {
+      // Just ignore: we are only called from assert
+    }
+    return true;
+  }
+
+  /** Sliced reference to points in an OfflineSorter.ByteSequencesWriter file. */
+  private static final class PathSlice {
+    final Writer writer;
+    final long start;
+    final long count;
+
+    public PathSlice(Writer writer, long start, long count) {
+      this.writer = writer;
+      this.start = start;
+      this.count = count;
+    }
+
+    @Override
+    public String toString() {
+      return "PathSlice(start=" + start + " count=" + count + " writer=" + writer + ")";
+    }
+  }
+
+  /** Marks bits for the ords (points) that belong in the left sub tree. */
+  private int markLeftTree(int splitDim, PathSlice source, LongBitSet bitSet,
+                           int minX, int maxX,
+                           int minY, int maxY,
+                           int minZ, int maxZ) throws IOException {
+
+    // This is the size of our left tree
+    long leftCount = source.count / 2;
+
+    // Read the split value:
+    //if (DEBUG) System.out.println("  leftCount=" + leftCount + " vs " + source.count);
+    Reader reader = source.writer.getReader(source.start + leftCount);
+    boolean success = false;
+    int splitValue;
+    try {
+      boolean result = reader.next();
+      assert result;
+
+      int x = reader.x();
+      assert x >= minX && x <= maxX: "x=" + x + " minX=" + minX + " maxX=" + maxX;
+
+      int y = reader.y();
+      assert y >= minY && y <= maxY: "y=" + y + " minY=" + minY + " maxY=" + maxY;
+
+      int z = reader.z();
+      assert z >= minZ && z <= maxZ: "z=" + z + " minZ=" + minZ + " maxZ=" + maxZ;
+
+      if (splitDim == 0) {
+        splitValue = x;
+      } else if (splitDim == 1) {
+        splitValue = y;
+      } else {
+        splitValue = z;
+      }
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(reader);
+      } else {
+        IOUtils.closeWhileHandlingException(reader);
+      }
+    }
+
+    // Mark ords that fall into the left half, and also handle the == boundary case:
+    assert bitSet.cardinality() == 0: "cardinality=" + bitSet.cardinality();
+
+    success = false;
+    reader = source.writer.getReader(source.start);
+    try {
+      int lastValue = Integer.MIN_VALUE;
+      for (int i=0;i<leftCount;i++) {
+        boolean result = reader.next();
+        assert result;
+        int x = reader.x();
+        int y = reader.y();
+        int z = reader.z();
+
+        int value;
+        if (splitDim == 0) {
+          value = x;
+        } else if (splitDim == 1) {
+          value = y;
+        } else {
+          value = z;
+        }
+
+        // Our input source is supposed to be sorted on the incoming dimension:
+        assert value >= lastValue;
+        lastValue = value;
+
+        assert value <= splitValue: "i=" + i + " value=" + value + " vs splitValue=" + splitValue;
+        long ord = reader.ord();
+        int docID = reader.docID();
+        assert docID >= 0: "docID=" + docID + " reader=" + reader;
+
+        // We should never see dup ords:
+        assert bitSet.get(ord) == false;
+        bitSet.set(ord);
+      }
+      success = true;
+    } finally {
+      if (success) {
+        IOUtils.close(reader);
+      } else {
+        IOUtils.closeWhileHandlingException(reader);
+      }
+    }
+
+    assert leftCount == bitSet.cardinality(): "leftCount=" + leftCount + " cardinality=" + bitSet.cardinality();
+
+    return splitValue;
+  }
+
+  // Split on the dim with the largest range:
+  static int getSplitDim(int minX, int maxX, int minY, int maxY, int minZ, int maxZ) {
+    long xRange = (long) maxX - (long) minX;
+    long yRange = (long) maxY - (long) minY;
+    long zRange = (long) maxZ - (long) minZ;
+
+    if (xRange > yRange) {
+      if (xRange > zRange) {
+        return 0;
+      } else {
+        return 2;
+      }
+    } else if (yRange > zRange) {
+      return 1;
+    } else {
+      return 2;
+    }
+  }
+
+  /** The incoming PathSlice for the dim we will split is already partitioned/sorted. */
+  private void build(int nodeID, int leafNodeOffset,
+                     PathSlice lastXSorted,
+                     PathSlice lastYSorted,
+                     PathSlice lastZSorted,
+                     LongBitSet bitSet,
+                     IndexOutput out,
+                     int minX, int maxX,
+                     int minY, int maxY,
+                     int minZ, int maxZ,
+                     int[] splitValues,
+                     long[] leafBlockFPs) throws IOException {
+
+    assert lastXSorted.count == lastYSorted.count;
+    assert lastXSorted.count == lastZSorted.count;
+
+    if (DEBUG) System.out.println("\nBUILD: nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset + "\n  lastXSorted=" + lastXSorted + "\n  lastYSorted=" + lastYSorted + "\n  lastZSorted=" + lastZSorted + "\n  count=" + lastXSorted.count + " x=" + minX + " TO " + maxX + " y=" + minY + " TO " + maxY + " z=" + minZ + " TO " + maxZ);
+
+    if (nodeID >= leafNodeOffset) {
+      // Leaf node: write block
+      if (DEBUG) System.out.println("  leaf");
+      assert maxX > minX;
+      assert maxY > minY;
+      assert maxZ > minZ;
+
+      //System.out.println("\nleaf:\n  lat range: " + ((long) maxLatEnc-minLatEnc));
+      //System.out.println("  lon range: " + ((long) maxLonEnc-minLonEnc));
+
+      // Sort by docID in the leaf so we get sequentiality at search time (may not matter?):
+      Reader reader = lastXSorted.writer.getReader(lastXSorted.start);
+
+      // nocommit we can reuse this?
+      int[] docIDs = new int[(int) lastXSorted.count];
+
+      boolean success = false;
+      try {
+        for (int i=0;i<lastXSorted.count;i++) {
+
+          // NOTE: we discard ord at this point; we only needed it temporarily
+          // during building to uniquely identify each point to properly handle
+          // the multi-valued case (one docID having multiple values):
+
+          // We also discard lat/lon, since at search time, we reside on the
+          // wrapped doc values for this:
+
+          boolean result = reader.next();
+          assert result;
+          docIDs[i] = reader.docID();
+        }
+        success = true;
+      } finally {
+        if (success) {
+          IOUtils.close(reader);
+        } else {
+          IOUtils.closeWhileHandlingException(reader);
+        }
+      }
+
+      Arrays.sort(docIDs);
+
+      // Dedup docIDs: for the multi-valued case where more than one value for the doc
+      // wound up in this leaf cell, we only need to store the docID once:
+      int lastDocID = -1;
+      int uniqueCount = 0;
+      for(int i=0;i<docIDs.length;i++) {
+        int docID = docIDs[i];
+        if (docID != lastDocID) {
+          uniqueCount++;
+          lastDocID = docID;
+        }
+      }
+      assert uniqueCount <= lastXSorted.count;
+
+      long startFP = out.getFilePointer();
+      out.writeVInt(uniqueCount);
+
+      // Save the block file pointer:
+      leafBlockFPs[nodeID - leafNodeOffset] = startFP;
+      //System.out.println("    leafFP=" + startFP);
+
+      lastDocID = -1;
+      for (int i=0;i<docIDs.length;i++) {
+        // Absolute int encode; with "vInt of deltas" encoding, the .kdd size dropped from
+        // 697 MB -> 539 MB, but query time for 225 queries went from 1.65 sec -> 2.64 sec.
+        // I think if we also indexed prefix terms here we could do less costly compression
+        // on those lists:
+        int docID = docIDs[i];
+        if (docID != lastDocID) {
+          out.writeInt(docID);
+          //System.out.println("  write docID=" + docID);
+          lastDocID = docID;
+        }
+      }
+      //long endFP = out.getFilePointer();
+      //System.out.println("  bytes/doc: " + ((endFP - startFP) / count));
+    } else {
+
+      int splitDim = getSplitDim(minX, maxX, minY, maxY, minZ, maxZ);
+      //System.out.println("  splitDim=" + splitDim);
+
+      PathSlice source;
+
+      if (splitDim == 0) {
+        source = lastXSorted;
+      } else if (splitDim == 1) {
+        source = lastYSorted;
+      } else {
+        source = lastZSorted;
+      }
+
+      long count = source.count;
+
+      // We let ties go to either side, so we should never get down to count == 0, even
+      // in adversarial case (all values are the same):
+      assert count > 0;
+
+      // Inner node: partition/recurse
+      if (DEBUG) System.out.println("  non-leaf");
+
+      assert nodeID < splitValues.length: "nodeID=" + nodeID + " splitValues.length=" + splitValues.length;
+
+      int splitValue = markLeftTree(splitDim, source, bitSet,
+                                    minX, maxX,
+                                    minY, maxY,
+                                    minZ, maxZ);
+      long leftCount = count/2;
+
+      // TODO: we could save split value in here so we don't have to re-open file later:
+
+      // Partition the other (not split) dims into sorted left and right sets, so we can recurse.
+      // This is somewhat hairy: we partition the next X, Y set according to how we had just
+      // partitioned the Z set, etc.
+
+      Writer[] leftWriters = new Writer[3];
+      Writer[] rightWriters = new Writer[3];
+
+      for(int dim=0;dim<3;dim++) {
+        if (dim == splitDim) {
+          continue;
+        }
+
+        Writer leftWriter = null;
+        Writer rightWriter = null;
+        Reader reader = null;
+
+        boolean success = false;
+
+        int nextLeftCount = 0;
+
+        PathSlice nextSource;
+        if (dim == 0) {
+          nextSource = lastXSorted;
+        } else if (dim == 1) {
+          nextSource = lastYSorted;
+        } else {
+          nextSource = lastZSorted;
+        }
+
+        try {
+          leftWriter = getWriter(leftCount);
+          rightWriter = getWriter(nextSource.count - leftCount);
+
+          assert nextSource.count == count;
+          reader = nextSource.writer.getReader(nextSource.start);
+
+          // TODO: we could compute the split value here for each sub-tree and save an O(N) pass on recursion, but makes code hairier and only
+          // changes the constant factor of building, not the big-oh:
+          for (int i=0;i<count;i++) {
+            boolean result = reader.next();
+            assert result;
+            int x = reader.x();
+            int y = reader.y();
+            int z = reader.z();
+            long ord = reader.ord();
+            int docID = reader.docID();
+            assert docID >= 0: "docID=" + docID + " reader=" + reader;
+            //System.out.println("  i=" + i + " x=" + x + " ord=" + ord + " docID=" + docID);
+            if (bitSet.get(ord)) {
+              if (splitDim == 0) {
+                assert x <= splitValue: "x=" + x + " splitValue=" + splitValue;
+              } else if (splitDim == 1) {
+                assert y <= splitValue: "y=" + y + " splitValue=" + splitValue;
+              } else {
+                assert z <= splitValue: "z=" + z + " splitValue=" + splitValue;
+              }
+              leftWriter.append(x, y, z, ord, docID);
+              nextLeftCount++;
+            } else {
+              if (splitDim == 0) {
+                assert x >= splitValue: "x=" + x + " splitValue=" + splitValue;
+              } else if (splitDim == 1) {
+                assert y >= splitValue: "y=" + y + " splitValue=" + splitValue;
+              } else {
+                assert z >= splitValue: "z=" + z + " splitValue=" + splitValue;
+              }
+              rightWriter.append(x, y, z, ord, docID);
+            }
+          }
+          success = true;
+        } finally {
+          if (success) {
+            IOUtils.close(reader, leftWriter, rightWriter);
+          } else {
+            IOUtils.closeWhileHandlingException(reader, leftWriter, rightWriter);
+          }
+        }
+
+        assert leftCount == nextLeftCount: "leftCount=" + leftCount + " nextLeftCount=" + nextLeftCount;
+        leftWriters[dim] = leftWriter;
+        rightWriters[dim] = rightWriter;
+      }
+      bitSet.clear(0, pointCount);
+
+      long rightCount = count - leftCount;
+
+      boolean success = false;
+      try {
+        if (splitDim == 0) {
+          build(2*nodeID, leafNodeOffset,
+                new PathSlice(source.writer, source.start, leftCount),
+                new PathSlice(leftWriters[1], 0, leftCount),
+                new PathSlice(leftWriters[2], 0, leftCount),
+                bitSet,
+                out,
+                minX, splitValue,
+                minY, maxY,
+                minZ, maxZ,
+                splitValues, leafBlockFPs);
+          leftWriters[1].destroy();
+          leftWriters[2].destroy();
+
+          build(2*nodeID+1, leafNodeOffset,
+                new PathSlice(source.writer, source.start+leftCount, rightCount),
+                new PathSlice(rightWriters[1], 0, rightCount),
+                new PathSlice(rightWriters[2], 0, rightCount),
+                bitSet,
+                out,
+                splitValue, maxX,
+                minY, maxY,
+                minZ, maxZ,
+                splitValues, leafBlockFPs);
+          rightWriters[1].destroy();
+          rightWriters[2].destroy();
+        } else if (splitDim == 1) {
+          build(2*nodeID, leafNodeOffset,
+                new PathSlice(leftWriters[0], 0, leftCount),
+                new PathSlice(source.writer, source.start, leftCount),
+                new PathSlice(leftWriters[2], 0, leftCount),
+                bitSet,
+                out,
+                minX, maxX,
+                minY, splitValue,
+                minZ, maxZ,
+                splitValues, leafBlockFPs);
+          leftWriters[0].destroy();
+          leftWriters[2].destroy();
+
+          build(2*nodeID+1, leafNodeOffset,
+                new PathSlice(rightWriters[0], 0, rightCount),
+                new PathSlice(source.writer, source.start+leftCount, rightCount),    
+                new PathSlice(rightWriters[2], 0, rightCount),
+                bitSet,
+                out,
+                minX, maxX,
+                splitValue, maxY,
+                minZ, maxZ,
+                splitValues, leafBlockFPs);
+          rightWriters[0].destroy();
+          rightWriters[2].destroy();
+        } else {
+          build(2*nodeID, leafNodeOffset,
+                new PathSlice(leftWriters[0], 0, leftCount),
+                new PathSlice(leftWriters[1], 0, leftCount),
+                new PathSlice(source.writer, source.start, leftCount),
+                bitSet,
+                out,
+                minX, maxX,
+                minY, maxY,
+                minZ, splitValue,
+                splitValues, leafBlockFPs);
+          leftWriters[0].destroy();
+          leftWriters[1].destroy();
+
+          build(2*nodeID+1, leafNodeOffset,
+                new PathSlice(rightWriters[0], 0, rightCount),
+                new PathSlice(rightWriters[1], 0, rightCount),
+                new PathSlice(source.writer, source.start+leftCount, rightCount),    
+                bitSet,
+                out,
+                minX, maxX,
+                minY, maxY,
+                splitValue, maxZ,
+                splitValues, leafBlockFPs);
+          rightWriters[0].destroy();
+          rightWriters[1].destroy();
+        }
+        success = true;
+      } finally {
+        if (success == false) {
+          for(Writer writer : leftWriters) {
+            if (writer != null) {
+              try {
+                writer.destroy();
+              } catch (Throwable t) {
+                // Suppress to keep throwing original exc
+              }
+            }
+          }
+          for(Writer writer : rightWriters) {
+            if (writer != null) {
+              try {
+                writer.destroy();
+              } catch (Throwable t) {
+                // Suppress to keep throwing original exc
+              }
+            }
+          }
+        }
+      }
+
+      splitValues[nodeID] = splitValue;
+    }
+  }
+
+  Writer getWriter(long count) throws IOException {
+    if (count < maxPointsSortInHeap) {
+      return new HeapWriter((int) count);
+    } else {
+      return new OfflineWriter(tempDir, count);
+    }
+  }
+}

Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/GrowingHeapWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/GrowingHeapWriter.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/GrowingHeapWriter.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/GrowingHeapWriter.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,92 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+
+final class GrowingHeapWriter implements Writer {
+  int[] xs;
+  int[] ys;
+  int[] zs;
+  int[] docIDs;
+  long[] ords;
+  private int nextWrite;
+  final int maxSize;
+
+  public GrowingHeapWriter(int maxSize) {
+    xs = new int[16];
+    ys = new int[16];
+    zs = new int[16];
+    docIDs = new int[16];
+    ords = new long[16];
+    this.maxSize = maxSize;
+  }
+
+  private int[] growExact(int[] arr, int size) {
+    assert size > arr.length;
+    int[] newArr = new int[size];
+    System.arraycopy(arr, 0, newArr, 0, arr.length);
+    return newArr;
+  }
+
+  private long[] growExact(long[] arr, int size) {
+    assert size > arr.length;
+    long[] newArr = new long[size];
+    System.arraycopy(arr, 0, newArr, 0, arr.length);
+    return newArr;
+  }
+
+  @Override
+  public void append(int x, int y, int z, long ord, int docID) {
+    assert ord == nextWrite;
+    if (xs.length == nextWrite) {
+      int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, RamUsageEstimator.NUM_BYTES_INT));
+      assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite;
+      xs = growExact(xs, nextSize);
+      ys = growExact(ys, nextSize);
+      zs = growExact(zs, nextSize);
+      ords = growExact(ords, nextSize);
+      docIDs = growExact(docIDs, nextSize);
+    }
+    xs[nextWrite] = x;
+    ys[nextWrite] = y;
+    zs[nextWrite] = z;
+    ords[nextWrite] = ord;
+    docIDs[nextWrite] = docID;
+    nextWrite++;
+  }
+
+  @Override
+  public Reader getReader(long start) {
+    return new HeapReader(xs, ys, zs, ords, docIDs, (int) start, nextWrite);
+  }
+
+  @Override
+  public void close() {
+  }
+
+  @Override
+  public void destroy() {
+  }
+
+  @Override
+  public String toString() {
+    return "GrowingHeapWriter(count=" + nextWrite + " alloc=" + xs.length + ")";
+  }
+}

Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapReader.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapReader.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapReader.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,73 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+final class HeapReader implements Reader {
+  private int curRead;
+  final int[] xs;
+  final int[] ys;
+  final int[] zs;
+  final long[] ords;
+  final int[] docIDs;
+  final int end;
+
+  HeapReader(int[] xs, int[] ys, int[] zs, long[] ords, int[] docIDs, int start, int end) {
+    this.xs = xs;
+    this.ys = ys;
+    this.zs = zs;
+    this.ords = ords;
+    this.docIDs = docIDs;
+    curRead = start-1;
+    this.end = end;
+  }
+
+  @Override
+  public boolean next() {
+    curRead++;
+    return curRead < end;
+  }
+
+  @Override
+  public int x() {
+    return xs[curRead];
+  }
+
+  @Override
+  public int y() {
+    return ys[curRead];
+  }
+
+  @Override
+  public int z() {
+    return zs[curRead];
+  }
+
+  @Override
+  public int docID() {
+    return docIDs[curRead];
+  }
+
+  @Override
+  public long ord() {
+    return ords[curRead];
+  }
+
+  @Override
+  public void close() {
+  }
+}

Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapWriter.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapWriter.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapWriter.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,66 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+final class HeapWriter implements Writer {
+  final int[] xs;
+  final int[] ys;
+  final int[] zs;
+  final int[] docIDs;
+  final long[] ords;
+  private int nextWrite;
+
+  public HeapWriter(int count) {
+    xs = new int[count];
+    ys = new int[count];
+    zs = new int[count];
+    docIDs = new int[count];
+    ords = new long[count];
+  }
+
+  @Override
+  public void append(int x, int y, int z, long ord, int docID) {
+    xs[nextWrite] = x;
+    ys[nextWrite] = y;
+    zs[nextWrite] = z;
+    ords[nextWrite] = ord;
+    docIDs[nextWrite] = docID;
+    nextWrite++;
+  }
+
+  @Override
+  public Reader getReader(long start) {
+    return new HeapReader(xs, ys, zs, ords, docIDs, (int) start, xs.length);
+  }
+
+  @Override
+  public void close() {
+    if (nextWrite != xs.length) {
+      throw new IllegalStateException("only wrote " + nextWrite + " values, but expected " + xs.length);
+    }
+  }
+
+  @Override
+  public void destroy() {
+  }
+
+  @Override
+  public String toString() {
+    return "HeapWriter(count=" + xs.length + ")";
+  }
+}

Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineReader.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineReader.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineReader.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,95 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.lucene.store.InputStreamDataInput;
+
+final class OfflineReader implements Reader {
+  final InputStreamDataInput in;
+  long countLeft;
+  private int x;
+  private int y;
+  private int z;
+  private long ord;
+  private int docID;
+
+  OfflineReader(Path tempFile, long start, long count) throws IOException {
+    InputStream fis = Files.newInputStream(tempFile);
+    long seekFP = start * BKD3DTreeWriter.BYTES_PER_DOC;
+    long skipped = 0;
+    while (skipped < seekFP) {
+      long inc = fis.skip(seekFP - skipped);
+      skipped += inc;
+      if (inc == 0) {
+        throw new RuntimeException("skip returned 0");
+      }
+    }
+    in = new InputStreamDataInput(new BufferedInputStream(fis));
+    this.countLeft = count;
+  }
+
+  @Override
+  public boolean next() throws IOException {
+    if (countLeft == 0) {
+      return false;
+    }
+    countLeft--;
+    x = in.readInt();
+    y = in.readInt();
+    z = in.readInt();
+    ord = in.readLong();
+    docID = in.readInt();
+    return true;
+  }
+
+  @Override
+  public int x() {
+    return x;
+  }
+
+  @Override
+  public int y() {
+    return y;
+  }
+
+  @Override
+  public int z() {
+    return z;
+  }
+
+  @Override
+  public long ord() {
+    return ord;
+  }
+
+  @Override
+  public int docID() {
+    return docID;
+  }
+
+  @Override
+  public void close() throws IOException {
+    in.close();
+  }
+}

Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineWriter.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineWriter.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineWriter.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,76 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.store.OutputStreamDataOutput;
+import org.apache.lucene.util.IOUtils;
+
+final class OfflineWriter implements Writer {
+
+  final Path tempFile;
+  final byte[] scratchBytes = new byte[BKD3DTreeWriter.BYTES_PER_DOC];
+  final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);      
+  final OutputStreamDataOutput out;
+  final long count;
+  private long countWritten;
+
+  public OfflineWriter(Path tempDir, long count) throws IOException {
+    tempFile = Files.createTempFile(tempDir, "size" + count + ".", "");
+    out = new OutputStreamDataOutput(new BufferedOutputStream(Files.newOutputStream(tempFile)));
+    this.count = count;
+  }
+    
+  @Override
+  public void append(int x, int y, int z, long ord, int docID) throws IOException {
+    out.writeInt(x);
+    out.writeInt(y);
+    out.writeInt(z);
+    out.writeLong(ord);
+    out.writeInt(docID);
+    countWritten++;
+  }
+
+  @Override
+  public Reader getReader(long start) throws IOException {
+    return new OfflineReader(tempFile, start, count-start);
+  }
+
+  @Override
+  public void close() throws IOException {
+    out.close();
+    if (count != countWritten) {
+      throw new IllegalStateException("wrote " + countWritten + " values, but expected " + count);
+    }
+  }
+
+  @Override
+  public void destroy() throws IOException {
+    IOUtils.rm(tempFile);
+  }
+
+  @Override
+  public String toString() {
+    return "OfflineWriter(count=" + count + " tempFile=" + tempFile + ")";
+  }
+}

Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Reader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Reader.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Reader.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Reader.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,31 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+
+/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */
+interface Reader extends Closeable {
+  boolean next() throws IOException;
+  int x();
+  int y();
+  int z();
+  long ord();
+  int docID();
+}

Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Writer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Writer.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Writer.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Writer.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,29 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+
+/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */
+interface Writer extends Closeable {
+  void append(int x, int y, int z, long ord, int docID) throws IOException;
+  Reader getReader(long start) throws IOException;
+  void destroy() throws IOException;
+}
+

Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/test/org/apache/lucene/bkdtree3d/TestBKD3DTree.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/test/org/apache/lucene/bkdtree3d/TestBKD3DTree.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/test/org/apache/lucene/bkdtree3d/TestBKD3DTree.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/test/org/apache/lucene/bkdtree3d/TestBKD3DTree.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,281 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.apache.lucene.bkdtree3d.BKD3DTreeDocValuesFormat.encodeValue;
+
+public class TestBKD3DTree extends LuceneTestCase {
+
+  public void testBasic() throws Exception {
+    Directory dir = newDirectory();
+    IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
+
+    BKD3DTreeWriter w = new BKD3DTreeWriter();
+
+    w.add(0, 0, 0, 0);
+    w.add(1, 1, 1, 1);
+    w.add(-1, -1, -1, 2);
+
+    long indexFP = w.finish(out);
+    out.close();
+
+    IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
+    in.seek(indexFP);
+    BKD3DTreeReader r = new BKD3DTreeReader(in, 3);
+
+    DocIdSet hits = r.intersect(Integer.MIN_VALUE, Integer.MAX_VALUE,
+                                Integer.MIN_VALUE, Integer.MAX_VALUE,
+                                Integer.MIN_VALUE, Integer.MAX_VALUE,
+
+                                new BKD3DTreeReader.ValueFilter() {
+
+                                  @Override
+                                  public boolean accept(int docID) {
+                                    return true;
+                                  }
+
+                                  @Override
+                                  public BKD3DTreeReader.Relation compare(int xMin, int xMax,
+                                                          int yMin, int yMax,
+                                                          int zMin, int zMax) {
+                                    return BKD3DTreeReader.Relation.INSIDE;
+                                  }
+
+                                });
+    DocIdSetIterator disi = hits.iterator();
+    assertEquals(0, disi.nextDoc());
+    assertEquals(1, disi.nextDoc());
+    assertEquals(2, disi.nextDoc());
+    assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc());
+    in.close();
+    dir.close();
+  }
+
+  static class Point {
+    final double x;
+    final double y;
+    final double z;
+
+    public Point(double x, double y, double z) {
+      this.x = x;
+      this.y = y;
+      this.z = z;
+    }
+
+    @Override
+    public String toString() {
+      return "x=" + x + " y=" + y + " z=" + z;
+    }
+  }
+
+  private static class Range {
+    final double min;
+    final double max;
+
+    public Range(double min, double max) {
+      this.min = min;
+      this.max = max;
+    }
+
+    @Override
+    public String toString() {
+      return min + " TO " + max;
+    }
+  }
+
+  private double randomCoord() {
+    // nocommit
+    //return (random().nextDouble()*2.0022) - 1.0011;
+    return (random().nextDouble()*.002) - .001;
+  }
+
+  private Range randomRange() {
+    double x = randomCoord();
+    double y = randomCoord();
+    if (x < y) {
+      return new Range(x, y);
+    } else {
+      return new Range(y, x);
+    }
+  }
+
+  public void testRandom() throws Exception {
+    List<Point> points = new ArrayList<>();
+    // nocommit
+    //int numPoints = atLeast(10000);
+    int numPoints = atLeast(200);
+    Directory dir = newDirectory();
+    IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
+    int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048); 
+
+    // nocommit
+    maxPointsInLeaf = 100;
+
+    int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
+
+    BKD3DTreeWriter w = new BKD3DTreeWriter(maxPointsInLeaf, maxPointsSortInHeap);
+    for(int docID=0;docID<numPoints;docID++) {
+      Point point;
+      if (docID > 0 && random().nextInt(30) == 17) {
+        // Dup point
+        point = points.get(random().nextInt(points.size()));
+      } else {
+        point = new Point(randomCoord(),
+                          randomCoord(),
+                          randomCoord());
+      }
+
+      if (VERBOSE) {
+        System.out.println("  docID=" + docID + " point=" + point);
+        System.out.println("    x=" + encodeValue(point.x) +
+                           " y=" + encodeValue(point.y) +
+                           " z=" + encodeValue(point.z));
+      }
+
+      points.add(point);
+      w.add(encodeValue(point.x),
+            encodeValue(point.y),
+            encodeValue(point.z),
+            docID);
+    }
+
+    long indexFP = w.finish(out);
+    out.close();
+
+    IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
+    in.seek(indexFP);
+    BKD3DTreeReader r = new BKD3DTreeReader(in, numPoints);
+
+    int numIters = atLeast(100);
+    for(int iter=0;iter<numIters;iter++) {
+      // bbox
+      Range x = randomRange();
+      Range y = randomRange();
+      Range z = randomRange();
+
+      int xMinEnc = encodeValue(x.min);
+      int xMaxEnc = encodeValue(x.max);
+      int yMinEnc = encodeValue(y.min);
+      int yMaxEnc = encodeValue(y.max);
+      int zMinEnc = encodeValue(z.min);
+      int zMaxEnc = encodeValue(z.max);
+
+      if (VERBOSE) {
+        System.out.println("\nTEST: iter=" + iter + " bbox: x=" + x + " (" + xMinEnc + " TO " + xMaxEnc+ ")" + " y=" + y + " (" + yMinEnc + " TO " + yMaxEnc + ")"  + " z=" + z + " (" + zMinEnc + " TO " + zMaxEnc + ")" );
+      }
+
+      DocIdSet hits = r.intersect(xMinEnc, xMaxEnc,
+                                  yMinEnc, yMaxEnc,
+                                  zMinEnc, zMaxEnc,
+
+                                  new BKD3DTreeReader.ValueFilter() {
+
+                                    @Override
+                                    public boolean accept(int docID) {
+                                      Point point = points.get(docID);
+                                      //System.out.println("  accept docID=" + docID + " point=" + point + " (x=" + encodeValue(point.x) + " y=" + encodeValue(point.y) + " z=" + encodeValue(point.z) + ")");
+
+                                      int xEnc = encodeValue(point.x);
+                                      int yEnc = encodeValue(point.y);
+                                      int zEnc = encodeValue(point.z);
+
+                                      boolean accept = xEnc >= xMinEnc && xEnc <= xMaxEnc &&
+                                        yEnc >= yMinEnc && yEnc <= yMaxEnc &&
+                                        zEnc >= zMinEnc && zEnc <= zMaxEnc;
+                                      //System.out.println("    " + accept);
+
+                                      return accept;
+                                    }
+
+                                    @Override
+                                    public BKD3DTreeReader.Relation compare(int cellXMin, int cellXMax,
+                                                                            int cellYMin, int cellYMax,
+                                                                            int cellZMin, int cellZMax) {
+                                      if (cellXMin > xMaxEnc || cellXMax < xMinEnc) {
+                                        return BKD3DTreeReader.Relation.OUTSIDE;
+                                      }
+                                      if (cellYMin > yMaxEnc || cellYMax < yMinEnc) {
+                                        return BKD3DTreeReader.Relation.OUTSIDE;
+                                      }
+                                      if (cellZMin > zMaxEnc || cellZMax < zMinEnc) {
+                                        return BKD3DTreeReader.Relation.OUTSIDE;
+                                      }
+
+                                      if (cellXMin >= xMinEnc && cellXMax <= xMaxEnc &&
+                                          cellYMin >= yMinEnc && cellYMax <= yMaxEnc &&
+                                          cellZMin >= zMinEnc && cellZMax <= zMaxEnc) {
+                                        return BKD3DTreeReader.Relation.INSIDE;
+                                      }
+
+                                      return BKD3DTreeReader.Relation.CROSSES;
+                                    }
+                                  });
+
+      DocIdSetIterator disi = hits.iterator();
+      FixedBitSet matches = new FixedBitSet(numPoints);
+      while (true) {
+        int nextHit = disi.nextDoc();
+        if (nextHit == DocIdSetIterator.NO_MORE_DOCS) {
+          break;
+        }
+        matches.set(nextHit);
+      }
+      if (VERBOSE) {
+        System.out.println("  total hits: " + matches.cardinality());
+      }
+
+      for(int docID=0;docID<numPoints;docID++) {
+        Point point = points.get(docID);
+        boolean actual = matches.get(docID);
+
+        // nocommit no good the test is "using" some of the code it's supposed to test ...
+        // nocommit wait: this shouldn't be necessary?  bkd tree IS precise?
+        int xEnc = encodeValue(point.x);
+        int yEnc = encodeValue(point.y);
+        int zEnc = encodeValue(point.z);
+
+        boolean expected = xEnc >= xMinEnc && xEnc <= xMaxEnc &&
+          yEnc >= yMinEnc && yEnc <= yMaxEnc &&
+          zEnc >= zMinEnc && zEnc <= zMaxEnc;
+
+        if (expected != actual) {
+          System.out.println("docID=" + docID + " is wrong: expected=" + expected + " actual=" + actual);
+          System.out.println("  x=" + point.x + " (" + xEnc + ")" + " y=" + point.y + " (" + yEnc + ")" + " z=" + point.z + " (" + zEnc + ")");
+          fail("wrong match");
+        }
+      }
+    }
+
+    in.close();
+    dir.close();
+  }
+}
+