You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/08/11 22:33:50 UTC
svn commit: r1695370 [2/2] - in /lucene/dev/branches/lucene6699/lucene:
sandbox/src/java/org/apache/lucene/bkdtree/
spatial3d/src/java/org/apache/lucene/bkdtree3d/
spatial3d/src/test/org/apache/lucene/bkdtree3d/
Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/BKD3DTreeWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/BKD3DTreeWriter.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/BKD3DTreeWriter.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/BKD3DTreeWriter.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,954 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.nio.file.DirectoryStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Arrays;
+import java.util.Comparator;
+
+import org.apache.lucene.store.ByteArrayDataInput;
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.InPlaceMergeSorter;
+import org.apache.lucene.util.LongBitSet;
+import org.apache.lucene.util.OfflineSorter.ByteSequencesWriter;
+import org.apache.lucene.util.OfflineSorter;
+import org.apache.lucene.util.RamUsageEstimator;
+
+// TODO
+// - we could also index "auto-prefix terms" here, and use better compression, and maybe only use for the "fully contained" case so we'd
+// only index docIDs
+// - the index could be efficiently encoded as an FST, so we don't have wasteful
+// (monotonic) long[] leafBlockFPs; or we could use MonotonicLongValues ... but then
+// the index is already plenty small: 60M OSM points --> 1.1 MB with 128 points
+// per leaf, and you can reduce that by putting more points per leaf
+// - we can quantize the split values to 2 bytes (short): http://people.csail.mit.edu/tmertens/papers/qkdtree.pdf
+// - we could use threads while building; the higher nodes are very parallelizable
+// - generalize to N dimenions? i think there are reasonable use cases here, e.g.
+// 2 dimensional points to store houses, plus e.g. 3rd dimension for "household income"
+
+/** Recursively builds a BKD tree to assign all incoming points to smaller
+ * and smaller rectangles until the number of points in a given
+ * rectangle is <= the <code>maxPointsInLeafNode</code>. The tree is
+ * fully balanced, which means the leaf nodes will have between 50% and 100% of
+ * the requested <code>maxPointsInLeafNode</code>, except for the adversarial case
+ * of indexing exactly the same point many times.
+ *
+ * <p>
+ * See <a href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a> for details.
+ *
+ * <p>This consumes heap during writing: it allocates a <code>LongBitSet(numPoints)</code>,
+ * and for any nodes with fewer than <code>maxPointsSortInHeap</code>, it holds
+ * the points in memory as simple java arrays.
+ *
+ * <p>
+ * <b>NOTE</b>: This can write at most Integer.MAX_VALUE * <code>maxPointsInLeafNode</code> total points.
+ *
+ * @lucene.experimental */
+
+class BKD3DTreeWriter {
+
+ // x (int), y (int), z (int) + ord (long) + docID (int)
+ static final int BYTES_PER_DOC = RamUsageEstimator.NUM_BYTES_LONG + 4 * RamUsageEstimator.NUM_BYTES_INT;
+
+ // nocommit removeme
+ static final boolean DEBUG = false;
+
+ public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 1024;
+
+ /** This works out to max of ~10 MB peak heap tied up during writing: */
+ public static final int DEFAULT_MAX_POINTS_SORT_IN_HEAP = 128*1024;;
+
+ private final byte[] scratchBytes = new byte[BYTES_PER_DOC];
+ private final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
+
+ private OfflineSorter.ByteSequencesWriter writer;
+ private GrowingHeapWriter heapWriter;
+
+ private Path tempInput;
+ private Path tempDir;
+ private final int maxPointsInLeafNode;
+ private final int maxPointsSortInHeap;
+
+ private long pointCount;
+ private int globalMinX = Integer.MAX_VALUE;
+ private int globalMaxX = Integer.MIN_VALUE;
+ private int globalMinY = Integer.MAX_VALUE;
+ private int globalMaxY = Integer.MIN_VALUE;
+ private int globalMinZ = Integer.MAX_VALUE;
+ private int globalMaxZ = Integer.MIN_VALUE;
+
+ public BKD3DTreeWriter() throws IOException {
+ this(DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_POINTS_SORT_IN_HEAP);
+ }
+
+ // TODO: instead of maxPointsSortInHeap, change to maxMBHeap ... the mapping is non-obvious:
+ public BKD3DTreeWriter(int maxPointsInLeafNode, int maxPointsSortInHeap) throws IOException {
+ verifyParams(maxPointsInLeafNode, maxPointsSortInHeap);
+ this.maxPointsInLeafNode = maxPointsInLeafNode;
+ this.maxPointsSortInHeap = maxPointsSortInHeap;
+
+ // We write first maxPointsSortInHeap in heap, then cutover to offline for additional points:
+ heapWriter = new GrowingHeapWriter(maxPointsSortInHeap);
+ }
+
+ public static void verifyParams(int maxPointsInLeafNode, int maxPointsSortInHeap) {
+ if (maxPointsInLeafNode <= 0) {
+ throw new IllegalArgumentException("maxPointsInLeafNode must be > 0; got " + maxPointsInLeafNode);
+ }
+ if (maxPointsInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) {
+ throw new IllegalArgumentException("maxPointsInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsInLeafNode);
+ }
+ if (maxPointsSortInHeap < maxPointsInLeafNode) {
+ throw new IllegalArgumentException("maxPointsSortInHeap must be >= maxPointsInLeafNode; got " + maxPointsSortInHeap + " vs maxPointsInLeafNode="+ maxPointsInLeafNode);
+ }
+ if (maxPointsSortInHeap > ArrayUtil.MAX_ARRAY_LENGTH) {
+ throw new IllegalArgumentException("maxPointsSortInHeap must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsSortInHeap);
+ }
+ }
+
+ /** If the current segment has too many points then we switchover to temp files / offline sort. */
+ private void switchToOffline() throws IOException {
+
+ // OfflineSorter isn't thread safe, but our own private tempDir works around this:
+ tempDir = Files.createTempDirectory(OfflineSorter.defaultTempDir(), BKD3DTreeWriter.class.getSimpleName());
+
+ // For each .add we just append to this input file, then in .finish we sort this input and resursively build the tree:
+ tempInput = tempDir.resolve("in");
+ writer = new OfflineSorter.ByteSequencesWriter(tempInput);
+ for(int i=0;i<pointCount;i++) {
+ scratchBytesOutput.reset(scratchBytes);
+ scratchBytesOutput.writeInt(heapWriter.xs[i]);
+ scratchBytesOutput.writeInt(heapWriter.ys[i]);
+ scratchBytesOutput.writeInt(heapWriter.zs[i]);
+ scratchBytesOutput.writeVInt(heapWriter.docIDs[i]);
+ scratchBytesOutput.writeVLong(i);
+ // TODO: can/should OfflineSorter optimize the fixed-width case?
+ writer.write(scratchBytes, 0, scratchBytes.length);
+ }
+
+ heapWriter = null;
+ }
+
+ public void add(int x, int y, int z, int docID) throws IOException {
+
+ if (pointCount >= maxPointsSortInHeap) {
+ if (writer == null) {
+ switchToOffline();
+ }
+ scratchBytesOutput.reset(scratchBytes);
+ scratchBytesOutput.writeInt(x);
+ scratchBytesOutput.writeInt(y);
+ scratchBytesOutput.writeInt(z);
+ scratchBytesOutput.writeVInt(docID);
+ scratchBytesOutput.writeVLong(pointCount);
+ writer.write(scratchBytes, 0, scratchBytes.length);
+ } else {
+ // Not too many points added yet, continue using heap:
+ heapWriter.append(x, y, z, pointCount, docID);
+ }
+
+ pointCount++;
+ globalMinX = Math.min(globalMinX, x);
+ globalMaxX = Math.max(globalMaxX, x);
+ globalMinY = Math.min(globalMinY, y);
+ globalMaxY = Math.max(globalMaxY, y);
+ globalMinZ = Math.min(globalMinZ, z);
+ globalMaxZ = Math.max(globalMaxZ, z);
+ }
+
+ /** Changes incoming {@link ByteSequencesWriter} file to to fixed-width-per-entry file, because we need to be able to slice
+ * as we recurse in {@link #build}. */
+ private Writer convertToFixedWidth(Path in) throws IOException {
+ BytesRefBuilder scratch = new BytesRefBuilder();
+ scratch.grow(BYTES_PER_DOC);
+ BytesRef bytes = scratch.get();
+ ByteArrayDataInput dataReader = new ByteArrayDataInput();
+
+ OfflineSorter.ByteSequencesReader reader = null;
+ Writer sortedWriter = null;
+ boolean success = false;
+ try {
+ reader = new OfflineSorter.ByteSequencesReader(in);
+ sortedWriter = getWriter(pointCount);
+ for (long i=0;i<pointCount;i++) {
+ boolean result = reader.read(scratch);
+ assert result;
+ dataReader.reset(bytes.bytes, bytes.offset, bytes.length);
+ int x = dataReader.readInt();
+ int y = dataReader.readInt();
+ int z = dataReader.readInt();
+ int docID = dataReader.readVInt();
+ long ord = dataReader.readVLong();
+ assert docID >= 0: "docID=" + docID;
+ sortedWriter.append(x, y, z, ord, docID);
+ }
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(sortedWriter, reader);
+ } else {
+ IOUtils.closeWhileHandlingException(reader);
+ try {
+ sortedWriter.destroy();
+ } catch (Throwable t) {
+ // Suppress to keep throwing original exc
+ }
+ }
+ }
+
+ return sortedWriter;
+ }
+
+ /** dim: 0=x, 1=y, 2=z */
+ private Writer sort(int dim) throws IOException {
+ if (heapWriter != null) {
+
+ assert pointCount < Integer.MAX_VALUE;
+
+ // All buffered points are still in heap
+ new InPlaceMergeSorter() {
+ @Override
+ protected void swap(int i, int j) {
+ int docID = heapWriter.docIDs[i];
+ heapWriter.docIDs[i] = heapWriter.docIDs[j];
+ heapWriter.docIDs[j] = docID;
+
+ long ord = heapWriter.ords[i];
+ heapWriter.ords[i] = heapWriter.ords[j];
+ heapWriter.ords[j] = ord;
+
+ int x = heapWriter.xs[i];
+ heapWriter.xs[i] = heapWriter.xs[j];
+ heapWriter.xs[j] = x;
+
+ int y = heapWriter.ys[i];
+ heapWriter.ys[i] = heapWriter.ys[j];
+ heapWriter.ys[j] = y;
+
+ int z = heapWriter.zs[i];
+ heapWriter.zs[i] = heapWriter.zs[j];
+ heapWriter.zs[j] = z;
+ }
+
+ @Override
+ protected int compare(int i, int j) {
+ int cmp;
+ if (dim == 0) {
+ cmp = Integer.compare(heapWriter.xs[i], heapWriter.xs[j]);
+ } else if (dim == 1) {
+ cmp = Integer.compare(heapWriter.ys[i], heapWriter.ys[j]);
+ } else {
+ cmp = Integer.compare(heapWriter.zs[i], heapWriter.zs[j]);
+ }
+ if (cmp != 0) {
+ return cmp;
+ }
+
+ // Tie-break
+ cmp = Integer.compare(heapWriter.docIDs[i], heapWriter.docIDs[j]);
+ if (cmp != 0) {
+ return cmp;
+ }
+
+ return Long.compare(heapWriter.ords[i], heapWriter.ords[j]);
+ }
+ }.sort(0, (int) pointCount);
+
+ HeapWriter sorted = new HeapWriter((int) pointCount);
+ //System.out.println("sorted dim=" + dim);
+ for(int i=0;i<pointCount;i++) {
+ /*
+ System.out.println(" docID=" + heapWriter.docIDs[i] +
+ " x=" + heapWriter.xs[i] +
+ " y=" + heapWriter.ys[i] +
+ " z=" + heapWriter.zs[i]);
+ */
+ sorted.append(heapWriter.xs[i],
+ heapWriter.ys[i],
+ heapWriter.zs[i],
+ heapWriter.ords[i],
+ heapWriter.docIDs[i]);
+ }
+
+ return sorted;
+ } else {
+
+ // Offline sort:
+ assert tempDir != null;
+
+ final ByteArrayDataInput reader = new ByteArrayDataInput();
+ Comparator<BytesRef> cmp = new Comparator<BytesRef>() {
+ private final ByteArrayDataInput readerB = new ByteArrayDataInput();
+
+ @Override
+ public int compare(BytesRef a, BytesRef b) {
+ reader.reset(a.bytes, a.offset, a.length);
+ final int xa = reader.readInt();
+ final int ya = reader.readInt();
+ final int za = reader.readInt();
+ final int docIDA = reader.readVInt();
+ final long ordA = reader.readVLong();
+
+ reader.reset(b.bytes, b.offset, b.length);
+ final int xb = reader.readInt();
+ final int yb = reader.readInt();
+ final int zb = reader.readInt();
+ final int docIDB = reader.readVInt();
+ final long ordB = reader.readVLong();
+
+ int cmp;
+ if (dim == 0) {
+ cmp = Integer.compare(xa, xb);
+ } else if (dim == 1) {
+ cmp = Integer.compare(ya, yb);
+ } else {
+ cmp = Integer.compare(za, zb);
+ }
+ if (cmp != 0) {
+ return cmp;
+ }
+
+ // Tie-break
+ cmp = Integer.compare(docIDA, docIDB);
+ if (cmp != 0) {
+ return cmp;
+ }
+
+ return Long.compare(ordA, ordB);
+ }
+ };
+
+ Path sorted = tempDir.resolve("sorted");
+ boolean success = false;
+ try {
+ OfflineSorter sorter = new OfflineSorter(cmp, OfflineSorter.BufferSize.automatic(), tempDir, OfflineSorter.MAX_TEMPFILES);
+ sorter.sort(tempInput, sorted);
+ Writer writer = convertToFixedWidth(sorted);
+ success = true;
+ return writer;
+ } finally {
+ if (success) {
+ IOUtils.rm(sorted);
+ } else {
+ IOUtils.deleteFilesIgnoringExceptions(sorted);
+ }
+ }
+ }
+ }
+
+ /** Writes the BKD tree to the provided {@link IndexOutput} and returns the file offset where index was written. */
+ public long finish(IndexOutput out) throws IOException {
+ //System.out.println("\nBKDTreeWriter.finish pointCount=" + pointCount + " out=" + out + " heapWriter=" + heapWriter);
+
+ if (writer != null) {
+ writer.close();
+ }
+
+ LongBitSet bitSet = new LongBitSet(pointCount);
+
+ long countPerLeaf = pointCount;
+ long innerNodeCount = 1;
+
+ while (countPerLeaf > maxPointsInLeafNode) {
+ countPerLeaf /= 2;
+ innerNodeCount *= 2;
+ }
+
+ //System.out.println("innerNodeCount=" + innerNodeCount);
+
+ if (1+2*innerNodeCount >= Integer.MAX_VALUE) {
+ throw new IllegalStateException("too many nodes; increase maxPointsInLeafNode (currently " + maxPointsInLeafNode + ") and reindex");
+ }
+
+ innerNodeCount--;
+
+ int numLeaves = (int) (innerNodeCount+1);
+ //System.out.println(" numLeaves=" + numLeaves);
+
+ // Indexed by nodeID, but first (root) nodeID is 1
+ int[] splitValues = new int[numLeaves];
+
+ // +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g. 7)
+ long[] leafBlockFPs = new long[numLeaves];
+
+ // Make sure the math above "worked":
+ assert pointCount / splitValues.length <= maxPointsInLeafNode: "pointCount=" + pointCount + " splitValues.length=" + splitValues.length + " maxPointsInLeafNode=" + maxPointsInLeafNode;
+ //System.out.println(" avg pointsPerLeaf=" + (pointCount/splitValues.length));
+
+ // Sort all docs once by x, once by y, once by z:
+ Writer xSortedWriter = null;
+ Writer ySortedWriter = null;
+ Writer zSortedWriter = null;
+
+ boolean success = false;
+ try {
+ xSortedWriter = sort(0);
+ ySortedWriter = sort(1);
+ zSortedWriter = sort(2);
+ heapWriter = null;
+
+ build(1, numLeaves,
+ new PathSlice(xSortedWriter, 0, pointCount),
+ new PathSlice(ySortedWriter, 0, pointCount),
+ new PathSlice(zSortedWriter, 0, pointCount),
+ bitSet, out,
+ globalMinX, globalMaxX,
+ globalMinY, globalMaxY,
+ globalMinZ, globalMaxZ,
+ splitValues,
+ leafBlockFPs);
+ success = true;
+ } finally {
+ if (success) {
+ xSortedWriter.destroy();
+ ySortedWriter.destroy();
+ zSortedWriter.destroy();
+ IOUtils.rm(tempInput);
+ } else {
+ try {
+ xSortedWriter.destroy();
+ } catch (Throwable t) {
+ // Suppress to keep throwing original exc
+ }
+ try {
+ ySortedWriter.destroy();
+ } catch (Throwable t) {
+ // Suppress to keep throwing original exc
+ }
+ try {
+ zSortedWriter.destroy();
+ } catch (Throwable t) {
+ // Suppress to keep throwing original exc
+ }
+ IOUtils.deleteFilesIgnoringExceptions(tempInput);
+ }
+ }
+
+ //System.out.println("Total nodes: " + innerNodeCount);
+
+ // Write index:
+ long indexFP = out.getFilePointer();
+ //System.out.println("indexFP=" + indexFP);
+ out.writeInt(globalMinX);
+ out.writeInt(globalMaxX);
+ out.writeInt(globalMinY);
+ out.writeInt(globalMaxY);
+ out.writeInt(globalMinZ);
+ out.writeInt(globalMaxZ);
+ out.writeVInt(numLeaves);
+
+ // NOTE: splitValues[0] is unused, because nodeID is 1-based:
+ for (int i=0;i<splitValues.length;i++) {
+ out.writeInt(splitValues[i]);
+ }
+ for (int i=0;i<leafBlockFPs.length;i++) {
+ out.writeVLong(leafBlockFPs[i]);
+ }
+
+ if (tempDir != null) {
+ // If we had to go offline, we should have removed all temp files we wrote:
+ assert directoryIsEmpty(tempDir);
+ IOUtils.rm(tempDir);
+ }
+
+ return indexFP;
+ }
+
+ // Called only from assert
+ private boolean directoryIsEmpty(Path in) {
+ try (DirectoryStream<Path> dir = Files.newDirectoryStream(in)) {
+ for (Path path : dir) {
+ assert false: "dir=" + in + " still has file=" + path;
+ return false;
+ }
+ } catch (IOException ioe) {
+ // Just ignore: we are only called from assert
+ }
+ return true;
+ }
+
+ /** Sliced reference to points in an OfflineSorter.ByteSequencesWriter file. */
+ private static final class PathSlice {
+ final Writer writer;
+ final long start;
+ final long count;
+
+ public PathSlice(Writer writer, long start, long count) {
+ this.writer = writer;
+ this.start = start;
+ this.count = count;
+ }
+
+ @Override
+ public String toString() {
+ return "PathSlice(start=" + start + " count=" + count + " writer=" + writer + ")";
+ }
+ }
+
+ /** Marks bits for the ords (points) that belong in the left sub tree. */
+ private int markLeftTree(int splitDim, PathSlice source, LongBitSet bitSet,
+ int minX, int maxX,
+ int minY, int maxY,
+ int minZ, int maxZ) throws IOException {
+
+ // This is the size of our left tree
+ long leftCount = source.count / 2;
+
+ // Read the split value:
+ //if (DEBUG) System.out.println(" leftCount=" + leftCount + " vs " + source.count);
+ Reader reader = source.writer.getReader(source.start + leftCount);
+ boolean success = false;
+ int splitValue;
+ try {
+ boolean result = reader.next();
+ assert result;
+
+ int x = reader.x();
+ assert x >= minX && x <= maxX: "x=" + x + " minX=" + minX + " maxX=" + maxX;
+
+ int y = reader.y();
+ assert y >= minY && y <= maxY: "y=" + y + " minY=" + minY + " maxY=" + maxY;
+
+ int z = reader.z();
+ assert z >= minZ && z <= maxZ: "z=" + z + " minZ=" + minZ + " maxZ=" + maxZ;
+
+ if (splitDim == 0) {
+ splitValue = x;
+ } else if (splitDim == 1) {
+ splitValue = y;
+ } else {
+ splitValue = z;
+ }
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(reader);
+ } else {
+ IOUtils.closeWhileHandlingException(reader);
+ }
+ }
+
+ // Mark ords that fall into the left half, and also handle the == boundary case:
+ assert bitSet.cardinality() == 0: "cardinality=" + bitSet.cardinality();
+
+ success = false;
+ reader = source.writer.getReader(source.start);
+ try {
+ int lastValue = Integer.MIN_VALUE;
+ for (int i=0;i<leftCount;i++) {
+ boolean result = reader.next();
+ assert result;
+ int x = reader.x();
+ int y = reader.y();
+ int z = reader.z();
+
+ int value;
+ if (splitDim == 0) {
+ value = x;
+ } else if (splitDim == 1) {
+ value = y;
+ } else {
+ value = z;
+ }
+
+ // Our input source is supposed to be sorted on the incoming dimension:
+ assert value >= lastValue;
+ lastValue = value;
+
+ assert value <= splitValue: "i=" + i + " value=" + value + " vs splitValue=" + splitValue;
+ long ord = reader.ord();
+ int docID = reader.docID();
+ assert docID >= 0: "docID=" + docID + " reader=" + reader;
+
+ // We should never see dup ords:
+ assert bitSet.get(ord) == false;
+ bitSet.set(ord);
+ }
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(reader);
+ } else {
+ IOUtils.closeWhileHandlingException(reader);
+ }
+ }
+
+ assert leftCount == bitSet.cardinality(): "leftCount=" + leftCount + " cardinality=" + bitSet.cardinality();
+
+ return splitValue;
+ }
+
+ // Split on the dim with the largest range:
+ static int getSplitDim(int minX, int maxX, int minY, int maxY, int minZ, int maxZ) {
+ long xRange = (long) maxX - (long) minX;
+ long yRange = (long) maxY - (long) minY;
+ long zRange = (long) maxZ - (long) minZ;
+
+ if (xRange > yRange) {
+ if (xRange > zRange) {
+ return 0;
+ } else {
+ return 2;
+ }
+ } else if (yRange > zRange) {
+ return 1;
+ } else {
+ return 2;
+ }
+ }
+
+ /** The incoming PathSlice for the dim we will split is already partitioned/sorted. */
+ private void build(int nodeID, int leafNodeOffset,
+ PathSlice lastXSorted,
+ PathSlice lastYSorted,
+ PathSlice lastZSorted,
+ LongBitSet bitSet,
+ IndexOutput out,
+ int minX, int maxX,
+ int minY, int maxY,
+ int minZ, int maxZ,
+ int[] splitValues,
+ long[] leafBlockFPs) throws IOException {
+
+ assert lastXSorted.count == lastYSorted.count;
+ assert lastXSorted.count == lastZSorted.count;
+
+ if (DEBUG) System.out.println("\nBUILD: nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset + "\n lastXSorted=" + lastXSorted + "\n lastYSorted=" + lastYSorted + "\n lastZSorted=" + lastZSorted + "\n count=" + lastXSorted.count + " x=" + minX + " TO " + maxX + " y=" + minY + " TO " + maxY + " z=" + minZ + " TO " + maxZ);
+
+ if (nodeID >= leafNodeOffset) {
+ // Leaf node: write block
+ if (DEBUG) System.out.println(" leaf");
+ assert maxX > minX;
+ assert maxY > minY;
+ assert maxZ > minZ;
+
+ //System.out.println("\nleaf:\n lat range: " + ((long) maxLatEnc-minLatEnc));
+ //System.out.println(" lon range: " + ((long) maxLonEnc-minLonEnc));
+
+ // Sort by docID in the leaf so we get sequentiality at search time (may not matter?):
+ Reader reader = lastXSorted.writer.getReader(lastXSorted.start);
+
+ // nocommit we can reuse this?
+ int[] docIDs = new int[(int) lastXSorted.count];
+
+ boolean success = false;
+ try {
+ for (int i=0;i<lastXSorted.count;i++) {
+
+ // NOTE: we discard ord at this point; we only needed it temporarily
+ // during building to uniquely identify each point to properly handle
+ // the multi-valued case (one docID having multiple values):
+
+ // We also discard lat/lon, since at search time, we reside on the
+ // wrapped doc values for this:
+
+ boolean result = reader.next();
+ assert result;
+ docIDs[i] = reader.docID();
+ }
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(reader);
+ } else {
+ IOUtils.closeWhileHandlingException(reader);
+ }
+ }
+
+ Arrays.sort(docIDs);
+
+ // Dedup docIDs: for the multi-valued case where more than one value for the doc
+ // wound up in this leaf cell, we only need to store the docID once:
+ int lastDocID = -1;
+ int uniqueCount = 0;
+ for(int i=0;i<docIDs.length;i++) {
+ int docID = docIDs[i];
+ if (docID != lastDocID) {
+ uniqueCount++;
+ lastDocID = docID;
+ }
+ }
+ assert uniqueCount <= lastXSorted.count;
+
+ long startFP = out.getFilePointer();
+ out.writeVInt(uniqueCount);
+
+ // Save the block file pointer:
+ leafBlockFPs[nodeID - leafNodeOffset] = startFP;
+ //System.out.println(" leafFP=" + startFP);
+
+ lastDocID = -1;
+ for (int i=0;i<docIDs.length;i++) {
+ // Absolute int encode; with "vInt of deltas" encoding, the .kdd size dropped from
+ // 697 MB -> 539 MB, but query time for 225 queries went from 1.65 sec -> 2.64 sec.
+ // I think if we also indexed prefix terms here we could do less costly compression
+ // on those lists:
+ int docID = docIDs[i];
+ if (docID != lastDocID) {
+ out.writeInt(docID);
+ //System.out.println(" write docID=" + docID);
+ lastDocID = docID;
+ }
+ }
+ //long endFP = out.getFilePointer();
+ //System.out.println(" bytes/doc: " + ((endFP - startFP) / count));
+ } else {
+
+ int splitDim = getSplitDim(minX, maxX, minY, maxY, minZ, maxZ);
+ //System.out.println(" splitDim=" + splitDim);
+
+ PathSlice source;
+
+ if (splitDim == 0) {
+ source = lastXSorted;
+ } else if (splitDim == 1) {
+ source = lastYSorted;
+ } else {
+ source = lastZSorted;
+ }
+
+ long count = source.count;
+
+ // We let ties go to either side, so we should never get down to count == 0, even
+ // in adversarial case (all values are the same):
+ assert count > 0;
+
+ // Inner node: partition/recurse
+ if (DEBUG) System.out.println(" non-leaf");
+
+ assert nodeID < splitValues.length: "nodeID=" + nodeID + " splitValues.length=" + splitValues.length;
+
+ int splitValue = markLeftTree(splitDim, source, bitSet,
+ minX, maxX,
+ minY, maxY,
+ minZ, maxZ);
+ long leftCount = count/2;
+
+ // TODO: we could save split value in here so we don't have to re-open file later:
+
+ // Partition the other (not split) dims into sorted left and right sets, so we can recurse.
+ // This is somewhat hairy: we partition the next X, Y set according to how we had just
+ // partitioned the Z set, etc.
+
+ Writer[] leftWriters = new Writer[3];
+ Writer[] rightWriters = new Writer[3];
+
+ for(int dim=0;dim<3;dim++) {
+ if (dim == splitDim) {
+ continue;
+ }
+
+ Writer leftWriter = null;
+ Writer rightWriter = null;
+ Reader reader = null;
+
+ boolean success = false;
+
+ int nextLeftCount = 0;
+
+ PathSlice nextSource;
+ if (dim == 0) {
+ nextSource = lastXSorted;
+ } else if (dim == 1) {
+ nextSource = lastYSorted;
+ } else {
+ nextSource = lastZSorted;
+ }
+
+ try {
+ leftWriter = getWriter(leftCount);
+ rightWriter = getWriter(nextSource.count - leftCount);
+
+ assert nextSource.count == count;
+ reader = nextSource.writer.getReader(nextSource.start);
+
+ // TODO: we could compute the split value here for each sub-tree and save an O(N) pass on recursion, but makes code hairier and only
+ // changes the constant factor of building, not the big-oh:
+ for (int i=0;i<count;i++) {
+ boolean result = reader.next();
+ assert result;
+ int x = reader.x();
+ int y = reader.y();
+ int z = reader.z();
+ long ord = reader.ord();
+ int docID = reader.docID();
+ assert docID >= 0: "docID=" + docID + " reader=" + reader;
+ //System.out.println(" i=" + i + " x=" + x + " ord=" + ord + " docID=" + docID);
+ if (bitSet.get(ord)) {
+ if (splitDim == 0) {
+ assert x <= splitValue: "x=" + x + " splitValue=" + splitValue;
+ } else if (splitDim == 1) {
+ assert y <= splitValue: "y=" + y + " splitValue=" + splitValue;
+ } else {
+ assert z <= splitValue: "z=" + z + " splitValue=" + splitValue;
+ }
+ leftWriter.append(x, y, z, ord, docID);
+ nextLeftCount++;
+ } else {
+ if (splitDim == 0) {
+ assert x >= splitValue: "x=" + x + " splitValue=" + splitValue;
+ } else if (splitDim == 1) {
+ assert y >= splitValue: "y=" + y + " splitValue=" + splitValue;
+ } else {
+ assert z >= splitValue: "z=" + z + " splitValue=" + splitValue;
+ }
+ rightWriter.append(x, y, z, ord, docID);
+ }
+ }
+ success = true;
+ } finally {
+ if (success) {
+ IOUtils.close(reader, leftWriter, rightWriter);
+ } else {
+ IOUtils.closeWhileHandlingException(reader, leftWriter, rightWriter);
+ }
+ }
+
+ assert leftCount == nextLeftCount: "leftCount=" + leftCount + " nextLeftCount=" + nextLeftCount;
+ leftWriters[dim] = leftWriter;
+ rightWriters[dim] = rightWriter;
+ }
+ bitSet.clear(0, pointCount);
+
+ long rightCount = count - leftCount;
+
+ boolean success = false;
+ try {
+ if (splitDim == 0) {
+ build(2*nodeID, leafNodeOffset,
+ new PathSlice(source.writer, source.start, leftCount),
+ new PathSlice(leftWriters[1], 0, leftCount),
+ new PathSlice(leftWriters[2], 0, leftCount),
+ bitSet,
+ out,
+ minX, splitValue,
+ minY, maxY,
+ minZ, maxZ,
+ splitValues, leafBlockFPs);
+ leftWriters[1].destroy();
+ leftWriters[2].destroy();
+
+ build(2*nodeID+1, leafNodeOffset,
+ new PathSlice(source.writer, source.start+leftCount, rightCount),
+ new PathSlice(rightWriters[1], 0, rightCount),
+ new PathSlice(rightWriters[2], 0, rightCount),
+ bitSet,
+ out,
+ splitValue, maxX,
+ minY, maxY,
+ minZ, maxZ,
+ splitValues, leafBlockFPs);
+ rightWriters[1].destroy();
+ rightWriters[2].destroy();
+ } else if (splitDim == 1) {
+ build(2*nodeID, leafNodeOffset,
+ new PathSlice(leftWriters[0], 0, leftCount),
+ new PathSlice(source.writer, source.start, leftCount),
+ new PathSlice(leftWriters[2], 0, leftCount),
+ bitSet,
+ out,
+ minX, maxX,
+ minY, splitValue,
+ minZ, maxZ,
+ splitValues, leafBlockFPs);
+ leftWriters[0].destroy();
+ leftWriters[2].destroy();
+
+ build(2*nodeID+1, leafNodeOffset,
+ new PathSlice(rightWriters[0], 0, rightCount),
+ new PathSlice(source.writer, source.start+leftCount, rightCount),
+ new PathSlice(rightWriters[2], 0, rightCount),
+ bitSet,
+ out,
+ minX, maxX,
+ splitValue, maxY,
+ minZ, maxZ,
+ splitValues, leafBlockFPs);
+ rightWriters[0].destroy();
+ rightWriters[2].destroy();
+ } else {
+ build(2*nodeID, leafNodeOffset,
+ new PathSlice(leftWriters[0], 0, leftCount),
+ new PathSlice(leftWriters[1], 0, leftCount),
+ new PathSlice(source.writer, source.start, leftCount),
+ bitSet,
+ out,
+ minX, maxX,
+ minY, maxY,
+ minZ, splitValue,
+ splitValues, leafBlockFPs);
+ leftWriters[0].destroy();
+ leftWriters[1].destroy();
+
+ build(2*nodeID+1, leafNodeOffset,
+ new PathSlice(rightWriters[0], 0, rightCount),
+ new PathSlice(rightWriters[1], 0, rightCount),
+ new PathSlice(source.writer, source.start+leftCount, rightCount),
+ bitSet,
+ out,
+ minX, maxX,
+ minY, maxY,
+ splitValue, maxZ,
+ splitValues, leafBlockFPs);
+ rightWriters[0].destroy();
+ rightWriters[1].destroy();
+ }
+ success = true;
+ } finally {
+ if (success == false) {
+ for(Writer writer : leftWriters) {
+ if (writer != null) {
+ try {
+ writer.destroy();
+ } catch (Throwable t) {
+ // Suppress to keep throwing original exc
+ }
+ }
+ }
+ for(Writer writer : rightWriters) {
+ if (writer != null) {
+ try {
+ writer.destroy();
+ } catch (Throwable t) {
+ // Suppress to keep throwing original exc
+ }
+ }
+ }
+ }
+ }
+
+ splitValues[nodeID] = splitValue;
+ }
+ }
+
+ Writer getWriter(long count) throws IOException {
+ if (count < maxPointsSortInHeap) {
+ return new HeapWriter((int) count);
+ } else {
+ return new OfflineWriter(tempDir, count);
+ }
+ }
+}
Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/GrowingHeapWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/GrowingHeapWriter.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/GrowingHeapWriter.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/GrowingHeapWriter.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,92 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.RamUsageEstimator;
+
+final class GrowingHeapWriter implements Writer {
+ int[] xs;
+ int[] ys;
+ int[] zs;
+ int[] docIDs;
+ long[] ords;
+ private int nextWrite;
+ final int maxSize;
+
+ public GrowingHeapWriter(int maxSize) {
+ xs = new int[16];
+ ys = new int[16];
+ zs = new int[16];
+ docIDs = new int[16];
+ ords = new long[16];
+ this.maxSize = maxSize;
+ }
+
+ private int[] growExact(int[] arr, int size) {
+ assert size > arr.length;
+ int[] newArr = new int[size];
+ System.arraycopy(arr, 0, newArr, 0, arr.length);
+ return newArr;
+ }
+
+ private long[] growExact(long[] arr, int size) {
+ assert size > arr.length;
+ long[] newArr = new long[size];
+ System.arraycopy(arr, 0, newArr, 0, arr.length);
+ return newArr;
+ }
+
+ @Override
+ public void append(int x, int y, int z, long ord, int docID) {
+ assert ord == nextWrite;
+ if (xs.length == nextWrite) {
+ int nextSize = Math.min(maxSize, ArrayUtil.oversize(nextWrite+1, RamUsageEstimator.NUM_BYTES_INT));
+ assert nextSize > nextWrite: "nextSize=" + nextSize + " vs nextWrite=" + nextWrite;
+ xs = growExact(xs, nextSize);
+ ys = growExact(ys, nextSize);
+ zs = growExact(zs, nextSize);
+ ords = growExact(ords, nextSize);
+ docIDs = growExact(docIDs, nextSize);
+ }
+ xs[nextWrite] = x;
+ ys[nextWrite] = y;
+ zs[nextWrite] = z;
+ ords[nextWrite] = ord;
+ docIDs[nextWrite] = docID;
+ nextWrite++;
+ }
+
+ @Override
+ public Reader getReader(long start) {
+ return new HeapReader(xs, ys, zs, ords, docIDs, (int) start, nextWrite);
+ }
+
+ @Override
+ public void close() {
+ }
+
+ @Override
+ public void destroy() {
+ }
+
+ @Override
+ public String toString() {
+ return "GrowingHeapWriter(count=" + nextWrite + " alloc=" + xs.length + ")";
+ }
+}
Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapReader.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapReader.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapReader.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,73 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+final class HeapReader implements Reader {
+ private int curRead;
+ final int[] xs;
+ final int[] ys;
+ final int[] zs;
+ final long[] ords;
+ final int[] docIDs;
+ final int end;
+
+ HeapReader(int[] xs, int[] ys, int[] zs, long[] ords, int[] docIDs, int start, int end) {
+ this.xs = xs;
+ this.ys = ys;
+ this.zs = zs;
+ this.ords = ords;
+ this.docIDs = docIDs;
+ curRead = start-1;
+ this.end = end;
+ }
+
+ @Override
+ public boolean next() {
+ curRead++;
+ return curRead < end;
+ }
+
+ @Override
+ public int x() {
+ return xs[curRead];
+ }
+
+ @Override
+ public int y() {
+ return ys[curRead];
+ }
+
+ @Override
+ public int z() {
+ return zs[curRead];
+ }
+
+ @Override
+ public int docID() {
+ return docIDs[curRead];
+ }
+
+ @Override
+ public long ord() {
+ return ords[curRead];
+ }
+
+ @Override
+ public void close() {
+ }
+}
Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapWriter.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapWriter.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/HeapWriter.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,66 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+final class HeapWriter implements Writer {
+ final int[] xs;
+ final int[] ys;
+ final int[] zs;
+ final int[] docIDs;
+ final long[] ords;
+ private int nextWrite;
+
+ public HeapWriter(int count) {
+ xs = new int[count];
+ ys = new int[count];
+ zs = new int[count];
+ docIDs = new int[count];
+ ords = new long[count];
+ }
+
+ @Override
+ public void append(int x, int y, int z, long ord, int docID) {
+ xs[nextWrite] = x;
+ ys[nextWrite] = y;
+ zs[nextWrite] = z;
+ ords[nextWrite] = ord;
+ docIDs[nextWrite] = docID;
+ nextWrite++;
+ }
+
+ @Override
+ public Reader getReader(long start) {
+ return new HeapReader(xs, ys, zs, ords, docIDs, (int) start, xs.length);
+ }
+
+ @Override
+ public void close() {
+ if (nextWrite != xs.length) {
+ throw new IllegalStateException("only wrote " + nextWrite + " values, but expected " + xs.length);
+ }
+ }
+
+ @Override
+ public void destroy() {
+ }
+
+ @Override
+ public String toString() {
+ return "HeapWriter(count=" + xs.length + ")";
+ }
+}
Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineReader.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineReader.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineReader.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,95 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.lucene.store.InputStreamDataInput;
+
+final class OfflineReader implements Reader {
+ final InputStreamDataInput in;
+ long countLeft;
+ private int x;
+ private int y;
+ private int z;
+ private long ord;
+ private int docID;
+
+ OfflineReader(Path tempFile, long start, long count) throws IOException {
+ InputStream fis = Files.newInputStream(tempFile);
+ long seekFP = start * BKD3DTreeWriter.BYTES_PER_DOC;
+ long skipped = 0;
+ while (skipped < seekFP) {
+ long inc = fis.skip(seekFP - skipped);
+ skipped += inc;
+ if (inc == 0) {
+ throw new RuntimeException("skip returned 0");
+ }
+ }
+ in = new InputStreamDataInput(new BufferedInputStream(fis));
+ this.countLeft = count;
+ }
+
+ @Override
+ public boolean next() throws IOException {
+ if (countLeft == 0) {
+ return false;
+ }
+ countLeft--;
+ x = in.readInt();
+ y = in.readInt();
+ z = in.readInt();
+ ord = in.readLong();
+ docID = in.readInt();
+ return true;
+ }
+
+ @Override
+ public int x() {
+ return x;
+ }
+
+ @Override
+ public int y() {
+ return y;
+ }
+
+ @Override
+ public int z() {
+ return z;
+ }
+
+ @Override
+ public long ord() {
+ return ord;
+ }
+
+ @Override
+ public int docID() {
+ return docID;
+ }
+
+ @Override
+ public void close() throws IOException {
+ in.close();
+ }
+}
Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineWriter.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineWriter.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/OfflineWriter.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,76 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.lucene.store.ByteArrayDataOutput;
+import org.apache.lucene.store.OutputStreamDataOutput;
+import org.apache.lucene.util.IOUtils;
+
+final class OfflineWriter implements Writer {
+
+ final Path tempFile;
+ final byte[] scratchBytes = new byte[BKD3DTreeWriter.BYTES_PER_DOC];
+ final ByteArrayDataOutput scratchBytesOutput = new ByteArrayDataOutput(scratchBytes);
+ final OutputStreamDataOutput out;
+ final long count;
+ private long countWritten;
+
+ public OfflineWriter(Path tempDir, long count) throws IOException {
+ tempFile = Files.createTempFile(tempDir, "size" + count + ".", "");
+ out = new OutputStreamDataOutput(new BufferedOutputStream(Files.newOutputStream(tempFile)));
+ this.count = count;
+ }
+
+ @Override
+ public void append(int x, int y, int z, long ord, int docID) throws IOException {
+ out.writeInt(x);
+ out.writeInt(y);
+ out.writeInt(z);
+ out.writeLong(ord);
+ out.writeInt(docID);
+ countWritten++;
+ }
+
+ @Override
+ public Reader getReader(long start) throws IOException {
+ return new OfflineReader(tempFile, start, count-start);
+ }
+
+ @Override
+ public void close() throws IOException {
+ out.close();
+ if (count != countWritten) {
+ throw new IllegalStateException("wrote " + countWritten + " values, but expected " + count);
+ }
+ }
+
+ @Override
+ public void destroy() throws IOException {
+ IOUtils.rm(tempFile);
+ }
+
+ @Override
+ public String toString() {
+ return "OfflineWriter(count=" + count + " tempFile=" + tempFile + ")";
+ }
+}
Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Reader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Reader.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Reader.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Reader.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,31 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+
+/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */
+interface Reader extends Closeable {
+ boolean next() throws IOException;
+ int x();
+ int y();
+ int z();
+ long ord();
+ int docID();
+}
Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Writer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Writer.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Writer.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/java/org/apache/lucene/bkdtree3d/Writer.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,29 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.Closeable;
+import java.io.IOException;
+
+/** Abstracts away whether OfflineSorter or simple arrays in heap are used. */
+interface Writer extends Closeable {
+ void append(int x, int y, int z, long ord, int docID) throws IOException;
+ Reader getReader(long start) throws IOException;
+ void destroy() throws IOException;
+}
+
Added: lucene/dev/branches/lucene6699/lucene/spatial3d/src/test/org/apache/lucene/bkdtree3d/TestBKD3DTree.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6699/lucene/spatial3d/src/test/org/apache/lucene/bkdtree3d/TestBKD3DTree.java?rev=1695370&view=auto
==============================================================================
--- lucene/dev/branches/lucene6699/lucene/spatial3d/src/test/org/apache/lucene/bkdtree3d/TestBKD3DTree.java (added)
+++ lucene/dev/branches/lucene6699/lucene/spatial3d/src/test/org/apache/lucene/bkdtree3d/TestBKD3DTree.java Tue Aug 11 20:33:49 2015
@@ -0,0 +1,281 @@
+package org.apache.lucene.bkdtree3d;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.apache.lucene.bkdtree3d.BKD3DTreeDocValuesFormat.encodeValue;
+
+public class TestBKD3DTree extends LuceneTestCase {
+
+ public void testBasic() throws Exception {
+ Directory dir = newDirectory();
+ IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
+
+ BKD3DTreeWriter w = new BKD3DTreeWriter();
+
+ w.add(0, 0, 0, 0);
+ w.add(1, 1, 1, 1);
+ w.add(-1, -1, -1, 2);
+
+ long indexFP = w.finish(out);
+ out.close();
+
+ IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
+ in.seek(indexFP);
+ BKD3DTreeReader r = new BKD3DTreeReader(in, 3);
+
+ DocIdSet hits = r.intersect(Integer.MIN_VALUE, Integer.MAX_VALUE,
+ Integer.MIN_VALUE, Integer.MAX_VALUE,
+ Integer.MIN_VALUE, Integer.MAX_VALUE,
+
+ new BKD3DTreeReader.ValueFilter() {
+
+ @Override
+ public boolean accept(int docID) {
+ return true;
+ }
+
+ @Override
+ public BKD3DTreeReader.Relation compare(int xMin, int xMax,
+ int yMin, int yMax,
+ int zMin, int zMax) {
+ return BKD3DTreeReader.Relation.INSIDE;
+ }
+
+ });
+ DocIdSetIterator disi = hits.iterator();
+ assertEquals(0, disi.nextDoc());
+ assertEquals(1, disi.nextDoc());
+ assertEquals(2, disi.nextDoc());
+ assertEquals(DocIdSetIterator.NO_MORE_DOCS, disi.nextDoc());
+ in.close();
+ dir.close();
+ }
+
+ static class Point {
+ final double x;
+ final double y;
+ final double z;
+
+ public Point(double x, double y, double z) {
+ this.x = x;
+ this.y = y;
+ this.z = z;
+ }
+
+ @Override
+ public String toString() {
+ return "x=" + x + " y=" + y + " z=" + z;
+ }
+ }
+
+ private static class Range {
+ final double min;
+ final double max;
+
+ public Range(double min, double max) {
+ this.min = min;
+ this.max = max;
+ }
+
+ @Override
+ public String toString() {
+ return min + " TO " + max;
+ }
+ }
+
+ private double randomCoord() {
+ // nocommit
+ //return (random().nextDouble()*2.0022) - 1.0011;
+ return (random().nextDouble()*.002) - .001;
+ }
+
+ private Range randomRange() {
+ double x = randomCoord();
+ double y = randomCoord();
+ if (x < y) {
+ return new Range(x, y);
+ } else {
+ return new Range(y, x);
+ }
+ }
+
+ public void testRandom() throws Exception {
+ List<Point> points = new ArrayList<>();
+ // nocommit
+ //int numPoints = atLeast(10000);
+ int numPoints = atLeast(200);
+ Directory dir = newDirectory();
+ IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
+ int maxPointsInLeaf = TestUtil.nextInt(random(), 16, 2048);
+
+ // nocommit
+ maxPointsInLeaf = 100;
+
+ int maxPointsSortInHeap = TestUtil.nextInt(random(), maxPointsInLeaf, 1024*1024);
+
+ BKD3DTreeWriter w = new BKD3DTreeWriter(maxPointsInLeaf, maxPointsSortInHeap);
+ for(int docID=0;docID<numPoints;docID++) {
+ Point point;
+ if (docID > 0 && random().nextInt(30) == 17) {
+ // Dup point
+ point = points.get(random().nextInt(points.size()));
+ } else {
+ point = new Point(randomCoord(),
+ randomCoord(),
+ randomCoord());
+ }
+
+ if (VERBOSE) {
+ System.out.println(" docID=" + docID + " point=" + point);
+ System.out.println(" x=" + encodeValue(point.x) +
+ " y=" + encodeValue(point.y) +
+ " z=" + encodeValue(point.z));
+ }
+
+ points.add(point);
+ w.add(encodeValue(point.x),
+ encodeValue(point.y),
+ encodeValue(point.z),
+ docID);
+ }
+
+ long indexFP = w.finish(out);
+ out.close();
+
+ IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
+ in.seek(indexFP);
+ BKD3DTreeReader r = new BKD3DTreeReader(in, numPoints);
+
+ int numIters = atLeast(100);
+ for(int iter=0;iter<numIters;iter++) {
+ // bbox
+ Range x = randomRange();
+ Range y = randomRange();
+ Range z = randomRange();
+
+ int xMinEnc = encodeValue(x.min);
+ int xMaxEnc = encodeValue(x.max);
+ int yMinEnc = encodeValue(y.min);
+ int yMaxEnc = encodeValue(y.max);
+ int zMinEnc = encodeValue(z.min);
+ int zMaxEnc = encodeValue(z.max);
+
+ if (VERBOSE) {
+ System.out.println("\nTEST: iter=" + iter + " bbox: x=" + x + " (" + xMinEnc + " TO " + xMaxEnc+ ")" + " y=" + y + " (" + yMinEnc + " TO " + yMaxEnc + ")" + " z=" + z + " (" + zMinEnc + " TO " + zMaxEnc + ")" );
+ }
+
+ DocIdSet hits = r.intersect(xMinEnc, xMaxEnc,
+ yMinEnc, yMaxEnc,
+ zMinEnc, zMaxEnc,
+
+ new BKD3DTreeReader.ValueFilter() {
+
+ @Override
+ public boolean accept(int docID) {
+ Point point = points.get(docID);
+ //System.out.println(" accept docID=" + docID + " point=" + point + " (x=" + encodeValue(point.x) + " y=" + encodeValue(point.y) + " z=" + encodeValue(point.z) + ")");
+
+ int xEnc = encodeValue(point.x);
+ int yEnc = encodeValue(point.y);
+ int zEnc = encodeValue(point.z);
+
+ boolean accept = xEnc >= xMinEnc && xEnc <= xMaxEnc &&
+ yEnc >= yMinEnc && yEnc <= yMaxEnc &&
+ zEnc >= zMinEnc && zEnc <= zMaxEnc;
+ //System.out.println(" " + accept);
+
+ return accept;
+ }
+
+ @Override
+ public BKD3DTreeReader.Relation compare(int cellXMin, int cellXMax,
+ int cellYMin, int cellYMax,
+ int cellZMin, int cellZMax) {
+ if (cellXMin > xMaxEnc || cellXMax < xMinEnc) {
+ return BKD3DTreeReader.Relation.OUTSIDE;
+ }
+ if (cellYMin > yMaxEnc || cellYMax < yMinEnc) {
+ return BKD3DTreeReader.Relation.OUTSIDE;
+ }
+ if (cellZMin > zMaxEnc || cellZMax < zMinEnc) {
+ return BKD3DTreeReader.Relation.OUTSIDE;
+ }
+
+ if (cellXMin >= xMinEnc && cellXMax <= xMaxEnc &&
+ cellYMin >= yMinEnc && cellYMax <= yMaxEnc &&
+ cellZMin >= zMinEnc && cellZMax <= zMaxEnc) {
+ return BKD3DTreeReader.Relation.INSIDE;
+ }
+
+ return BKD3DTreeReader.Relation.CROSSES;
+ }
+ });
+
+ DocIdSetIterator disi = hits.iterator();
+ FixedBitSet matches = new FixedBitSet(numPoints);
+ while (true) {
+ int nextHit = disi.nextDoc();
+ if (nextHit == DocIdSetIterator.NO_MORE_DOCS) {
+ break;
+ }
+ matches.set(nextHit);
+ }
+ if (VERBOSE) {
+ System.out.println(" total hits: " + matches.cardinality());
+ }
+
+ for(int docID=0;docID<numPoints;docID++) {
+ Point point = points.get(docID);
+ boolean actual = matches.get(docID);
+
+ // nocommit no good the test is "using" some of the code it's supposed to test ...
+ // nocommit wait: this shouldn't be necessary? bkd tree IS precise?
+ int xEnc = encodeValue(point.x);
+ int yEnc = encodeValue(point.y);
+ int zEnc = encodeValue(point.z);
+
+ boolean expected = xEnc >= xMinEnc && xEnc <= xMaxEnc &&
+ yEnc >= yMinEnc && yEnc <= yMaxEnc &&
+ zEnc >= zMinEnc && zEnc <= zMaxEnc;
+
+ if (expected != actual) {
+ System.out.println("docID=" + docID + " is wrong: expected=" + expected + " actual=" + actual);
+ System.out.println(" x=" + point.x + " (" + xEnc + ")" + " y=" + point.y + " (" + yEnc + ")" + " z=" + point.z + " (" + zEnc + ")");
+ fail("wrong match");
+ }
+ }
+ }
+
+ in.close();
+ dir.close();
+ }
+}
+