You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2021/05/14 08:10:52 UTC
[lucene-solr] 02/02: LUCENE-9932: Performance improvement for BKD index building

This is an automated email from the ASF dual-hosted git repository.

jpountz pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit 86b6d35f7229010757924da3312ffb2fe72b17f4
Author: neoReMinD <xu...@gmail.com>
AuthorDate: Fri May 14 09:49:27 2021 +0200

    LUCENE-9932: Performance improvement for BKD index building
---
 lucene/CHANGES.txt                                 |   2 +
 .../apache/lucene/codecs/MutablePointValues.java   |   5 +
 .../org/apache/lucene/index/PointValuesWriter.java |  26 +++
 .../org/apache/lucene/util/MSBRadixSorter.java     |  10 +-
 .../apache/lucene/util/StableMSBRadixSorter.java   |  81 ++++++++
 .../lucene/util/bkd/MutablePointsReaderUtils.java  |  65 +++----
 .../lucene/util/TestStableMSBRadixSorter.java      | 211 +++++++++++++++++++++
 .../test/org/apache/lucene/util/bkd/TestBKD.java   |  22 +++
 .../util/bkd/TestMutablePointsReaderUtils.java     |  60 ++++--
 9 files changed, 428 insertions(+), 54 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index fb5c0ab..ac78502 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -58,6 +58,8 @@ Improvements
 Optimizations
 ---------------------
 
+* LUCENE-9932: Performance improvement for BKD index building (neoremind)
+
 * LUCENE-9827: Speed up merging of stored fields and term vectors for smaller segments.
   (Daniel Mitterdorfer, Dimitrios Liapis, Adrien Grand, Robert Muir)
 
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/MutablePointValues.java b/lucene/core/src/java/org/apache/lucene/codecs/MutablePointValues.java
index 8f4d69c..2d38ac5 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/MutablePointValues.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/MutablePointValues.java
@@ -39,4 +39,9 @@ public abstract class MutablePointValues extends PointValues {
   /** Swap the i-th and j-th values. */
   public abstract void swap(int i, int j);
 
+  /** Save the i-th value into the j-th position in temporary storage. */
+  public abstract void save(int i, int j);
+
+  /** Restore values between i-th and j-th(excluding) in temporary storage into original storage. */
+  public abstract void restore(int i, int j);
 }
diff --git a/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java b/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java
index d715872..ddbc6a6 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PointValuesWriter.java
@@ -72,6 +72,7 @@ class PointValuesWriter {
   public void flush(SegmentWriteState state, Sorter.DocMap sortMap, PointsWriter writer) throws IOException {
     PointValues points = new MutablePointValues() {
       final int[] ords = new int[numPoints];
+      int[] temp;
       {
         for (int i = 0; i < numPoints; ++i) {
           ords[i] = i;
@@ -154,6 +155,21 @@ class PointValuesWriter {
         final long offset = (long) packedBytesLength * ords[i] + k;
         return bytes.readByte(offset);
       }
+
+      @Override
+      public void save(int i, int j) {
+        if (temp == null) {
+          temp = new int[ords.length];
+        }
+        temp[j] = ords[i];
+      }
+
+      @Override
+      public void restore(int i, int j) {
+        if (temp != null) {
+          System.arraycopy(temp, i, ords, i, j - i);
+        }
+      }
     };
 
     final PointValues values;
@@ -277,5 +293,15 @@ class PointValuesWriter {
     public void swap(int i, int j) {
       in.swap(i, j);
     }
+
+    @Override
+    public void save(int i, int j) {
+      in.save(i, j);
+    }
+
+    @Override
+    public void restore(int i, int j) {
+      in.restore(i, j);
+    }
   }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/util/MSBRadixSorter.java b/lucene/core/src/java/org/apache/lucene/util/MSBRadixSorter.java
index f0e2a61..f1059a9 100644
--- a/lucene/core/src/java/org/apache/lucene/util/MSBRadixSorter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/MSBRadixSorter.java
@@ -31,7 +31,7 @@ public abstract class MSBRadixSorter extends Sorter {
   // locality)
   private static final int LEVEL_THRESHOLD = 8;
   // size of histograms: 256 + 1 to indicate that the string is finished
-  private static final int HISTOGRAM_SIZE = 257;
+  protected static final int HISTOGRAM_SIZE = 257;
   // buckets below this size will be sorted with introsort
   private static final int LENGTH_THRESHOLD = 100;
 
@@ -40,7 +40,7 @@ public abstract class MSBRadixSorter extends Sorter {
   private final int[] endOffsets = new int[HISTOGRAM_SIZE];
   private final int[] commonPrefix;
 
-  private final int maxLength;
+  protected final int maxLength;
 
   /**
    * Sole constructor.
@@ -121,7 +121,7 @@ public abstract class MSBRadixSorter extends Sorter {
     sort(from, to, 0, 0);
   }
 
-  private void sort(int from, int to, int k, int l) {
+  protected void sort(int from, int to, int k, int l) {
     if (to - from <= LENGTH_THRESHOLD || l >= LEVEL_THRESHOLD) {
       introSort(from, to, k);
     } else {
@@ -195,7 +195,7 @@ public abstract class MSBRadixSorter extends Sorter {
   }
 
   /** Return a number for the k-th character between 0 and {@link #HISTOGRAM_SIZE}. */
-  private int getBucket(int i, int k) {
+  protected int getBucket(int i, int k) {
     return byteAt(i, k) + 1;
   }
 
@@ -268,7 +268,7 @@ public abstract class MSBRadixSorter extends Sorter {
    * @param startOffsets start offsets per bucket
    * @param endOffsets end offsets per bucket
    */
-  private void reorder(int from, int to, int[] startOffsets, int[] endOffsets, int k) {
+  protected void reorder(int from, int to, int[] startOffsets, int[] endOffsets, int k) {
     // reorder in place, like the dutch flag problem
     for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
       final int limit = endOffsets[i];
diff --git a/lucene/core/src/java/org/apache/lucene/util/StableMSBRadixSorter.java b/lucene/core/src/java/org/apache/lucene/util/StableMSBRadixSorter.java
new file mode 100644
index 0000000..c6ec774
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/StableMSBRadixSorter.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.util;
+
+/**
+ * Stable radix sorter for variable-length strings.
+ *
+ * @lucene.internal
+ */
+public abstract class StableMSBRadixSorter extends MSBRadixSorter {
+
+  private final int[] fixedStartOffsets;
+
+  public StableMSBRadixSorter(int maxLength) {
+    super(maxLength);
+    fixedStartOffsets = new int[HISTOGRAM_SIZE];
+  }
+
+  /** Save the i-th value into the j-th position in temporary storage. */
+  protected abstract void save(int i, int j);
+
+  /** Restore values between i-th and j-th(excluding) in temporary storage into original storage. */
+  protected abstract void restore(int i, int j);
+
+  @Override
+  protected Sorter getFallbackSorter(int k) {
+    return new InPlaceMergeSorter() {
+      @Override
+      protected void swap(int i, int j) {
+        StableMSBRadixSorter.this.swap(i, j);
+      }
+
+      @Override
+      protected int compare(int i, int j) {
+        for (int o = k; o < maxLength; ++o) {
+          final int b1 = byteAt(i, o);
+          final int b2 = byteAt(j, o);
+          if (b1 != b2) {
+            return b1 - b2;
+          } else if (b1 == -1) {
+            break;
+          }
+        }
+        return 0;
+      }
+    };
+  }
+
+  /**
+   * Reorder elements in stable way, since Dutch sort does not guarantee ordering for same values.
+   *
+   * <p>When this method returns, startOffsets and endOffsets are equal.
+   */
+  @Override
+  protected void reorder(int from, int to, int[] startOffsets, int[] endOffsets, int k) {
+    System.arraycopy(startOffsets, 0, fixedStartOffsets, 0, startOffsets.length);
+    for (int i = 0; i < HISTOGRAM_SIZE; ++i) {
+      final int limit = endOffsets[i];
+      for (int h1 = fixedStartOffsets[i]; h1 < limit; h1++) {
+        final int b = getBucket(from + h1, k);
+        final int h2 = startOffsets[b]++;
+        save(from + h1, from + h2);
+      }
+    }
+    restore(from, to);
+  }
+}
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java b/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java
index 3d54e69..e6bfd2a 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java
@@ -21,9 +21,9 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FutureArrays;
 import org.apache.lucene.util.IntroSelector;
 import org.apache.lucene.util.IntroSorter;
-import org.apache.lucene.util.MSBRadixSorter;
 import org.apache.lucene.util.RadixSelector;
 import org.apache.lucene.util.Selector;
+import org.apache.lucene.util.StableMSBRadixSorter;
 import org.apache.lucene.util.packed.PackedInts;
 
 /** Utility APIs for sorting and partitioning buffered points.
@@ -36,8 +36,22 @@ public final class MutablePointsReaderUtils {
   /** Sort the given {@link MutablePointValues} based on its packed value then doc ID. */
   public static void sort(BKDConfig config, int maxDoc,
                           MutablePointValues reader, int from, int to) {
-    final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
-    new MSBRadixSorter(config.packedBytesLength + (bitsPerDocId + 7) / 8) {
+    boolean sortedByDocID = true;
+    int prevDoc = 0;
+    for (int i = from; i < to; ++i) {
+      int doc = reader.getDocID(i);
+      if (doc < prevDoc) {
+        sortedByDocID = false;
+        break;
+      }
+      prevDoc = doc;
+    }
+
+    // No need to tie break on doc IDs if already sorted by doc ID, since we use a stable sort.
+    // This should be a common situation as IndexWriter accumulates data in doc ID order when
+    // index sorting is not enabled.
+    final int bitsPerDocId = sortedByDocID ? 0 : PackedInts.bitsRequired(maxDoc - 1);
+    new StableMSBRadixSorter(config.packedBytesLength + (bitsPerDocId + 7) / 8) {
 
       @Override
       protected void swap(int i, int j) {
@@ -45,6 +59,16 @@ public final class MutablePointsReaderUtils {
       }
 
       @Override
+      protected void save(int i, int j) {
+        reader.save(i, j);
+      }
+
+      @Override
+      protected void restore(int i, int j) {
+        reader.restore(i, j);
+      }
+
+      @Override
       protected int byteAt(int i, int k) {
         if (k < config.packedBytesLength) {
           return Byte.toUnsignedInt(reader.getByteAt(i, k));
@@ -53,41 +77,6 @@ public final class MutablePointsReaderUtils {
           return (reader.getDocID(i) >>> Math.max(0, shift)) & 0xff;
         }
       }
-
-      @Override
-      protected org.apache.lucene.util.Sorter getFallbackSorter(int k) {
-        return new IntroSorter() {
-
-          final BytesRef pivot = new BytesRef();
-          final BytesRef scratch = new BytesRef();
-          int pivotDoc;
-
-          @Override
-          protected void swap(int i, int j) {
-            reader.swap(i, j);
-          }
-
-          @Override
-          protected void setPivot(int i) {
-            reader.getValue(i, pivot);
-            pivotDoc = reader.getDocID(i);
-          }
-
-          @Override
-          protected int comparePivot(int j) {
-            if (k < config.packedBytesLength) {
-              reader.getValue(j, scratch);
-              int cmp = FutureArrays.compareUnsigned(pivot.bytes, pivot.offset + k, pivot.offset + k + config.packedBytesLength - k,
-                      scratch.bytes, scratch.offset + k, scratch.offset + k + config.packedBytesLength - k);
-              if (cmp != 0) {
-                return cmp;
-              }
-            }
-            return pivotDoc - reader.getDocID(j);
-          }
-        };
-      }
-
     }.sort(from, to);
   }
 
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestStableMSBRadixSorter.java b/lucene/core/src/test/org/apache/lucene/util/TestStableMSBRadixSorter.java
new file mode 100644
index 0000000..387b6b6
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/util/TestStableMSBRadixSorter.java
@@ -0,0 +1,211 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.util;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+public class TestStableMSBRadixSorter extends LuceneTestCase {
+
+  private void test(BytesRef[] refs, int len) {
+    BytesRef[] expected = ArrayUtil.copyOfSubArray(refs, 0, len);
+    Arrays.sort(expected);
+
+    int maxLength = 0;
+    for (int i = 0; i < len; ++i) {
+      BytesRef ref = refs[i];
+      maxLength = Math.max(maxLength, ref.length);
+    }
+    switch (random().nextInt(3)) {
+      case 0:
+        maxLength += TestUtil.nextInt(random(), 1, 5);
+        break;
+      case 1:
+        maxLength = Integer.MAX_VALUE;
+        break;
+      default:
+        // leave unchanged
+        break;
+    }
+
+    final int finalMaxLength = maxLength;
+    new StableMSBRadixSorter(maxLength) {
+
+      private BytesRef[] temp;
+
+      @Override
+      protected int byteAt(int i, int k) {
+        assertTrue(k < finalMaxLength);
+        BytesRef ref = refs[i];
+        if (ref.length <= k) {
+          return -1;
+        }
+        return ref.bytes[ref.offset + k] & 0xff;
+      }
+
+      @Override
+      protected void swap(int i, int j) {
+        BytesRef tmp = refs[i];
+        refs[i] = refs[j];
+        refs[j] = tmp;
+      }
+
+      @Override
+      protected void save(int i, int j) {
+        if (temp == null) {
+          temp = new BytesRef[refs.length];
+        }
+        temp[j] = refs[i];
+      }
+
+      @Override
+      protected void restore(int i, int j) {
+        if (temp != null) {
+          System.arraycopy(temp, i, refs, i, j - i);
+        }
+      }
+    }.sort(0, len);
+    BytesRef[] actual = ArrayUtil.copyOfSubArray(refs, 0, len);
+    assertArrayEquals(expected, actual);
+    // Verify that the arrays are not only equal after sorting with Arrays#sort and
+    // StableMSBRadixSorter
+    // but also that they have the very same instance at every index.
+    // This is different from MSBRadixSorter which does not guarantee ordering of the same value.
+    assertEquals(expected.length, actual.length);
+    for (int i = 0; i < expected.length; i++) {
+      assertSame(expected[i].bytes, actual[i].bytes);
+    }
+  }
+
+  public void testEmpty() {
+    test(new BytesRef[random().nextInt(5)], 0);
+  }
+
+  public void testOneValue() {
+    BytesRef bytes = new BytesRef(TestUtil.randomSimpleString(random()));
+    test(new BytesRef[] {bytes}, 1);
+  }
+
+  public void testTwoValues() {
+    BytesRef bytes1 = new BytesRef(TestUtil.randomSimpleString(random()));
+    BytesRef bytes2 = new BytesRef(TestUtil.randomSimpleString(random()));
+    test(new BytesRef[] {bytes1, bytes2}, 2);
+  }
+
+  private void testRandom(int commonPrefixLen, int maxLen) {
+    byte[] commonPrefix = new byte[commonPrefixLen];
+    random().nextBytes(commonPrefix);
+    final int len = random().nextInt(100000);
+    BytesRef[] bytes = new BytesRef[len + random().nextInt(50)];
+    for (int i = 0; i < len; ++i) {
+      byte[] b = new byte[commonPrefixLen + random().nextInt(maxLen)];
+      random().nextBytes(b);
+      System.arraycopy(commonPrefix, 0, b, 0, commonPrefixLen);
+      bytes[i] = new BytesRef(b);
+    }
+    test(bytes, len);
+  }
+
+  public void testRandom() {
+    for (int iter = 0; iter < 10; ++iter) {
+      testRandom(0, 10);
+    }
+  }
+
+  public void testRandomWithLotsOfDuplicates() {
+    for (int iter = 0; iter < 10; ++iter) {
+      testRandom(0, 2);
+    }
+  }
+
+  public void testRandomWithSharedPrefix() {
+    for (int iter = 0; iter < 10; ++iter) {
+      testRandom(TestUtil.nextInt(random(), 1, 30), 10);
+    }
+  }
+
+  public void testRandomWithSharedPrefixAndLotsOfDuplicates() {
+    for (int iter = 0; iter < 10; ++iter) {
+      testRandom(TestUtil.nextInt(random(), 1, 30), 2);
+    }
+  }
+
+  public void testRandom2() {
+    // how large our alphabet is
+    int letterCount = TestUtil.nextInt(random(), 2, 10);
+
+    // how many substring fragments to use
+    int substringCount = TestUtil.nextInt(random(), 2, 10);
+    Set<BytesRef> substringsSet = new HashSet<>();
+
+    // how many strings to make
+    int stringCount = atLeast(10000);
+
+    // System.out.println("letterCount=" + letterCount + " substringCount=" + substringCount + "
+    // stringCount=" + stringCount);
+    while (substringsSet.size() < substringCount) {
+      int length = TestUtil.nextInt(random(), 2, 10);
+      byte[] bytes = new byte[length];
+      for (int i = 0; i < length; i++) {
+        bytes[i] = (byte) random().nextInt(letterCount);
+      }
+      BytesRef br = new BytesRef(bytes);
+      substringsSet.add(br);
+      // System.out.println("add substring count=" + substringsSet.size() + ": " + br);
+    }
+
+    BytesRef[] substrings = substringsSet.toArray(new BytesRef[substringsSet.size()]);
+    double[] chance = new double[substrings.length];
+    double sum = 0.0;
+    for (int i = 0; i < substrings.length; i++) {
+      chance[i] = random().nextDouble();
+      sum += chance[i];
+    }
+
+    // give each substring a random chance of occurring:
+    double accum = 0.0;
+    for (int i = 0; i < substrings.length; i++) {
+      accum += chance[i] / sum;
+      chance[i] = accum;
+    }
+
+    Set<BytesRef> stringsSet = new HashSet<>();
+    int iters = 0;
+    while (stringsSet.size() < stringCount && iters < stringCount * 5) {
+      int count = TestUtil.nextInt(random(), 1, 5);
+      BytesRefBuilder b = new BytesRefBuilder();
+      for (int i = 0; i < count; i++) {
+        double v = random().nextDouble();
+        accum = 0.0;
+        for (int j = 0; j < substrings.length; j++) {
+          accum += chance[j];
+          if (accum >= v) {
+            b.append(substrings[j]);
+            break;
+          }
+        }
+      }
+      BytesRef br = b.toBytesRef();
+      stringsSet.add(br);
+      // System.out.println("add string count=" + stringsSet.size() + ": " + br);
+      iters++;
+    }
+
+    test(stringsSet.toArray(new BytesRef[stringsSet.size()]), stringsSet.size());
+  }
+}
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
index f6eacc6..d37284a 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
@@ -1465,6 +1465,18 @@ public class TestBKD extends LuceneTestCase {
 
       @Override
       public byte getByteAt(int i, int k) {
+        BytesRef b = new BytesRef();
+        getValue(i, b);
+        return b.bytes[b.offset + k];
+      }
+
+      @Override
+      public void save(int i, int j) {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public void restore(int i, int j) {
         throw new UnsupportedOperationException();
       }
     };
@@ -1582,6 +1594,16 @@ public class TestBKD extends LuceneTestCase {
       public int getDocCount() {
         return 11;
       }
+
+      @Override
+      public void save(int i, int j) {
+        throw new UnsupportedOperationException();
+      }
+
+      @Override
+      public void restore(int i, int j) {
+        throw new UnsupportedOperationException();
+      }
     };
     try (IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT)) {
       IllegalStateException ex = expectThrows(IllegalStateException.class, () -> { w.writeField(out, out, out, "", val);});
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestMutablePointsReaderUtils.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestMutablePointsReaderUtils.java
index e75ab24..d4869b0 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestMutablePointsReaderUtils.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestMutablePointsReaderUtils.java
@@ -30,16 +30,22 @@ import org.apache.lucene.util.TestUtil;
 public class TestMutablePointsReaderUtils extends LuceneTestCase {
 
   public void testSort() {
-    for (int iter = 0; iter < 5; ++iter) {
-      doTestSort();
+    for (int iter = 0; iter < 10; ++iter) {
+      doTestSort(false);
+    }
+  }
+
+  public void testSortWithIncrementalDocId() {
+    for (int iter = 0; iter < 10; ++iter) {
+      doTestSort(true);
     }
   }
 
-  private void doTestSort() {
+  private void doTestSort(boolean isDocIdIncremental) {
     final int bytesPerDim = TestUtil.nextInt(random(), 1, 16);
     final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
     BKDConfig config = new BKDConfig(1, 1, bytesPerDim, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
-    Point[] points = createRandomPoints(config, maxDoc, new int[1]);
+    Point[] points = createRandomPoints(config, maxDoc, new int[1], isDocIdIncremental);
     DummyPointsReader reader = new DummyPointsReader(points);
     MutablePointsReaderUtils.sort(config, maxDoc, reader, 0, points.length);
     Arrays.sort(points, new Comparator<Point>() {
@@ -53,7 +59,23 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
       }
     });
     assertNotSame(points, reader.points);
-    assertArrayEquals(points, reader.points);
+    assertEquals(points.length, reader.points.length);
+
+    // Check doc IDs are in ascending order.
+    // If doc IDs are already increasing, StableMSBRadixSorter should keep doc ID's ordering.
+    // If doc IDs are not ordered, StableMSBRadixSorter should compare doc ID to guarantee the
+    // ordering.
+    Point prevPoint = null;
+    for (int i = 0; i < points.length; i++) {
+      assertEquals(points[i].packedValue, reader.points[i].packedValue);
+      assertSame(points[i].packedValue, reader.points[i].packedValue);
+      if (prevPoint != null) {
+        if (reader.points[i].packedValue.equals(prevPoint.packedValue)) {
+          assertTrue(reader.points[i].doc >= prevPoint.doc);
+        }
+      }
+      prevPoint = reader.points[i];
+    }
   }
 
   public void testSortByDim() {
@@ -66,7 +88,7 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
     BKDConfig config = createRandomConfig();
     final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
     int[] commonPrefixLengths = new int[config.numDims];
-    Point[] points = createRandomPoints(config, maxDoc, commonPrefixLengths);
+    Point[] points = createRandomPoints(config, maxDoc, commonPrefixLengths, false);
     DummyPointsReader reader = new DummyPointsReader(points);
     final int sortedDim = random().nextInt(config.numIndexDims);
     MutablePointsReaderUtils.sortByDim(config, sortedDim, commonPrefixLengths, reader, 0, points.length,
@@ -99,7 +121,7 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
     BKDConfig config = createRandomConfig();
     int[] commonPrefixLengths  = new int[config.numDims];
     final int maxDoc = TestUtil.nextInt(random(), 1, 1 << random().nextInt(30));
-    Point[] points = createRandomPoints(config, maxDoc, commonPrefixLengths);
+    Point[] points = createRandomPoints(config, maxDoc, commonPrefixLengths, false);
     final int splitDim =  random().nextInt(config.numIndexDims);
     DummyPointsReader reader = new DummyPointsReader(points);
     final int pivot = TestUtil.nextInt(random(), 0, points.length - 1);
@@ -138,15 +160,15 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
     return new BKDConfig(numDims, numIndexDims, bytesPerDim, maxPointsInLeafNode);
   }
 
-  private static Point[] createRandomPoints(BKDConfig config, int maxDoc, int[] commonPrefixLengths) {
+  private static Point[] createRandomPoints(BKDConfig config, int maxDoc, int[] commonPrefixLengths, boolean isDocIdIncremental) {
     assertTrue(commonPrefixLengths.length == config.numDims);
     final int numPoints = TestUtil.nextInt(random(), 1, 100000);
     Point[] points = new Point[numPoints];
-    if (random().nextInt(5) != 0) {
+    if (random().nextInt(10) != 0) {
       for (int i = 0; i < numPoints; ++i) {
         byte[] value = new byte[config.packedBytesLength];
         random().nextBytes(value);
-        points[i] = new Point(value, random().nextInt(maxDoc));
+        points[i] = new Point(value, isDocIdIncremental ? Math.min(i, maxDoc - 1) : random().nextInt(maxDoc));
       }
       for (int i = 0; i < config.numDims; ++i) {
         commonPrefixLengths[i] = TestUtil.nextInt(random(), 0, config.bytesPerDim);
@@ -170,7 +192,7 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
         System.arraycopy(indexDims, 0, value, 0, config.packedIndexBytesLength);
         random().nextBytes(dataDims);
         System.arraycopy(dataDims, 0, value, config.packedIndexBytesLength, numDataDims * config.bytesPerDim);
-        points[i] = new Point(value, random().nextInt(maxDoc));
+        points[i] = new Point(value, isDocIdIncremental ? Math.min(i, maxDoc - 1) : random().nextInt(maxDoc));
       }
       for (int i = 0; i < config.numIndexDims; ++i) {
         commonPrefixLengths[i] = config.bytesPerDim;
@@ -228,6 +250,8 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
 
     private final Point[] points;
 
+    private Point[] temp;
+
     DummyPointsReader(Point[] points) {
       this.points = points.clone();
     }
@@ -300,6 +324,20 @@ public class TestMutablePointsReaderUtils extends LuceneTestCase {
       throw new UnsupportedOperationException();
     }
 
+    @Override
+    public void save(int i, int j) {
+      if (temp == null) {
+        temp = new Point[points.length];
+      }
+      temp[j] = points[i];
+    }
+
+    @Override
+    public void restore(int i, int j) {
+      if (temp != null) {
+        System.arraycopy(temp, i, points, i, j - i);
+      }
+    }
   }
 
 }