Posted to commits@lucene.apache.org by kr...@apache.org on 2016/12/15 21:35:16 UTC

[01/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7542: Remove debug printing of parsed versions

Repository: lucene-solr
Updated Branches:
  refs/heads/jira/solr-8593 37fdc37fc -> 6c0cafeda


LUCENE-7542: Remove debug printing of parsed versions


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/98f75723
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/98f75723
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/98f75723

Branch: refs/heads/jira/solr-8593
Commit: 98f75723f3bc6a718f1a7b47a50b820c4fb408f6
Parents: e64bcb3
Author: Steve Rowe <sa...@apache.org>
Authored: Wed Nov 30 14:04:58 2016 -0500
Committer: Steve Rowe <sa...@apache.org>
Committed: Wed Nov 30 17:07:48 2016 -0500

----------------------------------------------------------------------
 dev-tools/scripts/smokeTestRelease.py | 1 -
 1 file changed, 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/98f75723/dev-tools/scripts/smokeTestRelease.py
----------------------------------------------------------------------
diff --git a/dev-tools/scripts/smokeTestRelease.py b/dev-tools/scripts/smokeTestRelease.py
index 2b1ff19..f9c3499 100644
--- a/dev-tools/scripts/smokeTestRelease.py
+++ b/dev-tools/scripts/smokeTestRelease.py
@@ -497,7 +497,6 @@ def versionToTuple(version, name):
     versionTuple = versionTuple[:-2] + ('100',)
   elif versionTuple[-1].lower()[:2] == 'rc':
     versionTuple = versionTuple[:-2] + (versionTuple[-1][2:],)
-  print('%s: %s' % (version, versionTuple))
   return versionTuple
 
 


[03/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7576: detect when special case automaton is passed to Terms.intersect

Posted by kr...@apache.org.
LUCENE-7576: detect when special case automaton is passed to Terms.intersect


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/fcccd317
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/fcccd317
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/fcccd317

Branch: refs/heads/jira/solr-8593
Commit: fcccd317ddb44a742a0b3265fcf32923649f38cd
Parents: c61268f
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Dec 2 15:26:04 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Dec 2 15:26:04 2016 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                                |  4 ++++
 .../lucene/codecs/blocktree/FieldReader.java      |  3 +++
 .../src/java/org/apache/lucene/index/Terms.java   |  8 ++++++--
 .../org/apache/lucene/index/TestTermsEnum.java    | 18 ++++++++++++++++++
 4 files changed, 31 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fcccd317/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e62a99d..4afc507 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -81,6 +81,10 @@ Bug Fixes
 * LUCENE-7536: ASCIIFoldingFilterFactory used to return an illegal multi-term
   component when preserveOriginal was set to true. (Adrien Grand)
 
+* LUCENE-7576: Fix Terms.intersect in the default codec to detect when
+  the incoming automaton is a special case and throw a clearer
+  exception than NullPointerException. (Tom Mortimer via Mike McCandless)
+
 Improvements
 
 * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fcccd317/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
index 7f13a32..4ee3826 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
@@ -182,6 +182,9 @@ public final class FieldReader extends Terms implements Accountable {
     //System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
     // TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
     // can we optimize knowing that...?
+    if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
+      throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
+    }
     return new IntersectTermsEnum(this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm, compiled.sinkState);
   }
     

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fcccd317/lucene/core/src/java/org/apache/lucene/index/Terms.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/Terms.java b/lucene/core/src/java/org/apache/lucene/index/Terms.java
index dd48ce9..7197e25 100644
--- a/lucene/core/src/java/org/apache/lucene/index/Terms.java
+++ b/lucene/core/src/java/org/apache/lucene/index/Terms.java
@@ -49,8 +49,12 @@ public abstract class Terms {
    *  provided <code>startTerm</code> must be accepted by
    *  the automaton.
    *
-   * <p><b>NOTE</b>: the returned TermsEnum cannot
-   * seek</p>.
+   *  <p>This is an expert low-level API and will only work
+   *  for {@code NORMAL} compiled automata.  To handle any
+   *  compiled automata you should instead use
+   *  {@link CompiledAutomaton#getTermsEnum}.
+
+   *  <p><b>NOTE</b>: the returned TermsEnum cannot seek.</p>
    *
    *  <p><b>NOTE</b>: the terms dictionary is free to
    *  return arbitrary terms as long as the resulted visited

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fcccd317/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
index 3f15381..a388d42 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestTermsEnum.java
@@ -998,4 +998,22 @@ public class TestTermsEnum extends LuceneTestCase {
     }
     dir.close();
   }
+
+  // LUCENE-7576
+  public void testIntersectRegexp() throws Exception {
+    Directory d = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), d);
+    Document doc = new Document();
+    doc.add(newStringField("field", "foobar", Field.Store.NO));
+    w.addDocument(doc);
+    IndexReader r = w.getReader();
+    Fields fields = MultiFields.getFields(r);
+    CompiledAutomaton automaton = new CompiledAutomaton(new RegExp("do_not_match_anything").toAutomaton());
+    Terms terms = fields.terms("field");
+    String message = expectThrows(IllegalArgumentException.class, () -> {terms.intersect(automaton, null);}).getMessage();
+    assertEquals("please use CompiledAutomaton.getTermsEnum instead", message);
+    r.close();
+    w.close();
+    d.close();
+  }
 }
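
For callers that now hit the new IllegalArgumentException, here is a minimal sketch (not from the patch) of the pattern the exception message points to, assuming the Lucene 6.x classes already used in the test above; the helper name matchingTerms is hypothetical. CompiledAutomaton.getTermsEnum handles the special automaton types (NONE, ALL, SINGLE) itself and delegates to Terms.intersect only for NORMAL automata, which is exactly the invariant the new check in FieldReader enforces:

  import java.io.IOException;
  import org.apache.lucene.index.Terms;
  import org.apache.lucene.index.TermsEnum;
  import org.apache.lucene.util.automaton.CompiledAutomaton;
  import org.apache.lucene.util.automaton.RegExp;

  final class IntersectExample {
    static TermsEnum matchingTerms(Terms terms, String regexp) throws IOException {
      CompiledAutomaton compiled = new CompiledAutomaton(new RegExp(regexp).toAutomaton());
      // Safe for every AUTOMATON_TYPE; calling terms.intersect(compiled, null)
      // directly now throws IllegalArgumentException for non-NORMAL automata.
      return compiled.getTermsEnum(terms);
    }
  }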


[18/50] [abbrv] lucene-solr:jira/solr-8593: removed two unused imports in TestPlainTextEntityProcessor.java

Posted by kr...@apache.org.
removed two unused imports in TestPlainTextEntityProcessor.java


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d2ed42b8
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d2ed42b8
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d2ed42b8

Branch: refs/heads/jira/solr-8593
Commit: d2ed42b847b1114fe3d0befc788fba55255d4ee2
Parents: ca5e736
Author: Christine Poerschke <cp...@apache.org>
Authored: Wed Dec 7 18:39:04 2016 +0000
Committer: Christine Poerschke <cp...@apache.org>
Committed: Wed Dec 7 18:39:04 2016 +0000

----------------------------------------------------------------------
 .../solr/handler/dataimport/TestPlainTextEntityProcessor.java      | 2 --
 1 file changed, 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d2ed42b8/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java
----------------------------------------------------------------------
diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java
index a286d84..2385438 100644
--- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java
+++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java
@@ -21,10 +21,8 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.StringReader;
-import java.nio.charset.StandardCharsets;
 import java.sql.Blob;
 import java.sql.SQLException;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.Properties;
 


[19/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-8542: couple of tweaks (Michael Nilsson, Diego Ceccarelli, Christine Poerschke)

Posted by kr...@apache.org.
SOLR-8542: couple of tweaks (Michael Nilsson, Diego Ceccarelli, Christine Poerschke)

* removed code triplication in ManagedModelStore
* LTRScoringQuery.java tweaks
* FeatureLogger.makeFeatureVector(...) can now safely be called repeatedly (though that doesn't happen at present)
* make Feature.FeatureWeight.extractTerms a no-op; (OriginalScore|SolrFeature)Weight now implement extractTerms

* LTRThreadModule javadocs and README.md tweaks

* add TestFieldValueFeature.testBooleanValue test; replace "T"/"F" magic string use in FieldValueFeature
* add TestOriginalScoreScorer test; add OriginalScoreScorer.freq() method
* in TestMultipleAdditiveTreesModel revive dead explain test


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/bfc3690d
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/bfc3690d
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/bfc3690d

Branch: refs/heads/jira/solr-8593
Commit: bfc3690d5203cee20550450bac3771e5c2b85cbf
Parents: d2ed42b
Author: Christine Poerschke <cp...@apache.org>
Authored: Wed Dec 7 20:43:49 2016 +0000
Committer: Christine Poerschke <cp...@apache.org>
Committed: Wed Dec 7 20:46:42 2016 +0000

----------------------------------------------------------------------
 solr/contrib/ltr/README.md                      |  6 +--
 .../java/org/apache/solr/ltr/FeatureLogger.java | 10 ++--
 .../org/apache/solr/ltr/LTRScoringQuery.java    | 22 ++++-----
 .../org/apache/solr/ltr/LTRThreadModule.java    | 29 ++++++++++++
 .../org/apache/solr/ltr/feature/Feature.java    |  3 +-
 .../solr/ltr/feature/FieldValueFeature.java     | 18 +++++---
 .../solr/ltr/feature/OriginalScoreFeature.java  | 12 ++++-
 .../apache/solr/ltr/feature/SolrFeature.java    | 17 +++++--
 .../ltr/store/rest/ManagedFeatureStore.java     |  1 -
 .../solr/ltr/store/rest/ManagedModelStore.java  | 32 ++++++-------
 .../test-files/solr/collection1/conf/schema.xml |  2 +
 .../solr/ltr/feature/TestFieldValueFeature.java | 48 +++++++++++++++++---
 .../ltr/feature/TestOriginalScoreScorer.java    | 47 +++++++++++++++++++
 .../model/TestMultipleAdditiveTreesModel.java   | 44 +++++++++---------
 .../java/org/apache/solr/schema/BoolField.java  |  4 +-
 15 files changed, 212 insertions(+), 83 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bfc3690d/solr/contrib/ltr/README.md
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/README.md b/solr/contrib/ltr/README.md
index 88e2f67..2033ffc 100644
--- a/solr/contrib/ltr/README.md
+++ b/solr/contrib/ltr/README.md
@@ -390,17 +390,17 @@ About half the time for ranking is spent in the creation of weights for each fea
   <!-- Query parser used to rerank top docs with a provided model -->
   <queryParser name="ltr" class="org.apache.solr.ltr.search.LTRQParserPlugin">
      <int name="threadModule.totalPoolThreads">10</int> <!-- Maximum threads to share for all requests -->
-     <int name="threadModule.numThreadsPerRequest">5</int> <!-- Maximum threads to use for a single requests-->
+     <int name="threadModule.numThreadsPerRequest">5</int> <!-- Maximum threads to use for a single request -->
   </queryParser>
   
   <!-- Transformer for extracting features -->
   <transformer name="features" class="org.apache.solr.ltr.response.transform.LTRFeatureLoggerTransformerFactory">
      <int name="threadModule.totalPoolThreads">10</int> <!-- Maximum threads to share for all requests -->
-     <int name="threadModule.numThreadsPerRequest">5</int> <!-- Maximum threads to use for a single requests-->
+     <int name="threadModule.numThreadsPerRequest">5</int> <!-- Maximum threads to use for a single request -->
   </transformer>
 </config>
 
 ```
   
-The threadModule.totalPoolThreads option limits the total number of threads to be used across all query instances at any given time. threadModule.numThreadsPerRequest limits the number of threads used to process a single query. In the above example, 10 threads will be used to services all queries and a maximum of 5 threads to service a single query. If the solr instances is expected to receive no more than one query at a time, it is best to set both these numbers to the same value. If multiple queries need to serviced simultaneously, the numbers can be adjusted based on the expected response times. If the value of  threadModule.numThreadsPerRequest is higher, the reponse time for a single query will be improved upto a point. If multiple queries are serviced simultaneously, the threadModule.totalPoolThreads imposes a contention between the queries if (threadModule.numThreadsPerRequest*total parallel queries > threadModule.totalPoolThreads). 
+The threadModule.totalPoolThreads option limits the total number of threads to be used across all query instances at any given time. threadModule.numThreadsPerRequest limits the number of threads used to process a single query. In the above example, 10 threads will be used to service all queries and a maximum of 5 threads to service a single query. If the solr instance is expected to receive no more than one query at a time, it is best to set both these numbers to the same value. If multiple queries need to be serviced simultaneously, the numbers can be adjusted based on the expected response times. If the value of threadModule.numThreadsPerRequest is higher, the response time for a single query will be improved up to a point. If multiple queries are serviced simultaneously, the threadModule.totalPoolThreads imposes a contention between the queries if (threadModule.numThreadsPerRequest*total parallel queries > threadModule.totalPoolThreads).
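
As a worked example of that contention condition, using the values from the configuration above: with threadModule.totalPoolThreads=10 and threadModule.numThreadsPerRequest=5, two concurrent queries can each take their full 5 threads (5 * 2 = 10), but a third concurrent query raises demand to 5 * 3 = 15 > 10, so the queries start competing for pool threads and per-query latency degrades.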
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bfc3690d/solr/contrib/ltr/src/java/org/apache/solr/ltr/FeatureLogger.java
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/FeatureLogger.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/FeatureLogger.java
index a5afd05..9c10c2c 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/FeatureLogger.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/FeatureLogger.java
@@ -151,7 +151,6 @@ public abstract class FeatureLogger<FV_TYPE> {
   }
 
   public static class CSVFeatureLogger extends FeatureLogger<String> {
-    StringBuilder sb = new StringBuilder(500);
     char keyValueSep = ':';
     char featureSep = ';';
 
@@ -171,6 +170,10 @@ public abstract class FeatureLogger<FV_TYPE> {
 
     @Override
     public String makeFeatureVector(LTRScoringQuery.FeatureInfo[] featuresInfo) {
+      // Allocate the buffer to a size based on the number of features instead of the
+      // default 16.  You need space for the name, value, and two separators per feature,
+      // but not all the features are expected to fire, so this is just a naive estimate.
+      StringBuilder sb = new StringBuilder(featuresInfo.length * 3);
       boolean isDense = featureFormat.equals(FeatureFormat.DENSE);
       for (LTRScoringQuery.FeatureInfo featInfo:featuresInfo) {
         if (featInfo.isUsed() || isDense){
@@ -181,9 +184,8 @@ public abstract class FeatureLogger<FV_TYPE> {
         }
       }
 
-      final String features = (sb.length() > 0 ? sb.substring(0,
-          sb.length() - 1) : "");
-      sb.setLength(0);
+      final String features = (sb.length() > 0 ?
+          sb.substring(0, sb.length() - 1) : "");
 
       return features;
     }
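
For orientation, the vector this method builds joins each fired feature as name:value using keyValueSep and separates pairs with featureSep, trimming the trailing separator with the substring call above. With illustrative feature names, a dense vector over matchedTitle and popularity would serialize as matchedTitle:1.0;popularity:3.0, the same shape asserted as 'trendy:0.0' by the TestFieldValueFeature change later in this commit.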

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bfc3690d/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java
index 991c1ed..d60ebf5 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java
@@ -205,10 +205,10 @@ public class LTRScoringQuery extends Query {
     List<Feature.FeatureWeight > featureWeights = new ArrayList<>(features.size());
 
     if (querySemaphore == null) {
-      createWeights(searcher, needsScores, boost, featureWeights, features);
+      createWeights(searcher, needsScores, featureWeights, features);
     }
     else{
-      createWeightsParallel(searcher, needsScores, boost, featureWeights, features);
+      createWeightsParallel(searcher, needsScores, featureWeights, features);
     }
     int i=0, j = 0;
     if (this.extractAllFeatures) {
@@ -228,7 +228,7 @@ public class LTRScoringQuery extends Query {
     return new ModelWeight(modelFeaturesWeights, extractedFeatureWeights, allFeatures.size());
   }
 
-  private void createWeights(IndexSearcher searcher, boolean needsScores, float boost,
+  private void createWeights(IndexSearcher searcher, boolean needsScores,
       List<Feature.FeatureWeight > featureWeights, Collection<Feature> features) throws IOException {
     final SolrQueryRequest req = getRequest();
     // since the feature store is a linkedhashmap order is preserved
@@ -271,7 +271,7 @@ public class LTRScoringQuery extends Query {
     }
   } // end of call CreateWeightCallable
 
-  private void createWeightsParallel(IndexSearcher searcher, boolean needsScores, float boost,
+  private void createWeightsParallel(IndexSearcher searcher, boolean needsScores,
       List<Feature.FeatureWeight > featureWeights, Collection<Feature> features) throws RuntimeException {
 
     final SolrQueryRequest req = getRequest();
@@ -401,8 +401,9 @@ public class LTRScoringQuery extends Query {
     /**
      * Goes through all the stored feature values, and calculates the normalized
      * values for all the features that will be used for scoring.
+     * Then calculates and returns the model's score.
      */
-    private void makeNormalizedFeatures() {
+    private float makeNormalizedFeaturesAndScore() {
       int pos = 0;
       for (final Feature.FeatureWeight feature : modelFeatureWeights) {
         final int featureId = feature.getIndex();
@@ -415,6 +416,7 @@ public class LTRScoringQuery extends Query {
         pos++;
       }
       ltrScoringModel.normalizeFeaturesInPlace(modelFeatureValuesNormalized);
+      return ltrScoringModel.score(modelFeatureValuesNormalized);
     }
 
     @Override
@@ -491,8 +493,8 @@ public class LTRScoringQuery extends Query {
         for (final Feature.FeatureWeight.FeatureScorer subSocer : featureScorers) {
           subSocer.setDocInfo(docInfo);
         }
-        if (featureScorers.size() <= 1) { // TODO: Allow the use of dense
-          // features in other cases
+        if (featureScorers.size() <= 1) {
+          // future enhancement: allow the use of dense features in other cases
           featureTraversalScorer = new DenseModelScorer(weight, featureScorers);
         } else {
           featureTraversalScorer = new SparseModelScorer(weight, featureScorers);
@@ -570,8 +572,7 @@ public class LTRScoringQuery extends Query {
               featuresInfo[featureId].setUsed(true);
             }
           }
-          makeNormalizedFeatures();
-          return ltrScoringModel.score(modelFeatureValuesNormalized);
+          return makeNormalizedFeaturesAndScore();
         }
 
         @Override
@@ -663,8 +664,7 @@ public class LTRScoringQuery extends Query {
               }
             }
           }
-          makeNormalizedFeatures();
-          return ltrScoringModel.score(modelFeatureValuesNormalized);
+          return makeNormalizedFeaturesAndScore();
         }
 
         @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bfc3690d/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRThreadModule.java
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRThreadModule.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRThreadModule.java
index 8e2563f..b8d0bda 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRThreadModule.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRThreadModule.java
@@ -29,6 +29,35 @@ import org.apache.solr.util.DefaultSolrThreadFactory;
 import org.apache.solr.util.SolrPluginUtils;
 import org.apache.solr.util.plugin.NamedListInitializedPlugin;
 
+/**
+ * The LTRThreadModule is optionally used by the {@link org.apache.solr.ltr.search.LTRQParserPlugin} and
+ * {@link org.apache.solr.ltr.response.transform.LTRFeatureLoggerTransformerFactory LTRFeatureLoggerTransformerFactory}
+ * classes to parallelize the creation of {@link org.apache.solr.ltr.feature.Feature.FeatureWeight Feature.FeatureWeight}
+ * objects.
+ * <p>
+ * Example configuration:
+ * <pre>
+  &lt;queryParser name="ltr" class="org.apache.solr.ltr.search.LTRQParserPlugin"&gt;
+     &lt;int name="threadModule.totalPoolThreads"&gt;10&lt;/int&gt;
+     &lt;int name="threadModule.numThreadsPerRequest"&gt;5&lt;/int&gt;
+  &lt;/queryParser&gt;
+
+  &lt;transformer name="features" class="org.apache.solr.ltr.response.transform.LTRFeatureLoggerTransformerFactory"&gt;
+     &lt;int name="threadModule.totalPoolThreads"&gt;10&lt;/int&gt;
+     &lt;int name="threadModule.numThreadsPerRequest"&gt;5&lt;/int&gt;
+  &lt;/transformer&gt;
+</pre>
+ * If an individual solr instance is expected to receive no more than one query at a time, it is best
+ * to set <code>totalPoolThreads</code> and <code>numThreadsPerRequest</code> to the same value.
+ *
+ * If multiple queries need to be serviced simultaneously then <code>totalPoolThreads</code> and
+ * <code>numThreadsPerRequest</code> can be adjusted based on the expected response times.
+ *
+ * If the value of <code>numThreadsPerRequest</code> is higher, the response time for a single query
+ * will be improved up to a point. If multiple queries are serviced simultaneously, the value of
+ * <code>totalPoolThreads</code> imposes a contention between the queries if
+ * <code>(totalPoolThreads &lt; numThreadsPerRequest * total parallel queries)</code>.
+ */
 final public class LTRThreadModule implements NamedListInitializedPlugin {
 
   public static LTRThreadModule getInstance(NamedList args) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bfc3690d/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/Feature.java
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/Feature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/Feature.java
index 228b964..48e8942 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/Feature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/Feature.java
@@ -258,8 +258,7 @@ public abstract class Feature extends Query {
 
     @Override
     public void extractTerms(Set<Term> terms) {
-      // needs to be implemented by query subclasses
-      throw new UnsupportedOperationException();
+      // no-op
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bfc3690d/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
index 279adbc..5fcf144 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
@@ -29,6 +29,7 @@ import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.BoolField;
 
 /**
  * This feature returns the value of a field in the current document
@@ -119,13 +120,16 @@ public class FieldValueFeature extends Feature {
             return number.floatValue();
           } else {
             final String string = indexableField.stringValue();
-            // boolean values in the index are encoded with the
-            // chars T/F
-            if (string.equals("T")) {
-              return 1;
-            }
-            if (string.equals("F")) {
-              return 0;
+            if (string.length() == 1) {
+              // boolean values in the index are encoded as
+              // a single char contained in TRUE_TOKEN or FALSE_TOKEN
+              // (see BoolField)
+              if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) {
+                return 1;
+              }
+              if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) {
+                return 0;
+              }
             }
           }
         } catch (final IOException e) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bfc3690d/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/OriginalScoreFeature.java
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/OriginalScoreFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/OriginalScoreFeature.java
index 125615c..549880b 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/OriginalScoreFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/OriginalScoreFeature.java
@@ -19,8 +19,10 @@ package org.apache.solr.ltr.feature;
 import java.io.IOException;
 import java.util.LinkedHashMap;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
@@ -76,7 +78,10 @@ public class OriginalScoreFeature extends Feature {
       return "OriginalScoreFeature [query:" + originalQuery.toString() + "]";
     }
 
-
+    @Override
+    public void extractTerms(Set<Term> terms) {
+      w.extractTerms(terms);
+    }
 
     @Override
     public FeatureScorer scorer(LeafReaderContext context) throws IOException {
@@ -103,6 +108,11 @@ public class OriginalScoreFeature extends Feature {
       }
 
       @Override
+      public int freq() throws IOException {
+        return originalScorer.freq();
+      }
+
+      @Override
       public int docID() {
         return originalScorer.docID();
       }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bfc3690d/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/SolrFeature.java
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/SolrFeature.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/SolrFeature.java
index cb7c1a0..13eb96f 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/SolrFeature.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/SolrFeature.java
@@ -21,8 +21,10 @@ import java.util.ArrayList;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
@@ -123,9 +125,9 @@ public class SolrFeature extends Feature {
    * Weight for a SolrFeature
    **/
   public class SolrFeatureWeight extends FeatureWeight {
-    Weight solrQueryWeight;
-    Query query;
-    List<Query> queryAndFilters;
+    final private Weight solrQueryWeight;
+    final private Query query;
+    final private List<Query> queryAndFilters;
 
     public SolrFeatureWeight(IndexSearcher searcher,
         SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) throws IOException {
@@ -174,6 +176,8 @@ public class SolrFeature extends Feature {
         if (query != null) {
           queryAndFilters.add(query);
           solrQueryWeight = searcher.createNormalizedWeight(query, true);
+        } else {
+          solrQueryWeight = null;
         }
       } catch (final SyntaxError e) {
         throw new FeatureException("Failed to parse feature query.", e);
@@ -202,6 +206,13 @@ public class SolrFeature extends Feature {
     }
 
     @Override
+    public void extractTerms(Set<Term> terms) {
+      if (solrQueryWeight != null) {
+        solrQueryWeight.extractTerms(terms);
+      }
+    }
+
+    @Override
     public FeatureScorer scorer(LeafReaderContext context) throws IOException {
       Scorer solrScorer = null;
       if (solrQueryWeight != null) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bfc3690d/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedFeatureStore.java
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedFeatureStore.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedFeatureStore.java
index beb217c..2c7bce5 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedFeatureStore.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedFeatureStore.java
@@ -57,7 +57,6 @@ public class ManagedFeatureStore extends ManagedResource implements ManagedResou
 
   /** the feature store rest endpoint **/
   public static final String REST_END_POINT = "/schema/feature-store";
-  // TODO: reduce from public to package visibility (once tests no longer need public access)
 
   /** name of the attribute containing the feature class **/
   static final String CLASS_KEY = "class";

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bfc3690d/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java
index 97aaa40..9c19b0a 100644
--- a/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java
@@ -61,7 +61,6 @@ public class ManagedModelStore extends ManagedResource implements ManagedResourc
 
   /** the model store rest endpoint **/
   public static final String REST_END_POINT = "/schema/model-store";
-  // TODO: reduce from public to package visibility (once tests no longer need public access)
 
   /**
    * Managed model store: the name of the attribute containing all the models of
@@ -124,16 +123,20 @@ public class ManagedModelStore extends ManagedResource implements ManagedResourc
     if ((managedData != null) && (managedData instanceof List)) {
       final List<Map<String,Object>> up = (List<Map<String,Object>>) managedData;
       for (final Map<String,Object> u : up) {
-        try {
-          final LTRScoringModel algo = fromLTRScoringModelMap(solrResourceLoader, u, managedFeatureStore);
-          addModel(algo);
-        } catch (final ModelException e) {
-          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
-        }
+        addModelFromMap(u);
       }
     }
   }
 
+  private void addModelFromMap(Map<String,Object> modelMap) {
+    try {
+      final LTRScoringModel algo = fromLTRScoringModelMap(solrResourceLoader, modelMap, managedFeatureStore);
+      addModel(algo);
+    } catch (final ModelException e) {
+      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
+    }
+  }
+
   public synchronized void addModel(LTRScoringModel ltrScoringModel) throws ModelException {
     try {
       log.info("adding model {}", ltrScoringModel.getName());
@@ -146,26 +149,17 @@ public class ManagedModelStore extends ManagedResource implements ManagedResourc
   @SuppressWarnings("unchecked")
   @Override
   protected Object applyUpdatesToManagedData(Object updates) {
+
     if (updates instanceof List) {
       final List<Map<String,Object>> up = (List<Map<String,Object>>) updates;
       for (final Map<String,Object> u : up) {
-        try {
-          final LTRScoringModel algo = fromLTRScoringModelMap(solrResourceLoader, u, managedFeatureStore);
-          addModel(algo);
-        } catch (final ModelException e) {
-          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
-        }
+        addModelFromMap(u);
       }
     }
 
     if (updates instanceof Map) {
       final Map<String,Object> map = (Map<String,Object>) updates;
-      try {
-        final LTRScoringModel algo = fromLTRScoringModelMap(solrResourceLoader, map, managedFeatureStore);
-        addModel(algo);
-      } catch (final ModelException e) {
-        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
-      }
+      addModelFromMap(map);
     }
 
     return modelsAsManagedResources(store.getModels());

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bfc3690d/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
index 15cf140..0b958c0 100644
--- a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema.xml
@@ -24,6 +24,8 @@
     <field name="keywords" type="text_general" indexed="true" stored="true" multiValued="true"/>
     <field name="popularity" type="int" indexed="true" stored="true" />
     <field name="normHits" type="float" indexed="true" stored="true" />
+    <field name="isTrendy" type="boolean" indexed="true" stored="true" />
+
     <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
     <field name="_version_" type="long" indexed="true" stored="true"/>
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bfc3690d/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
index af150c0..9574273 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
@@ -32,21 +32,21 @@ public class TestFieldValueFeature extends TestRerankBase {
     setuptest("solrconfig-ltr.xml", "schema.xml");
 
     assertU(adoc("id", "1", "title", "w1", "description", "w1", "popularity",
-        "1"));
+        "1","isTrendy","true"));
     assertU(adoc("id", "2", "title", "w2 2asd asdd didid", "description",
         "w2 2asd asdd didid", "popularity", "2"));
     assertU(adoc("id", "3", "title", "w3", "description", "w3", "popularity",
-        "3"));
+        "3","isTrendy","true"));
     assertU(adoc("id", "4", "title", "w4", "description", "w4", "popularity",
-        "4"));
+        "4","isTrendy","false"));
     assertU(adoc("id", "5", "title", "w5", "description", "w5", "popularity",
-        "5"));
+        "5","isTrendy","true"));
     assertU(adoc("id", "6", "title", "w1 w2", "description", "w1 w2",
-        "popularity", "6"));
+        "popularity", "6","isTrendy","false"));
     assertU(adoc("id", "7", "title", "w1 w2 w3 w4 w5", "description",
-        "w1 w2 w3 w4 w5 w8", "popularity", "7"));
+        "w1 w2 w3 w4 w5 w8", "popularity", "7","isTrendy","true"));
     assertU(adoc("id", "8", "title", "w1 w1 w1 w2 w2 w8", "description",
-        "w1 w1 w1 w2 w2", "popularity", "8"));
+        "w1 w1 w1 w2 w2", "popularity", "8","isTrendy","false"));
 
     // a document without the popularity field
     assertU(adoc("id", "42", "title", "NO popularity", "description", "NO popularity"));
@@ -169,5 +169,39 @@ public class TestFieldValueFeature extends TestRerankBase {
 
   }
 
+  @Test
+  public void testBooleanValue() throws Exception {
+    final String fstore = "test_boolean_store";
+    loadFeature("trendy", FieldValueFeature.class.getCanonicalName(), fstore,
+            "{\"field\":\"isTrendy\"}");
+
+    loadModel("trendy-model", LinearModel.class.getCanonicalName(),
+            new String[] {"trendy"}, fstore, "{\"weights\":{\"trendy\":1.0}}");
+
+    SolrQuery query = new SolrQuery();
+    query.setQuery("id:4");
+    query.add("rq", "{!ltr model=trendy-model reRankDocs=4}");
+    query.add("fl", "[fv]");
+    assertJQ("/query" + query.toQueryString(),
+            "/response/docs/[0]/=={'[fv]':'trendy:0.0'}");
+
+
+    query = new SolrQuery();
+    query.setQuery("id:5");
+    query.add("rq", "{!ltr model=trendy-model reRankDocs=4}");
+    query.add("fl", "[fv]");
+    assertJQ("/query" + query.toQueryString(),
+            "/response/docs/[0]/=={'[fv]':'trendy:1.0'}");
+
+    // check default value is false
+    query = new SolrQuery();
+    query.setQuery("id:2");
+    query.add("rq", "{!ltr model=trendy-model reRankDocs=4}");
+    query.add("fl", "[fv]");
+    assertJQ("/query" + query.toQueryString(),
+            "/response/docs/[0]/=={'[fv]':'trendy:0.0'}");
+
+  }
+
 
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bfc3690d/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreScorer.java
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreScorer.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreScorer.java
new file mode 100644
index 0000000..e85ebed
--- /dev/null
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestOriginalScoreScorer.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.ltr.feature;
+
+import java.lang.reflect.Method;
+import java.lang.reflect.Modifier;
+
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.Test;
+
+public class TestOriginalScoreScorer extends LuceneTestCase {
+
+  @Test
+  public void testOverridesAbstractScorerMethods() {
+    final Class<?> ossClass = OriginalScoreFeature.OriginalScoreWeight.OriginalScoreScorer.class;
+    for (final Method scorerClassMethod : Scorer.class.getDeclaredMethods()) {
+      final int modifiers = scorerClassMethod.getModifiers();
+      if (!Modifier.isAbstract(modifiers)) continue;
+
+      try {
+        final Method ossClassMethod = ossClass.getDeclaredMethod(
+            scorerClassMethod.getName(),
+            scorerClassMethod.getParameterTypes());
+        assertEquals("getReturnType() difference",
+            scorerClassMethod.getReturnType(),
+            ossClassMethod.getReturnType());
+      } catch (NoSuchMethodException e) {
+        fail(ossClass + " needs to override '" + scorerClassMethod + "'");
+      }
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bfc3690d/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestMultipleAdditiveTreesModel.java
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestMultipleAdditiveTreesModel.java b/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestMultipleAdditiveTreesModel.java
index 3748331..5604370 100644
--- a/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestMultipleAdditiveTreesModel.java
+++ b/solr/contrib/ltr/src/test/org/apache/solr/ltr/model/TestMultipleAdditiveTreesModel.java
@@ -16,7 +16,7 @@
  */
 package org.apache.solr.ltr.model;
 
-//import static org.junit.internal.matchers.StringContains.containsString;
+import static org.junit.internal.matchers.StringContains.containsString;
 
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.ltr.TestRerankBase;
@@ -93,30 +93,28 @@ public class TestMultipleAdditiveTreesModel extends TestRerankBase {
 
     // test out the explain feature, make sure it returns something
     query.setParam("debugQuery", "on");
-    String qryResult = JQ("/query" + query.toQueryString());
 
+    String qryResult = JQ("/query" + query.toQueryString());
     qryResult = qryResult.replaceAll("\n", " ");
-    // FIXME containsString doesn't exist.
-    // assertThat(qryResult, containsString("\"debug\":{"));
-    // qryResult = qryResult.substring(qryResult.indexOf("debug"));
-    //
-    // assertThat(qryResult, containsString("\"explain\":{"));
-    // qryResult = qryResult.substring(qryResult.indexOf("explain"));
-    //
-    // assertThat(qryResult, containsString("multipleadditivetreesmodel"));
-    // assertThat(qryResult,
-    // containsString(MultipleAdditiveTreesModel.class.getCanonicalName()));
-    //
-    // assertThat(qryResult, containsString("-100.0 = tree 0"));
-    // assertThat(qryResult, containsString("50.0 = tree 0"));
-    // assertThat(qryResult, containsString("-20.0 = tree 1"));
-    // assertThat(qryResult, containsString("'matchedTitle':1.0 > 0.5"));
-    // assertThat(qryResult, containsString("'matchedTitle':0.0 <= 0.5"));
-    //
-    // assertThat(qryResult, containsString(" Go Right "));
-    // assertThat(qryResult, containsString(" Go Left "));
-    // assertThat(qryResult,
-    // containsString("'this_feature_doesnt_exist' does not exist in FV"));
+
+    assertThat(qryResult, containsString("\"debug\":{"));
+    qryResult = qryResult.substring(qryResult.indexOf("debug"));
+
+    assertThat(qryResult, containsString("\"explain\":{"));
+    qryResult = qryResult.substring(qryResult.indexOf("explain"));
+
+    assertThat(qryResult, containsString("multipleadditivetreesmodel"));
+    assertThat(qryResult, containsString(MultipleAdditiveTreesModel.class.getCanonicalName()));
+
+    assertThat(qryResult, containsString("-100.0 = tree 0"));
+    assertThat(qryResult, containsString("50.0 = tree 0"));
+    assertThat(qryResult, containsString("-20.0 = tree 1"));
+    assertThat(qryResult, containsString("'matchedTitle':1.0 > 0.5"));
+    assertThat(qryResult, containsString("'matchedTitle':0.0 <= 0.5"));
+
+    assertThat(qryResult, containsString(" Go Right "));
+    assertThat(qryResult, containsString(" Go Left "));
+    assertThat(qryResult, containsString("'this_feature_doesnt_exist' does not exist in FV"));
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bfc3690d/solr/core/src/java/org/apache/solr/schema/BoolField.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/BoolField.java b/solr/core/src/java/org/apache/solr/schema/BoolField.java
index 210ea0b..1645ee6 100644
--- a/solr/core/src/java/org/apache/solr/schema/BoolField.java
+++ b/solr/core/src/java/org/apache/solr/schema/BoolField.java
@@ -71,8 +71,8 @@ public class BoolField extends PrimitiveFieldType {
   }
 
   // avoid instantiating every time...
-  protected final static char[] TRUE_TOKEN = {'T'};
-  protected final static char[] FALSE_TOKEN = {'F'};
+  public final static char[] TRUE_TOKEN = {'T'};
+  public final static char[] FALSE_TOKEN = {'F'};
 
   ////////////////////////////////////////////////////////////////////////
   // TODO: look into creating my own queryParser that can more efficiently
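
Tying this hunk to the FieldValueFeature change above: a boolean field such as isTrendy is indexed as the single token 'T' (TRUE_TOKEN) or 'F' (FALSE_TOKEN), so widening the tokens to public lets FieldValueFeature compare against BoolField.TRUE_TOKEN[0] and BoolField.FALSE_TOKEN[0] instead of the "T"/"F" magic strings it used before, mapping them to 1.0f and 0.0f respectively; this is the behavior exercised by testBooleanValue.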


[17/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-9822: speed up single-valued string fieldcache counting in dv facet processor

Posted by kr...@apache.org.
SOLR-9822: speed up single-valued string fieldcache counting in dv facet processor


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ca5e736d
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ca5e736d
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ca5e736d

Branch: refs/heads/jira/solr-8593
Commit: ca5e736db1df0cdf35f1b039350bfd5a9cdfa102
Parents: 10500c8
Author: yonik <yo...@apache.org>
Authored: Wed Dec 7 11:08:33 2016 -0500
Committer: yonik <yo...@apache.org>
Committed: Wed Dec 7 11:09:55 2016 -0500

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   5 +
 .../facet/FacetFieldProcessorByArrayDV.java     |  49 ++++++-
 .../org/apache/solr/search/facet/FieldUtil.java | 147 +++++++++++++++++++
 .../org/apache/solr/uninverting/FieldCache.java |   2 +-
 .../apache/solr/uninverting/FieldCacheImpl.java | 130 ++++++++--------
 5 files changed, 264 insertions(+), 69 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca5e736d/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 14dd2fa..8dee837 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -62,6 +62,11 @@ Optimizations
 * SOLR-9579: Make Solr's SchemaField implement Lucene's IndexableFieldType, removing the 
   creation of a Lucene FieldType every time a field is indexed. (John Call, yonik) 
 
+* SOLR-9822: JSON Facet API: Recover performance lost due to the DocValues transition to
+  an iterator API (LUCENE-7407).  This only fixes calculating counts for single-valued
+  string fields from the FieldCache, resulting in up to 56% better throughput for those cases.
+  (yonik)
+
 
 ==================  6.4.0 ==================
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca5e736d/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java
index 88adf67..1481f18 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java
@@ -33,6 +33,7 @@ import org.apache.lucene.util.UnicodeUtil;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.Filter;
+import org.apache.solr.uninverting.FieldCacheImpl;
 
 /**
  * Grabs values from {@link DocValues}.
@@ -184,15 +185,33 @@ class FacetFieldProcessorByArrayDV extends FacetFieldProcessorByArray {
     int segMax = singleDv.getValueCount() + 1;
     final int[] counts = getCountArr( segMax );
 
+    /** alternate trial implementations
+     // ord
+     // FieldUtil.visitOrds(singleDv, disi,  (doc,ord)->{counts[ord+1]++;} );
+
+    FieldUtil.OrdValues ordValues = FieldUtil.getOrdValues(singleDv, disi);
+    while (ordValues.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+      counts[ ordValues.getOrd() + 1]++;
+    }
+     **/
+
+
+    // calculate segment-local counts
     int doc;
-    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-      if (singleDv.advanceExact(doc)) {
-        counts[ singleDv.ordValue() + 1 ]++;
-      } else {
-        counts[ 0 ]++;
+    if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
+      FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv;
+      while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        counts[fc.getOrd(doc) + 1]++;
+      }
+    } else {
+      while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        if (singleDv.advanceExact(doc)) {
+          counts[singleDv.ordValue() + 1]++;
+        }
       }
     }
 
+    // convert segment-local counts to global counts
     for (int i=1; i<segMax; i++) {
       int segCount = counts[i];
       if (segCount > 0) {
@@ -250,12 +269,26 @@ class FacetFieldProcessorByArrayDV extends FacetFieldProcessorByArray {
 
   private void collectCounts(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
     int doc;
-    while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-      if (singleDv.advanceExact(doc)) {
-        int segOrd = singleDv.ordValue();
+    if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
+
+      FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter)singleDv;
+      while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        int segOrd = fc.getOrd(doc);
+        if (segOrd < 0) continue;
         int ord = (int)toGlobal.get(segOrd);
         countAcc.incrementCount(ord, 1);
       }
+
+    } else {
+
+      while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        if (singleDv.advanceExact(doc)) {
+          int segOrd = singleDv.ordValue();
+          int ord = (int) toGlobal.get(segOrd);
+          countAcc.incrementCount(ord, 1);
+        }
+      }
+
     }
   }
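
A note on the indexing in the fast path above (an observation, not text from the patch): the FieldCache iterator's getOrd returns -1 for documents with no value for the field (see the "Subtract 1" comment in the FieldCacheImpl hunk below), so counts[fc.getOrd(doc) + 1]++ accumulates missing documents in counts[0] and real ordinals in counts[1..segMax-1], which is why segMax is sized as getValueCount() + 1.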
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca5e736d/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java b/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java
index 84255b9..389b6d7 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/FieldUtil.java
@@ -21,10 +21,13 @@ import java.io.IOException;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.util.BytesRef;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.QueryContext;
 import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.uninverting.FieldCacheImpl;
 
 /** @lucene.internal
  * Porting helper... may be removed if it offers no value in the future.
@@ -52,4 +55,148 @@ public class FieldUtil {
     return si == null ? DocValues.emptySortedSet() : si;
   }
 
+
+  /** The following ord visitors and wrappers are a work in progress and experimental
+   *  @lucene.experimental */
+  @FunctionalInterface
+  public interface OrdFunc {
+    void handleOrd(int docid, int ord); // TODO: throw exception?
+  }
+
+  public static boolean isFieldCache(SortedDocValues singleDv) {
+    return singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter;
+  }
+
+  public static void visitOrds(SortedDocValues singleDv, DocIdSetIterator disi, OrdFunc ordFunc) throws IOException {
+    int doc;
+    if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
+      FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv;
+      while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        ordFunc.handleOrd(doc, fc.getOrd(doc));
+      }
+    } else {
+      while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        if (singleDv.advanceExact(doc)) {
+          ordFunc.handleOrd(doc, singleDv.ordValue());
+        } else {
+          // TODO: optionally pass in missingOrd?
+        }
+      }
+    }
+  }
+
+  public static OrdValues getOrdValues(SortedDocValues singleDv, DocIdSetIterator disi) {
+    if (singleDv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
+      FieldCacheImpl.SortedDocValuesImpl.Iter fc = (FieldCacheImpl.SortedDocValuesImpl.Iter) singleDv;
+      return new FCOrdValues(fc, disi);
+    }
+    return new DVOrdValues(singleDv, disi);
+  }
+
+
+  public static abstract class OrdValues extends SortedDocValues {
+    int doc;
+    int ord;
+
+    public int getOrd() {
+      return ord;
+    }
+
+    @Override
+    public int docID() {
+      return doc;
+    }
+
+    @Override
+    public abstract int nextDoc() throws IOException;
+
+    @Override
+    public int advance(int target) throws IOException {
+      return 0; // TODO
+    }
+
+    @Override
+    public long cost() {
+      return 0;
+    }
+
+    @Override
+    public int getValueCount() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+
+  public static class FCOrdValues extends OrdValues {
+    FieldCacheImpl.SortedDocValuesImpl.Iter vals;
+    DocIdSetIterator disi;
+
+    public FCOrdValues(FieldCacheImpl.SortedDocValuesImpl.Iter iter, DocIdSetIterator disi) {
+      this.vals = iter;
+      this.disi = disi;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      doc = disi.nextDoc();
+      if (doc == NO_MORE_DOCS) return NO_MORE_DOCS;
+      ord = vals.getOrd(doc); // todo: loop until a hit?
+      return doc;
+    }
+
+    @Override
+    public boolean advanceExact(int target) throws IOException {
+      return false;
+    }
+
+    @Override
+    public int ordValue() {
+      return 0;
+    }
+
+    @Override
+    public BytesRef lookupOrd(int ord) throws IOException {
+      return null;
+    }
+  }
+
+  public static class DVOrdValues extends OrdValues {
+    SortedDocValues vals;
+    DocIdSetIterator disi;
+    int valDoc;
+
+    public DVOrdValues(SortedDocValues vals, DocIdSetIterator disi) {
+      this.vals = vals;
+      this.disi = disi;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      for (;;) {
+        // todo - use skipping when appropriate
+        doc = disi.nextDoc();
+        if (doc == NO_MORE_DOCS) return NO_MORE_DOCS;
+        boolean match = vals.advanceExact(doc);
+        if (match) {
+          ord = vals.ordValue();
+          return doc;
+        }
+      }
+    }
+
+    @Override
+    public boolean advanceExact(int target) throws IOException {
+      return false;
+    }
+
+    @Override
+    public int ordValue() {
+      return 0;
+    }
+
+    @Override
+    public BytesRef lookupOrd(int ord) throws IOException {
+      return null;
+    }
+  }
 }
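
A minimal usage sketch for the experimental visitor added above; it mirrors the commented-out "alternate trial" line in FacetFieldProcessorByArrayDV and assumes singleDv, disi and segMax are in scope (and IOException is declared) as they are there:

  final int[] counts = new int[segMax];
  // visitOrds takes the FieldCache random-access path internally when available,
  // otherwise it falls back to advanceExact()/ordValue() iteration.
  FieldUtil.visitOrds(singleDv, disi, (doc, ord) -> counts[ord + 1]++);

Note the two paths differ for missing documents: the FieldCache path passes ord -1 (so counts[0] absorbs them via the +1 offset), while the generic path currently skips documents without a value, as the TODO inside visitOrds points out.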

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca5e736d/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
index ea8f6ea..32f5615 100644
--- a/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
@@ -45,7 +45,7 @@ import org.apache.lucene.util.RamUsageEstimator;
  *
  * @lucene.internal
  */
-interface FieldCache {
+public interface FieldCache {
 
   /**
    * Placeholder indicating creation of this cache is currently in-progress.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ca5e736d/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java
index 2224010..0ca687f 100644
--- a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java
@@ -58,9 +58,9 @@ import org.apache.lucene.util.packed.PackedLongValues;
  * Expert: The default cache implementation, storing all values in memory.
  * A WeakHashMap is used for storage.
  *
- * @since   lucene 1.4
+ * @lucene.internal
  */
-class FieldCacheImpl implements FieldCache {
+public class FieldCacheImpl implements FieldCache {
 
   private Map<Class<?>,Cache> caches;
   FieldCacheImpl() {
@@ -786,79 +786,89 @@ class FieldCacheImpl implements FieldCache {
       this.termOrdToBytesOffset = termOrdToBytesOffset;
       this.numOrd = numOrd;
     }
-    
+
     public SortedDocValues iterator() {
-      final BytesRef term = new BytesRef();
-      return new SortedDocValues() {
-        private int docID = -1;
+      return new Iter();
+    }
 
-        @Override
-        public int docID() {
-          return docID;
-        }
+    public class Iter extends SortedDocValues {
+      private int docID = -1;
+      private final BytesRef term = new BytesRef();
 
-        @Override
-        public int nextDoc() {
-          while (true) {
-            docID++;
-            if (docID >= docToTermOrd.size()) {
-              docID = NO_MORE_DOCS;
-              return docID;
-            }
-            if (docToTermOrd.get(docID) != 0) {
-              return docID;
-            }
-          }
-        }
+      /** @lucene.internal Specific to this implementation and subject to change.  For internal optimization only. */
+      public int getOrd(int docID) {
+        // Subtract 1, matching the 1+ord we did when
+        // storing, so that missing values, which are 0 in the
+        // packed ints, are returned as -1 ord:
+        return (int) docToTermOrd.get(docID)-1;
+      }
 
-        @Override
-        public int advance(int target) {
-          if (target < docToTermOrd.size()) {
-            docID = target;
-            if (docToTermOrd.get(docID) != 0) {
-              return docID;
-            } else{
-              return nextDoc();
-            }
-          } else {
+      @Override
+      public int docID() {
+        return docID;
+      }
+
+      @Override
+      public int nextDoc() {
+        while (true) {
+          docID++;
+          if (docID >= docToTermOrd.size()) {
             docID = NO_MORE_DOCS;
             return docID;
           }
+          if (docToTermOrd.get(docID) != 0) {
+            return docID;
+          }
         }
+      }
 
-        @Override
-        public boolean advanceExact(int target) throws IOException {
+      @Override
+      public int advance(int target) {
+        if (target < docToTermOrd.size()) {
           docID = target;
-          return docToTermOrd.get(docID) != 0;
+          if (docToTermOrd.get(docID) != 0) {
+            return docID;
+          } else {
+            return nextDoc();
+          }
+        } else {
+          docID = NO_MORE_DOCS;
+          return docID;
         }
+      }
 
-        @Override
-        public long cost() {
-          return 0;
-        }
-        
-        @Override
-        public int ordValue() {
-          // Subtract 1, matching the 1+ord we did when
-          // storing, so that missing values, which are 0 in the
-          // packed ints, are returned as -1 ord:
-          return (int) docToTermOrd.get(docID)-1;
-        }
+      @Override
+      public boolean advanceExact(int target) throws IOException {
+        docID = target;
+        return docToTermOrd.get(docID) != 0;
+      }
 
-        @Override
-        public int getValueCount() {
-          return numOrd;
-        }
+      @Override
+      public long cost() {
+        return 0;
+      }
 
-        @Override
-        public BytesRef lookupOrd(int ord) {
-          if (ord < 0) {
-            throw new IllegalArgumentException("ord must be >=0 (got ord=" + ord + ")");
-          }
-          bytes.fill(term, termOrdToBytesOffset.get(ord));
-          return term;
+      @Override
+      public int ordValue() {
+        // Subtract 1, matching the 1+ord we did when
+        // storing, so that missing values, which are 0 in the
+        // packed ints, are returned as -1 ord:
+        return (int) docToTermOrd.get(docID)-1;
+      }
+
+      @Override
+      public int getValueCount() {
+        return numOrd;
+      }
+
+      @Override
+      public BytesRef lookupOrd(int ord) {
+        if (ord < 0) {
+          throw new IllegalArgumentException("ord must be >=0 (got ord=" + ord + ")");
         }
-      };
+        bytes.fill(term, termOrdToBytesOffset.get(ord));
+        return term;
+      }
     }
 
     @Override


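The net effect of the FieldCacheImpl change above: the previously anonymous SortedDocValues is now the named Iter class, so callers can instanceof-check it and read ords by random access instead of the one-way iterator protocol. A minimal sketch of the two access patterns, assuming 'dv' came from this FieldCache and 'doc' is a valid doc id, in code that may throw IOException:

    int ord;
    if (dv instanceof FieldCacheImpl.SortedDocValuesImpl.Iter) {
      // random access; returns -1 when the doc has no value
      ord = ((FieldCacheImpl.SortedDocValuesImpl.Iter) dv).getOrd(doc);
    } else {
      // standard iterator protocol: position first, then read
      ord = dv.advanceExact(doc) ? dv.ordValue() : -1;
    }
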
[39/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7590: fix typo in method parameter

Posted by kr...@apache.org.
LUCENE-7590: fix typo in method parameter


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/85582dab
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/85582dab
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/85582dab

Branch: refs/heads/jira/solr-8593
Commit: 85582dabe4372085e1af5d01ebbfcfd0303b9f12
Parents: 770f1eb
Author: Shai Erera <sh...@apache.org>
Authored: Wed Dec 14 13:28:02 2016 +0200
Committer: Shai Erera <sh...@apache.org>
Committed: Wed Dec 14 13:28:02 2016 +0200

----------------------------------------------------------------------
 .../misc/src/java/org/apache/lucene/search/DocValuesStats.java | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

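For context, this commit is a follow-up to the DocValuesStats framework added in LUCENE-7590, whose init(LeafReaderContext) contract appears in the diff below. A hedged usage sketch; the LongDocValuesStats and DocValuesStatsCollector names are assumed from that feature rather than shown here, and should be verified against lucene/misc:

    // assumed API, not shown in this diff; "price" and 'searcher' are placeholders
    DocValuesStats.LongDocValuesStats stats = new DocValuesStats.LongDocValuesStats("price");
    searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));
    System.out.println("count=" + stats.count() + " missing=" + stats.missing());
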

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/85582dab/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
----------------------------------------------------------------------
diff --git a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
index fad9f97..38158cf 100644
--- a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
+++ b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
@@ -51,7 +51,7 @@ public abstract class DocValuesStats<T> {
    * Initializes this object with the given reader context. Returns whether stats can be computed for this segment (i.e.
    * it does have the requested DocValues field).
    */
-  protected abstract boolean init(LeafReaderContext contxt) throws IOException;
+  protected abstract boolean init(LeafReaderContext context) throws IOException;
 
   /** Returns whether the given document has a value for the requested DocValues field. */
   protected abstract boolean hasValue(int doc) throws IOException;
@@ -106,8 +106,8 @@ public abstract class DocValuesStats<T> {
     }
 
     @Override
-    protected final boolean init(LeafReaderContext contxt) throws IOException {
-      ndv = contxt.reader().getNumericDocValues(field);
+    protected final boolean init(LeafReaderContext context) throws IOException {
+      ndv = context.reader().getNumericDocValues(field);
       return ndv != null;
     }
 


[11/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7575: Add UnifiedHighlighter field matcher predicate (AKA requireFieldMatch=false)

Posted by kr...@apache.org.
LUCENE-7575: Add UnifiedHighlighter field matcher predicate (AKA requireFieldMatch=false)


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2e948fea
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2e948fea
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2e948fea

Branch: refs/heads/jira/solr-8593
Commit: 2e948fea300f883b7dfb586e303d5720d09b3210
Parents: bd8b191
Author: David Smiley <ds...@apache.org>
Authored: Mon Dec 5 16:11:57 2016 -0500
Committer: David Smiley <ds...@apache.org>
Committed: Mon Dec 5 16:11:57 2016 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   4 +
 .../uhighlight/MemoryIndexOffsetStrategy.java   |  10 +-
 .../uhighlight/MultiTermHighlighting.java       |  37 +--
 .../lucene/search/uhighlight/PhraseHelper.java  | 158 ++++++++---
 .../search/uhighlight/UnifiedHighlighter.java   |  64 +++--
 .../uhighlight/TestUnifiedHighlighter.java      | 275 +++++++++++++++++++
 .../TestUnifiedHighlighterExtensibility.java    |   3 +-
 7 files changed, 467 insertions(+), 84 deletions(-)
----------------------------------------------------------------------

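In short, the highlighter now consults a Predicate<String> over query field names when deciding which terms may highlight the field being processed; leaving the matcher unset keeps the old exact-field-match behavior. A hedged sketch of the new knob added in the diffs below, where 'searcher', 'analyzer', 'query', 'topDocs' and the "body"/"title" field names are assumed from the caller's context:

    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, analyzer);
    // requireFieldMatch=false: terms from any query field may highlight "body"
    highlighter.setFieldMatcher(qf -> true);
    String[] snippets = highlighter.highlight("body", query, topDocs, 2);
    // or only honor the terms the query targets at "title"
    highlighter.setFieldMatcher(qf -> "title".equals(qf));
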

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e948fea/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 79e44e1..c6c39ac 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -121,6 +121,10 @@ Improvements
   control how text is analyzed and converted into a query (Matt Weber
   via Mike McCandless)
 
+* LUCENE-7575: UnifiedHighlighter can now highlight fields with queries that don't
+  necessarily refer to that field (AKA requireFieldMatch==false). Disabled by default.
+  See UH get/setFieldMatcher. (Jim Ferenczi via David Smiley)
+
 Optimizations
 
 * LUCENE-7568: Optimize merging when index sorting is used but the

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e948fea/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
index 4028912..0001a80 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MemoryIndexOffsetStrategy.java
@@ -23,6 +23,7 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.function.Function;
+import java.util.function.Predicate;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.FilteringTokenFilter;
@@ -49,7 +50,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
   private final LeafReader leafReader;
   private final CharacterRunAutomaton preMemIndexFilterAutomaton;
 
-  public MemoryIndexOffsetStrategy(String field, BytesRef[] extractedTerms, PhraseHelper phraseHelper,
+  public MemoryIndexOffsetStrategy(String field, Predicate<String> fieldMatcher, BytesRef[] extractedTerms, PhraseHelper phraseHelper,
                                    CharacterRunAutomaton[] automata, Analyzer analyzer,
                                    Function<Query, Collection<Query>> multiTermQueryRewrite) {
     super(field, extractedTerms, phraseHelper, automata, analyzer);
@@ -57,13 +58,14 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
     memoryIndex = new MemoryIndex(true, storePayloads);//true==store offsets
     leafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader(); // appears to be re-usable
     // preFilter for MemoryIndex
-    preMemIndexFilterAutomaton = buildCombinedAutomaton(field, terms, this.automata, phraseHelper, multiTermQueryRewrite);
+    preMemIndexFilterAutomaton = buildCombinedAutomaton(fieldMatcher, terms, this.automata, phraseHelper, multiTermQueryRewrite);
   }
 
   /**
    * Build one {@link CharacterRunAutomaton} matching any term the query might match.
    */
-  private static CharacterRunAutomaton buildCombinedAutomaton(String field, BytesRef[] terms,
+  private static CharacterRunAutomaton buildCombinedAutomaton(Predicate<String> fieldMatcher,
+                                                              BytesRef[] terms,
                                                               CharacterRunAutomaton[] automata,
                                                               PhraseHelper strictPhrases,
                                                               Function<Query, Collection<Query>> multiTermQueryRewrite) {
@@ -74,7 +76,7 @@ public class MemoryIndexOffsetStrategy extends AnalysisOffsetStrategy {
     Collections.addAll(allAutomata, automata);
     for (SpanQuery spanQuery : strictPhrases.getSpanQueries()) {
       Collections.addAll(allAutomata,
-          MultiTermHighlighting.extractAutomata(spanQuery, field, true, multiTermQueryRewrite));//true==lookInSpan
+          MultiTermHighlighting.extractAutomata(spanQuery, fieldMatcher, true, multiTermQueryRewrite));//true==lookInSpan
     }
 
     if (allAutomata.size() == 1) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e948fea/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
index fd6a26a..267d603 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/MultiTermHighlighting.java
@@ -22,6 +22,7 @@ import java.util.Collection;
 import java.util.Comparator;
 import java.util.List;
 import java.util.function.Function;
+import java.util.function.Predicate;
 
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.AutomatonQuery;
@@ -56,50 +57,52 @@ class MultiTermHighlighting {
   }
 
   /**
-   * Extracts all MultiTermQueries for {@code field}, and returns equivalent
-   * automata that will match terms.
+   * Extracts MultiTermQueries that match the provided field predicate.
+   * Returns equivalent automata that will match terms.
    */
-  public static CharacterRunAutomaton[] extractAutomata(Query query, String field, boolean lookInSpan,
+  public static CharacterRunAutomaton[] extractAutomata(Query query,
+                                                        Predicate<String> fieldMatcher,
+                                                        boolean lookInSpan,
                                                         Function<Query, Collection<Query>> preRewriteFunc) {
     List<CharacterRunAutomaton> list = new ArrayList<>();
     Collection<Query> customSubQueries = preRewriteFunc.apply(query);
     if (customSubQueries != null) {
       for (Query sub : customSubQueries) {
-        list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
+        list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
       }
     } else if (query instanceof BooleanQuery) {
       for (BooleanClause clause : (BooleanQuery) query) {
         if (!clause.isProhibited()) {
-          list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), field, lookInSpan, preRewriteFunc)));
+          list.addAll(Arrays.asList(extractAutomata(clause.getQuery(), fieldMatcher, lookInSpan, preRewriteFunc)));
         }
       }
     } else if (query instanceof ConstantScoreQuery) {
-      list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), field, lookInSpan,
+      list.addAll(Arrays.asList(extractAutomata(((ConstantScoreQuery) query).getQuery(), fieldMatcher, lookInSpan,
           preRewriteFunc)));
     } else if (query instanceof DisjunctionMaxQuery) {
       for (Query sub : ((DisjunctionMaxQuery) query).getDisjuncts()) {
-        list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
+        list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
       }
     } else if (lookInSpan && query instanceof SpanOrQuery) {
       for (Query sub : ((SpanOrQuery) query).getClauses()) {
-        list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
+        list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
       }
     } else if (lookInSpan && query instanceof SpanNearQuery) {
       for (Query sub : ((SpanNearQuery) query).getClauses()) {
-        list.addAll(Arrays.asList(extractAutomata(sub, field, lookInSpan, preRewriteFunc)));
+        list.addAll(Arrays.asList(extractAutomata(sub, fieldMatcher, lookInSpan, preRewriteFunc)));
       }
     } else if (lookInSpan && query instanceof SpanNotQuery) {
-      list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), field, lookInSpan,
+      list.addAll(Arrays.asList(extractAutomata(((SpanNotQuery) query).getInclude(), fieldMatcher, lookInSpan,
           preRewriteFunc)));
     } else if (lookInSpan && query instanceof SpanPositionCheckQuery) {
-      list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), field, lookInSpan,
+      list.addAll(Arrays.asList(extractAutomata(((SpanPositionCheckQuery) query).getMatch(), fieldMatcher, lookInSpan,
           preRewriteFunc)));
     } else if (lookInSpan && query instanceof SpanMultiTermQueryWrapper) {
-      list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(), field,
-          lookInSpan, preRewriteFunc)));
+      list.addAll(Arrays.asList(extractAutomata(((SpanMultiTermQueryWrapper<?>) query).getWrappedQuery(),
+          fieldMatcher, lookInSpan, preRewriteFunc)));
     } else if (query instanceof AutomatonQuery) {
       final AutomatonQuery aq = (AutomatonQuery) query;
-      if (aq.getField().equals(field)) {
+      if (fieldMatcher.test(aq.getField())) {
         list.add(new CharacterRunAutomaton(aq.getAutomaton()) {
           @Override
           public String toString() {
@@ -110,7 +113,7 @@ class MultiTermHighlighting {
     } else if (query instanceof PrefixQuery) {
       final PrefixQuery pq = (PrefixQuery) query;
       Term prefix = pq.getPrefix();
-      if (prefix.field().equals(field)) {
+      if (fieldMatcher.test(prefix.field())) {
         list.add(new CharacterRunAutomaton(Operations.concatenate(Automata.makeString(prefix.text()),
             Automata.makeAnyString())) {
           @Override
@@ -121,7 +124,7 @@ class MultiTermHighlighting {
       }
     } else if (query instanceof FuzzyQuery) {
       final FuzzyQuery fq = (FuzzyQuery) query;
-      if (fq.getField().equals(field)) {
+      if (fieldMatcher.test(fq.getField())) {
         String utf16 = fq.getTerm().text();
         int termText[] = new int[utf16.codePointCount(0, utf16.length())];
         for (int cp, i = 0, j = 0; i < utf16.length(); i += Character.charCount(cp)) {
@@ -142,7 +145,7 @@ class MultiTermHighlighting {
       }
     } else if (query instanceof TermRangeQuery) {
       final TermRangeQuery tq = (TermRangeQuery) query;
-      if (tq.getField().equals(field)) {
+      if (fieldMatcher.test(tq.getField())) {
         final CharsRef lowerBound;
         if (tq.getLowerTerm() == null) {
           lowerBound = null;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e948fea/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
index cde17ba..d7e8671 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/PhraseHelper.java
@@ -16,17 +16,50 @@
  */
 package org.apache.lucene.search.uhighlight;
 
-import org.apache.lucene.index.*;
-import org.apache.lucene.search.*;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedHashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.function.Function;
+import java.util.function.Predicate;
+
+import org.apache.lucene.index.BinaryDocValues;
+import org.apache.lucene.index.FieldInfos;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterLeafReader;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.SortedDocValues;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TwoPhaseIterator;
 import org.apache.lucene.search.highlight.WeightedSpanTerm;
 import org.apache.lucene.search.highlight.WeightedSpanTermExtractor;
-import org.apache.lucene.search.spans.*;
+import org.apache.lucene.search.spans.SpanCollector;
+import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanWeight;
+import org.apache.lucene.search.spans.Spans;
 import org.apache.lucene.util.BytesRef;
 
-import java.io.IOException;
-import java.util.*;
-import java.util.function.Function;
-
 /**
  * Helps the {@link FieldOffsetStrategy} with strict position highlighting (e.g. highlight phrases correctly).
  * This is a stateful class holding information about the query, but it can (and is) re-used across highlighting
@@ -40,7 +73,7 @@ import java.util.function.Function;
 public class PhraseHelper {
 
   public static final PhraseHelper NONE = new PhraseHelper(new MatchAllDocsQuery(), "_ignored_",
-      spanQuery -> null, query -> null, true);
+      (s) -> false, spanQuery -> null, query -> null, true);
 
   //TODO it seems this ought to be a general thing on Spans?
   private static final Comparator<? super Spans> SPANS_COMPARATOR = (o1, o2) -> {
@@ -59,10 +92,11 @@ public class PhraseHelper {
     }
   };
 
-  private final String fieldName; // if non-null, only look at queries/terms for this field
+  private final String fieldName;
   private final Set<Term> positionInsensitiveTerms; // (TermQuery terms)
   private final Set<SpanQuery> spanQueries;
   private final boolean willRewrite;
+  private final Predicate<String> fieldMatcher;
 
   /**
    * Constructor.
@@ -73,14 +107,15 @@ public class PhraseHelper {
    * to be set before the {@link WeightedSpanTermExtractor}'s extraction is invoked.
    * {@code ignoreQueriesNeedingRewrite} effectively ignores any query clause that needs to be "rewritten", which is
    * usually limited to just a {@link SpanMultiTermQueryWrapper} but could be other custom ones.
+   * The {@code fieldMatcher} predicate selects which query fields' terms are extracted for highlighting.
    */
-  public PhraseHelper(Query query, String field, Function<SpanQuery, Boolean> rewriteQueryPred,
+  public PhraseHelper(Query query, String field, Predicate<String> fieldMatcher, Function<SpanQuery, Boolean> rewriteQueryPred,
                       Function<Query, Collection<Query>> preExtractRewriteFunction,
                       boolean ignoreQueriesNeedingRewrite) {
-    this.fieldName = field; // if null then don't require field match
+    this.fieldName = field;
+    this.fieldMatcher = fieldMatcher;
     // filter terms to those we want
-    positionInsensitiveTerms = field != null ? new FieldFilteringTermHashSet(field) : new HashSet<>();
-    // requireFieldMatch optional
+    positionInsensitiveTerms = new FieldFilteringTermSet();
     spanQueries = new HashSet<>();
 
     // TODO Have toSpanQuery(query) Function as an extension point for those with custom Query impls
@@ -131,11 +166,11 @@ public class PhraseHelper {
       @Override
       protected void extractWeightedSpanTerms(Map<String, WeightedSpanTerm> terms, SpanQuery spanQuery,
                                               float boost) throws IOException {
-        if (field != null) {
-          // if this span query isn't for this field, skip it.
-          Set<String> fieldNameSet = new HashSet<>();//TODO reuse.  note: almost always size 1
-          collectSpanQueryFields(spanQuery, fieldNameSet);
-          if (!fieldNameSet.contains(field)) {
+        // if this span query isn't for this field, skip it.
+        Set<String> fieldNameSet = new HashSet<>();//TODO reuse.  note: almost always size 1
+        collectSpanQueryFields(spanQuery, fieldNameSet);
+        for (String spanField : fieldNameSet) {
+          if (!fieldMatcher.test(spanField)) {
             return;
           }
         }
@@ -190,10 +225,11 @@ public class PhraseHelper {
     if (spanQueries.isEmpty()) {
       return Collections.emptyMap();
     }
+    final LeafReader filteredReader = new SingleFieldFilterLeafReader(leafReader, fieldName);
     // for each SpanQuery, collect the member spans into a map.
     Map<BytesRef, Spans> result = new HashMap<>();
     for (SpanQuery spanQuery : spanQueries) {
-      getTermToSpans(spanQuery, leafReader.getContext(), doc, result);
+      getTermToSpans(spanQuery, filteredReader.getContext(), doc, result);
     }
     return result;
   }
@@ -203,15 +239,14 @@ public class PhraseHelper {
                               int doc, Map<BytesRef, Spans> result)
       throws IOException {
     // note: in WSTE there was some field specific looping that seemed pointless so that isn't here.
-    final IndexSearcher searcher = new IndexSearcher(readerContext);
+    final IndexSearcher searcher = new IndexSearcher(readerContext.reader());
     searcher.setQueryCache(null);
     if (willRewrite) {
       spanQuery = (SpanQuery) searcher.rewrite(spanQuery); // searcher.rewrite loops till done
     }
 
     // Get the underlying query terms
-
-    TreeSet<Term> termSet = new TreeSet<>(); // sorted so we can loop over results in order shortly...
+    TreeSet<Term> termSet = new FieldFilteringTermSet(); // sorted so we can loop over results in order shortly...
     searcher.createWeight(spanQuery, false, 1.0f).extractTerms(termSet);//needsScores==false
 
     // Get Spans by running the query against the reader
@@ -240,9 +275,6 @@ public class PhraseHelper {
     for (final Term queryTerm : termSet) {
       // note: we expect that at least one query term will pass these filters. This is because the collected
       //   spanQuery list were already filtered by these conditions.
-      if (fieldName != null && fieldName.equals(queryTerm.field()) == false) {
-        continue;
-      }
       if (positionInsensitiveTerms.contains(queryTerm)) {
         continue;
       }
@@ -375,19 +407,17 @@ public class PhraseHelper {
   }
 
   /**
-   * Simple HashSet that filters out Terms not matching a desired field on {@code add()}.
+   * Simple TreeSet that filters out Terms not matching the provided predicate on {@code add()}.
    */
-  private static class FieldFilteringTermHashSet extends HashSet<Term> {
-    private final String field;
-
-    FieldFilteringTermHashSet(String field) {
-      this.field = field;
-    }
-
+  private class FieldFilteringTermSet extends TreeSet<Term> {
     @Override
     public boolean add(Term term) {
-      if (term.field().equals(field)) {
-        return super.add(term);
+      if (fieldMatcher.test(term.field())) {
+        if (term.field().equals(fieldName)) {
+          return super.add(term);
+        } else {
+          return super.add(new Term(fieldName, term.bytes()));
+        }
       } else {
         return false;
       }
@@ -500,6 +530,64 @@ public class PhraseHelper {
   }
 
   /**
+   * A reader that delegates every per-field call to a single fixed field of the
+   * wrapped LeafReader, ensuring that all queries going through it target the same field.
+   */
+  static final class SingleFieldFilterLeafReader extends FilterLeafReader {
+    final String fieldName;
+    SingleFieldFilterLeafReader(LeafReader in, String fieldName) {
+      super(in);
+      this.fieldName = fieldName;
+    }
+
+    @Override
+    public FieldInfos getFieldInfos() {
+      throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public Fields fields() throws IOException {
+      return new FilterFields(super.fields()) {
+        @Override
+        public Terms terms(String field) throws IOException {
+          return super.terms(fieldName);
+        }
+
+        @Override
+        public Iterator<String> iterator() {
+          return Collections.singletonList(fieldName).iterator();
+        }
+
+        @Override
+        public int size() {
+          return 1;
+        }
+      };
+    }
+
+    @Override
+    public NumericDocValues getNumericDocValues(String field) throws IOException {
+      return super.getNumericDocValues(fieldName);
+    }
+
+    @Override
+    public BinaryDocValues getBinaryDocValues(String field) throws IOException {
+      return super.getBinaryDocValues(fieldName);
+    }
+
+    @Override
+    public SortedDocValues getSortedDocValues(String field) throws IOException {
+      return super.getSortedDocValues(fieldName);
+    }
+
+    @Override
+    public NumericDocValues getNormValues(String field) throws IOException {
+      return super.getNormValues(fieldName);
+    }
+  }
+
+
+  /**
    * A Spans based on a list of cached spans for one doc.  It is pre-positioned to this doc.
    */
   private static class CachedSpans extends Spans {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e948fea/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
index ac5f0f6..bbcfd5b 100644
--- a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
+++ b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
@@ -24,6 +24,7 @@ import java.util.Arrays;
 import java.util.Collection;
 import java.util.EnumSet;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
@@ -31,6 +32,7 @@ import java.util.Objects;
 import java.util.Set;
 import java.util.SortedSet;
 import java.util.TreeSet;
+import java.util.function.Predicate;
 import java.util.function.Supplier;
 
 import org.apache.lucene.analysis.Analyzer;
@@ -58,7 +60,6 @@ import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.InPlaceMergeSorter;
-import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 
 /**
@@ -119,13 +120,13 @@ public class UnifiedHighlighter {
 
   private boolean defaultPassageRelevancyOverSpeed = true; //For analysis, prefer MemoryIndexOffsetStrategy
 
-  // private boolean defaultRequireFieldMatch = true; TODO
-
   private int maxLength = DEFAULT_MAX_LENGTH;
 
   // BreakIterator is stateful so we use a Supplier factory method
   private Supplier<BreakIterator> defaultBreakIterator = () -> BreakIterator.getSentenceInstance(Locale.ROOT);
 
+  private Predicate<String> defaultFieldMatcher;
+
   private PassageScorer defaultScorer = new PassageScorer();
 
   private PassageFormatter defaultFormatter = new DefaultPassageFormatter();
@@ -140,8 +141,8 @@ public class UnifiedHighlighter {
   /**
    * Calls {@link Weight#extractTerms(Set)} on an empty index for the query.
    */
-  protected static SortedSet<Term> extractTerms(Query query) throws IOException {
-    SortedSet<Term> queryTerms = new TreeSet<>();
+  protected static Set<Term> extractTerms(Query query) throws IOException {
+    Set<Term> queryTerms = new HashSet<>();
     EMPTY_INDEXSEARCHER.createNormalizedWeight(query, false).extractTerms(queryTerms);
     return queryTerms;
   }
@@ -197,6 +198,10 @@ public class UnifiedHighlighter {
     this.cacheFieldValCharsThreshold = cacheFieldValCharsThreshold;
   }
 
+  public void setFieldMatcher(Predicate<String> predicate) {
+    this.defaultFieldMatcher = predicate;
+  }
+
   /**
    * Returns whether {@link MultiTermQuery} derivatives will be highlighted.  By default it's enabled.  MTQ
    * highlighting can be expensive, particularly when using offsets in postings.
@@ -220,6 +225,18 @@ public class UnifiedHighlighter {
     return defaultPassageRelevancyOverSpeed;
   }
 
+  /**
+   * Returns the predicate to use for extracting the query part that must be highlighted.
+   * By default only queries that target the current field are kept. (AKA requireFieldMatch)
+   */
+  protected Predicate<String> getFieldMatcher(String field) {
+    if (defaultFieldMatcher != null) {
+      return defaultFieldMatcher;
+    } else {
+      // requireFieldMatch = true
+      return (qf) -> field.equals(qf);
+    }
+  }
 
   /**
    * The maximum content size to process.  Content will be truncated to this size before highlighting. Typically
@@ -548,7 +565,7 @@ public class UnifiedHighlighter {
     copyAndSortFieldsWithMaxPassages(fieldsIn, maxPassagesIn, fields, maxPassages); // latter 2 are "out" params
 
     // Init field highlighters (where most of the highlight logic lives, and on a per field basis)
-    SortedSet<Term> queryTerms = extractTerms(query);
+    Set<Term> queryTerms = extractTerms(query);
     FieldHighlighter[] fieldHighlighters = new FieldHighlighter[fields.length];
     int numTermVectors = 0;
     int numPostings = 0;
@@ -718,13 +735,13 @@ public class UnifiedHighlighter {
           getClass().getSimpleName() + " without an IndexSearcher.");
     }
     Objects.requireNonNull(content, "content is required");
-    SortedSet<Term> queryTerms = extractTerms(query);
+    Set<Term> queryTerms = extractTerms(query);
     return getFieldHighlighter(field, query, queryTerms, maxPassages)
         .highlightFieldForDoc(null, -1, content);
   }
 
-  protected FieldHighlighter getFieldHighlighter(String field, Query query, SortedSet<Term> allTerms, int maxPassages) {
-    BytesRef[] terms = filterExtractedTerms(field, allTerms);
+  protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
+    BytesRef[] terms = filterExtractedTerms(getFieldMatcher(field), allTerms);
     Set<HighlightFlag> highlightFlags = getFlags(field);
     PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags);
     CharacterRunAutomaton[] automata = getAutomata(field, query, highlightFlags);
@@ -738,19 +755,15 @@ public class UnifiedHighlighter {
         getFormatter(field));
   }
 
-  protected static BytesRef[] filterExtractedTerms(String field, SortedSet<Term> queryTerms) {
-    // TODO consider requireFieldMatch
-    Term floor = new Term(field, "");
-    Term ceiling = new Term(field, UnicodeUtil.BIG_TERM);
-    SortedSet<Term> fieldTerms = queryTerms.subSet(floor, ceiling);
-
-    // Strip off the redundant field:
-    BytesRef[] terms = new BytesRef[fieldTerms.size()];
-    int termUpto = 0;
-    for (Term term : fieldTerms) {
-      terms[termUpto++] = term.bytes();
+  protected static BytesRef[] filterExtractedTerms(Predicate<String> fieldMatcher, Set<Term> queryTerms) {
+    // Strip off the redundant field and sort the remaining terms
+    SortedSet<BytesRef> filteredTerms = new TreeSet<>();
+    for (Term term : queryTerms) {
+      if (fieldMatcher.test(term.field())) {
+        filteredTerms.add(term.bytes());
+      }
     }
-    return terms;
+    return filteredTerms.toArray(new BytesRef[filteredTerms.size()]);
   }
 
   protected Set<HighlightFlag> getFlags(String field) {
@@ -771,14 +784,13 @@ public class UnifiedHighlighter {
     boolean highlightPhrasesStrictly = highlightFlags.contains(HighlightFlag.PHRASES);
     boolean handleMultiTermQuery = highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY);
     return highlightPhrasesStrictly ?
-        new PhraseHelper(query, field, this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) :
-        PhraseHelper.NONE;
+        new PhraseHelper(query, field, getFieldMatcher(field),
+            this::requiresRewrite, this::preSpanQueryRewrite, !handleMultiTermQuery) : PhraseHelper.NONE;
   }
 
   protected CharacterRunAutomaton[] getAutomata(String field, Query query, Set<HighlightFlag> highlightFlags) {
     return highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
-        ? MultiTermHighlighting.extractAutomata(query, field, !highlightFlags.contains(HighlightFlag.PHRASES),
-          this::preMultiTermQueryRewrite)
+        ? MultiTermHighlighting.extractAutomata(query, getFieldMatcher(field), !highlightFlags.contains(HighlightFlag.PHRASES), this::preMultiTermQueryRewrite)
         : ZERO_LEN_AUTOMATA_ARRAY;
   }
 
@@ -826,7 +838,7 @@ public class UnifiedHighlighter {
           //skip using a memory index since it's pure term filtering
           return new TokenStreamOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer());
         } else {
-          return new MemoryIndexOffsetStrategy(field, terms, phraseHelper, automata, getIndexAnalyzer(),
+          return new MemoryIndexOffsetStrategy(field, getFieldMatcher(field), terms, phraseHelper, automata, getIndexAnalyzer(),
               this::preMultiTermQueryRewrite);
         }
       case NONE_NEEDED:

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e948fea/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
index 0fd7d3d..ddf8a92 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
@@ -25,6 +25,7 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.function.Predicate;
 
 import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -32,14 +33,17 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.FuzzyQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.PrefixQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Sort;
@@ -959,4 +963,275 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
     ir.close();
   }
 
+  private IndexReader indexSomeFields() throws IOException {
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+    FieldType ft = new FieldType();
+    ft.setIndexOptions(IndexOptions.NONE);
+    ft.setTokenized(false);
+    ft.setStored(true);
+    ft.freeze();
+
+    // note: the fields below use the test class's parameterized 'fieldType';
+    // the stored-only 'ft' above is currently unused in this method
+    Field title = new Field("title", "", fieldType);
+    Field text = new Field("text", "", fieldType);
+    Field category = new Field("category", "", fieldType);
+
+    Document doc = new Document();
+    doc.add(title);
+    doc.add(text);
+    doc.add(category);
+    title.setStringValue("This is the title field.");
+    text.setStringValue("This is the text field. You can put some text if you want.");
+    category.setStringValue("This is the category field.");
+    iw.addDocument(doc);
+
+    IndexReader ir = iw.getReader();
+    iw.close();
+    return ir;
+  }
+
+  public void testFieldMatcherTermQuery() throws Exception {
+    IndexReader ir = indexSomeFields();
+    IndexSearcher searcher = newSearcher(ir);
+    UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) {
+      @Override
+      protected Predicate<String> getFieldMatcher(String field) {
+        // requireFieldMatch=false
+        return (qf) -> true;
+      }
+    };
+    UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer);
+    BooleanQuery.Builder queryBuilder =
+        new BooleanQuery.Builder()
+            .add(new TermQuery(new Term("text", "some")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("text", "field")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("text", "this")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("title", "is")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("title", "this")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("category", "this")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("category", "some")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("category", "category")), BooleanClause.Occur.SHOULD);
+    Query query = queryBuilder.build();
+
+    // title
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the title <b>field</b>.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
+      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> is the title <b>field</b>.", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+
+    // text
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> is the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
+      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the text field. ", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+
+    // category
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the <b>category</b> <b>field</b>.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> is the <b>category</b> field.", snippets[0]);
+
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
+      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+    ir.close();
+  }
+
+  public void testFieldMatcherMultiTermQuery() throws Exception {
+    IndexReader ir = indexSomeFields();
+    IndexSearcher searcher = newSearcher(ir);
+    UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) {
+      @Override
+      protected Predicate<String> getFieldMatcher(String field) {
+        // requireFieldMatch=false
+        return (qf) -> true;
+      }
+    };
+    UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer);
+    BooleanQuery.Builder queryBuilder =
+        new BooleanQuery.Builder()
+            .add(new FuzzyQuery(new Term("text", "sime"), 1), BooleanClause.Occur.SHOULD)
+            .add(new PrefixQuery(new Term("text", "fie")), BooleanClause.Occur.SHOULD)
+            .add(new PrefixQuery(new Term("text", "thi")), BooleanClause.Occur.SHOULD)
+            .add(new TermQuery(new Term("title", "is")), BooleanClause.Occur.SHOULD)
+            .add(new PrefixQuery(new Term("title", "thi")), BooleanClause.Occur.SHOULD)
+            .add(new PrefixQuery(new Term("category", "thi")), BooleanClause.Occur.SHOULD)
+            .add(new FuzzyQuery(new Term("category", "sime"), 1), BooleanClause.Occur.SHOULD)
+            .add(new PrefixQuery(new Term("category", "categ")), BooleanClause.Occur.SHOULD);
+    Query query = queryBuilder.build();
+
+    // title
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the title <b>field</b>.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
+      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> is the title <b>field</b>.", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+
+    // text
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> is the text <b>field</b>. You can put <b>some</b> text if you want.", snippets[0]);
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
+      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the text field. ", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+
+    // category
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the <b>category</b> <b>field</b>.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> is the <b>category</b> field.", snippets[0]);
+
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
+      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+    ir.close();
+  }
+
+  public void testFieldMatcherPhraseQuery() throws Exception {
+    IndexReader ir = indexSomeFields();
+    IndexSearcher searcher = newSearcher(ir);
+    UnifiedHighlighter highlighterNoFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer) {
+      @Override
+      protected Predicate<String> getFieldMatcher(String field) {
+        // requireFieldMatch=false
+        return (qf) -> true;
+      }
+    };
+    UnifiedHighlighter highlighterFieldMatch = new UnifiedHighlighter(searcher, indexAnalyzer);
+    BooleanQuery.Builder queryBuilder =
+        new BooleanQuery.Builder()
+            .add(new PhraseQuery("title", "this", "is", "the", "title"), BooleanClause.Occur.SHOULD)
+            .add(new PhraseQuery(2, "category", "this", "is", "the", "field"), BooleanClause.Occur.SHOULD)
+            .add(new PhraseQuery("text", "this", "is"), BooleanClause.Occur.SHOULD)
+            .add(new PhraseQuery("category", "this", "is"), BooleanClause.Occur.SHOULD)
+            .add(new PhraseQuery(1, "text", "you", "can", "put", "text"), BooleanClause.Occur.SHOULD);
+    Query query = queryBuilder.build();
+
+    // title
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> <b>field</b>.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>title</b> field.", snippets[0]);
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
+      snippets = highlighterFieldMatch.highlight("title", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the title field.", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+
+    // text
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> <b>the</b> <b>text</b> <b>field</b>. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the <b>text</b> field. <b>You</b> <b>can</b> <b>put</b> some <b>text</b> if you want.", snippets[0]);
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "title".equals(fq));
+      snippets = highlighterFieldMatch.highlight("text", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("This is the text field. You can put some text if you want.", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+
+    // category
+    {
+      TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
+      assertEquals(1, topDocs.totalHits);
+      String[] snippets = highlighterNoFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
+
+      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> <b>the</b> category <b>field</b>.", snippets[0]);
+
+
+      highlighterFieldMatch.setFieldMatcher((fq) -> "text".equals(fq));
+      snippets = highlighterFieldMatch.highlight("category", query, topDocs, 10);
+      assertEquals(1, snippets.length);
+      assertEquals("<b>This</b> <b>is</b> the category field.", snippets[0]);
+      highlighterFieldMatch.setFieldMatcher(null);
+    }
+    ir.close();
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2e948fea/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
----------------------------------------------------------------------
diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
index d150940..10757a5 100644
--- a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
+++ b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/visibility/TestUnifiedHighlighterExtensibility.java
@@ -23,7 +23,6 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.SortedSet;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -144,7 +143,7 @@ public class TestUnifiedHighlighterExtensibility extends LuceneTestCase {
       }
 
       @Override
-      protected FieldHighlighter getFieldHighlighter(String field, Query query, SortedSet<Term> allTerms, int maxPassages) {
+      protected FieldHighlighter getFieldHighlighter(String field, Query query, Set<Term> allTerms, int maxPassages) {
         return super.getFieldHighlighter(field, query, allTerms, maxPassages);
       }
 


[49/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7589: Prevent outliers from raising the bpv for everyone.

Posted by kr...@apache.org.
LUCENE-7589: Prevent outliers from raising the bpv for everyone.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/3b182aa2
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/3b182aa2
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/3b182aa2

Branch: refs/heads/jira/solr-8593
Commit: 3b182aa2fb3e4062f6ec5be819f3aa70aa2e523d
Parents: ea1569e
Author: Adrien Grand <jp...@gmail.com>
Authored: Thu Dec 15 16:33:36 2016 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Thu Dec 15 17:17:54 2016 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   4 +
 .../lucene70/Lucene70DocValuesConsumer.java     | 163 +++++++++++---
 .../lucene70/Lucene70DocValuesFormat.java       |   5 +-
 .../lucene70/Lucene70DocValuesProducer.java     | 220 +++++++++++++++----
 .../java/org/apache/lucene/util/LongValues.java |   9 +
 .../apache/lucene/util/packed/DirectWriter.java |   8 +-
 .../lucene70/TestLucene70DocValuesFormat.java   | 152 +++++++++++++
 7 files changed, 479 insertions(+), 82 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3b182aa2/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index bacc270..7e61469 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -48,6 +48,10 @@ Optimizations
 * LUCENE-7519: Add optimized APIs to compute browse-only top level
   facets (Mike McCandless)
 
+* LUCENE-7589: Numeric doc values now have the ability to encode blocks of
+  values using different numbers of bits per value if this proves to save
+  storage. (Adrien Grand)
+
 Other
 
 * LUCENE-7328: Remove LegacyNumericEncoding from GeoPointField. (Nick Knize)

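A back-of-the-envelope sketch of why per-block encoding defeats outliers (illustrative arithmetic, not code from the patch):

  // 1M values in [0, 3] need 2 bits each, but one stray value near 2^40
  // needs 41 bits. With a single encoding, every value pays 41 bits:
  int smallBits   = DirectWriter.unsignedBitsRequired(3);          // 2
  int outlierBits = DirectWriter.unsignedBitsRequired(1L << 40);   // 41
  long singleCost = 1_000_001L * outlierBits;                      // ~41 Mbit
  // With 16384-value blocks, only the outlier's block pays the high rate:
  long blockedCost = 1_000_000L * smallBits
      + 16384L * (outlierBits - smallBits);                        // ~2.6 Mbit
  // Well past the 10% savings threshold the writer checks for below.
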
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3b182aa2/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesConsumer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesConsumer.java
index e1b66e1..2dd68e9 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesConsumer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesConsumer.java
@@ -18,6 +18,8 @@ package org.apache.lucene.codecs.lucene70;
 
 
 import static org.apache.lucene.codecs.lucene70.Lucene70DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
+import static org.apache.lucene.codecs.lucene70.Lucene70DocValuesFormat.NUMERIC_BLOCK_SHIFT;
+import static org.apache.lucene.codecs.lucene70.Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE;
 
 import java.io.Closeable; // javadocs
 import java.io.IOException;
@@ -42,6 +44,7 @@ import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.SortedSetSelector;
+import org.apache.lucene.store.GrowableByteArrayDataOutput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.BytesRef;
@@ -112,12 +115,46 @@ final class Lucene70DocValuesConsumer extends DocValuesConsumer implements Close
     });
   }
 
+  private static class MinMaxTracker {
+    long min, max, numValues, spaceInBits;
+
+    MinMaxTracker() {
+      reset();
+      spaceInBits = 0;
+    }
+
+    private void reset() {
+      min = Long.MAX_VALUE;
+      max = Long.MIN_VALUE;
+      numValues = 0;
+    }
+
+    /** Accumulate a new value. */
+    void update(long v) {
+      min = Math.min(min, v);
+      max = Math.max(max, v);
+      ++numValues;
+    }
+
+    /** Update the required space. */
+    void finish() {
+      if (max > min) {
+        spaceInBits += DirectWriter.unsignedBitsRequired(max - min) * numValues;
+      }
+    }
+
+    /** Update space usage and get ready for accumulating values for the next block. */
+    void nextBlock() {
+      finish();
+      reset();
+    }
+  }
+
   private long[] writeValues(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
     SortedNumericDocValues values = valuesProducer.getSortedNumeric(field);
     int numDocsWithValue = 0;
-    long numValues = 0;
-    long min = Long.MAX_VALUE;
-    long max = Long.MIN_VALUE;
+    MinMaxTracker minMax = new MinMaxTracker();
+    MinMaxTracker blockMinMax = new MinMaxTracker();
     long gcd = 0;
     Set<Long> uniqueValues = new HashSet<>();
     for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
@@ -130,26 +167,35 @@ final class Lucene70DocValuesConsumer extends DocValuesConsumer implements Close
             // wrong results. Since these extreme values are unlikely, we just discard
             // GCD computation for them
             gcd = 1;
-          } else if (numValues != 0) { // minValue needs to be set first
-            gcd = MathUtil.gcd(gcd, v - min);
+          } else if (minMax.numValues != 0) { // minValue needs to be set first
+            gcd = MathUtil.gcd(gcd, v - minMax.min);
           }
         }
 
-        min = Math.min(min, v);
-        max = Math.max(max, v);
+        minMax.update(v);
+        blockMinMax.update(v);
+        if (blockMinMax.numValues == NUMERIC_BLOCK_SIZE) {
+          blockMinMax.nextBlock();
+        }
 
         if (uniqueValues != null
             && uniqueValues.add(v)
             && uniqueValues.size() > 256) {
           uniqueValues = null;
         }
-
-        numValues++;
       }
 
       numDocsWithValue++;
     }
 
+    minMax.finish();
+    blockMinMax.finish();
+
+    final long numValues = minMax.numValues;
+    long min = minMax.min;
+    final long max = minMax.max;
+    assert blockMinMax.spaceInBits <= minMax.spaceInBits;
+
     if (numDocsWithValue == 0) {
       meta.writeLong(-2);
       meta.writeLong(0L);
@@ -166,6 +212,7 @@ final class Lucene70DocValuesConsumer extends DocValuesConsumer implements Close
 
     meta.writeLong(numValues);
     final int numBitsPerValue;
+    boolean doBlocks = false;
     Map<Long, Integer> encode = null;
     if (min >= max) {
       numBitsPerValue = 0;
@@ -189,12 +236,19 @@ final class Lucene70DocValuesConsumer extends DocValuesConsumer implements Close
         gcd = 1;
       } else {
         uniqueValues = null;
-        numBitsPerValue = DirectWriter.unsignedBitsRequired((max - min) / gcd);
-        if (gcd == 1 && min > 0
-            && DirectWriter.unsignedBitsRequired(max) == DirectWriter.unsignedBitsRequired(max - min)) {
-          min = 0;
+        // we do blocks if that appears to save 10+% storage
+        doBlocks = minMax.spaceInBits > 0 && (double) blockMinMax.spaceInBits / minMax.spaceInBits <= 0.9;
+        if (doBlocks) {
+          numBitsPerValue = 0xFF;
+          meta.writeInt(-2 - NUMERIC_BLOCK_SHIFT);
+        } else {
+          numBitsPerValue = DirectWriter.unsignedBitsRequired((max - min) / gcd);
+          if (gcd == 1 && min > 0
+              && DirectWriter.unsignedBitsRequired(max) == DirectWriter.unsignedBitsRequired(max - min)) {
+            min = 0;
+          }
+          meta.writeInt(-1);
         }
-        meta.writeInt(-1);
       }
     }
 
@@ -203,26 +257,79 @@ final class Lucene70DocValuesConsumer extends DocValuesConsumer implements Close
     meta.writeLong(gcd);
     long startOffset = data.getFilePointer();
     meta.writeLong(startOffset);
-    if (numBitsPerValue != 0) {
-      values = valuesProducer.getSortedNumeric(field);
-      DirectWriter writer = DirectWriter.getInstance(data, numValues, numBitsPerValue);
-      for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
-        for (int i = 0, count = values.docValueCount(); i < count; ++i) {
-          long v = values.nextValue();
-          if (encode == null) {
-            writer.add((v - min) / gcd);
-          } else {
-            writer.add(encode.get(v));
-          }
-        }
-      }
-      writer.finish();
+    if (doBlocks) {
+      writeValuesMultipleBlocks(valuesProducer.getSortedNumeric(field), gcd);
+    } else if (numBitsPerValue != 0) {
+      writeValuesSingleBlock(valuesProducer.getSortedNumeric(field), numValues, numBitsPerValue, min, gcd, encode);
     }
     meta.writeLong(data.getFilePointer() - startOffset);
 
     return new long[] {numDocsWithValue, numValues};
   }
 
+  private void writeValuesSingleBlock(SortedNumericDocValues values, long numValues, int numBitsPerValue,
+      long min, long gcd, Map<Long, Integer> encode) throws IOException {
+    DirectWriter writer = DirectWriter.getInstance(data, numValues, numBitsPerValue);
+    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
+      for (int i = 0, count = values.docValueCount(); i < count; ++i) {
+        long v = values.nextValue();
+        if (encode == null) {
+          writer.add((v - min) / gcd);
+        } else {
+          writer.add(encode.get(v));
+        }
+      }
+    }
+    writer.finish();
+  }
+ 
+  private void writeValuesMultipleBlocks(SortedNumericDocValues values, long gcd) throws IOException {
+    final long[] buffer = new long[NUMERIC_BLOCK_SIZE];
+    final GrowableByteArrayDataOutput encodeBuffer = new GrowableByteArrayDataOutput(NUMERIC_BLOCK_SIZE);
+    int upTo = 0;
+    for (int doc = values.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = values.nextDoc()) {
+      for (int i = 0, count = values.docValueCount(); i < count; ++i) {
+        buffer[upTo++] = values.nextValue();
+        if (upTo == NUMERIC_BLOCK_SIZE) {
+          writeBlock(buffer, NUMERIC_BLOCK_SIZE, gcd, encodeBuffer);
+          upTo = 0;
+        }
+      }
+    }
+    if (upTo > 0) {
+      writeBlock(buffer, upTo, gcd, encodeBuffer);
+    }
+  }
+
+  private void writeBlock(long[] values, int length, long gcd, GrowableByteArrayDataOutput buffer) throws IOException {
+    assert length > 0;
+    long min = values[0];
+    long max = values[0];
+    for (int i = 1; i < length; ++i) {
+      final long v = values[i];
+      assert Math.floorMod(values[i] - min, gcd) == 0;
+      min = Math.min(min, v);
+      max = Math.max(max, v);
+    }
+    if (min == max) {
+      data.writeByte((byte) 0);
+      data.writeLong(min);
+    } else {
+      final int bitsPerValue = DirectWriter.unsignedBitsRequired(max - min);
+      buffer.reset();
+      assert buffer.getPosition() == 0;
+      final DirectWriter w = DirectWriter.getInstance(buffer, length, bitsPerValue);
+      for (int i = 0; i < length; ++i) {
+        w.add((values[i] - min) / gcd);
+      }
+      w.finish();
+      data.writeByte((byte) bitsPerValue);
+      data.writeLong(min);
+      data.writeInt(buffer.getPosition());
+      data.writeBytes(buffer.getBytes(), buffer.getPosition());
+    }
+  }
+
   @Override
   public void addBinaryField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
     meta.writeInt(field.number);

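Read together with writeBlock above, each block the consumer emits is self-describing. The per-block layout, inferred from the code (a sketch, not a normative spec):

  byte   bitsPerValue   -- 0 means every value in the block equals 'min'
  long   min            -- per-block base; values are stored as (v - min) / gcd
  if bitsPerValue != 0:
    int    length       -- byte length of the packed payload
    byte[] packed       -- DirectWriter-packed deltas, bitsPerValue bits each
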
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3b182aa2/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesFormat.java
index ee477d6..2ce2124 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesFormat.java
@@ -146,10 +146,11 @@ public final class Lucene70DocValuesFormat extends DocValuesFormat {
   static final byte SORTED_SET = 3;
   static final byte SORTED_NUMERIC = 4;
 
-  // addressing uses 16k blocks
-  static final int MONOTONIC_BLOCK_SIZE = 16384;
   static final int DIRECT_MONOTONIC_BLOCK_SHIFT = 16;
 
+  static final int NUMERIC_BLOCK_SHIFT = 14;
+  static final int NUMERIC_BLOCK_SIZE = 1 << NUMERIC_BLOCK_SHIFT;
+
   static final int TERMS_DICT_BLOCK_SHIFT = 4;
   static final int TERMS_DICT_BLOCK_SIZE = 1 << TERMS_DICT_BLOCK_SHIFT;
   static final int TERMS_DICT_BLOCK_MASK = TERMS_DICT_BLOCK_SIZE - 1;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3b182aa2/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
index 3f3e73f..386655e 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/Lucene70DocValuesProducer.java
@@ -144,7 +144,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
     entry.docsWithFieldLength = meta.readLong();
     entry.numValues = meta.readLong();
     int tableSize = meta.readInt();
-    if (tableSize < -1 || tableSize > 256) {
+    if (tableSize > 256) {
       throw new CorruptIndexException("invalid table size: " + tableSize, meta);
     }
     if (tableSize >= 0) {
@@ -154,6 +154,11 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
         entry.table[i] = meta.readLong();
       }
     }
+    if (tableSize < -1) {
+      entry.blockShift = -2 - tableSize;
+    } else {
+      entry.blockShift = -1;
+    }
     entry.bitsPerValue = meta.readByte();
     entry.minValue = meta.readLong();
     entry.gcd = meta.readLong();
@@ -260,6 +265,7 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
 
   private static class NumericEntry {
     long[] table;
+    int blockShift;
     byte bitsPerValue;
     long docsWithFieldOffset;
     long docsWithFieldLength;
@@ -429,24 +435,62 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
         };
       } else {
         final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
-        final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
-        if (entry.table != null) {
-          final long[] table = entry.table;
-          return new DenseNumericDocValues(maxDoc) {
-            @Override
-            public long longValue() throws IOException {
-              return table[(int) values.get(doc)];
-            }
-          };
-        } else {
+        if (entry.blockShift >= 0) {
+          // dense but split into blocks of different bits per value
+          final int shift = entry.blockShift;
           final long mul = entry.gcd;
-          final long delta = entry.minValue;
+          final int mask = (1 << shift) - 1;
           return new DenseNumericDocValues(maxDoc) {
+            int block = -1;
+            long delta;
+            long offset;
+            long blockEndOffset;
+            LongValues values;
+
             @Override
             public long longValue() throws IOException {
-              return mul * values.get(doc) + delta;
+              final int block = doc >>> shift;
+              if (this.block != block) {
+                int bitsPerValue;
+                do {
+                  offset = blockEndOffset;
+                  bitsPerValue = slice.readByte(offset++);
+                  delta = slice.readLong(offset);
+                  offset += Long.BYTES;
+                  if (bitsPerValue == 0) {
+                    blockEndOffset = offset;
+                  } else {
+                    final int length = slice.readInt(offset);
+                    offset += Integer.BYTES;
+                    blockEndOffset = offset + length;
+                  }
+                  this.block ++;
+                } while (this.block != block);
+                values = bitsPerValue == 0 ? LongValues.ZEROES : DirectReader.getInstance(slice, bitsPerValue, offset);
+              }
+              return mul * values.get(doc & mask) + delta;
             }
           };
+        } else {
+          final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
+          if (entry.table != null) {
+            final long[] table = entry.table;
+            return new DenseNumericDocValues(maxDoc) {
+              @Override
+              public long longValue() throws IOException {
+                return table[(int) values.get(doc)];
+              }
+            };
+          } else {
+            final long mul = entry.gcd;
+            final long delta = entry.minValue;
+            return new DenseNumericDocValues(maxDoc) {
+              @Override
+              public long longValue() throws IOException {
+                return mul * values.get(doc) + delta;
+              }
+            };
+          }
         }
       }
     } else {
@@ -461,24 +505,63 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
         };
       } else {
         final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
-        final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
-        if (entry.table != null) {
-          final long[] table = entry.table;
-          return new SparseNumericDocValues(disi) {
-            @Override
-            public long longValue() throws IOException {
-              return table[(int) values.get(disi.index())];
-            }
-          };
-        } else {
+        if (entry.blockShift >= 0) {
+          // sparse and split into blocks of different bits per value
+          final int shift = entry.blockShift;
           final long mul = entry.gcd;
-          final long delta = entry.minValue;
+          final int mask = (1 << shift) - 1;
           return new SparseNumericDocValues(disi) {
+            int block = -1;
+            long delta;
+            long offset;
+            long blockEndOffset;
+            LongValues values;
+
             @Override
             public long longValue() throws IOException {
-              return mul * values.get(disi.index()) + delta;
+              final int index = disi.index();
+              final int block = index >>> shift;
+              if (this.block != block) {
+                int bitsPerValue;
+                do {
+                  offset = blockEndOffset;
+                  bitsPerValue = slice.readByte(offset++);
+                  delta = slice.readLong(offset);
+                  offset += Long.BYTES;
+                  if (bitsPerValue == 0) {
+                    blockEndOffset = offset;
+                  } else {
+                    final int length = slice.readInt(offset);
+                    offset += Integer.BYTES;
+                    blockEndOffset = offset + length;
+                  }
+                  this.block ++;
+                } while (this.block != block);
+                values = bitsPerValue == 0 ? LongValues.ZEROES : DirectReader.getInstance(slice, bitsPerValue, offset);
+              }
+              return mul * values.get(index & mask) + delta;
             }
           };
+        } else {
+          final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
+          if (entry.table != null) {
+            final long[] table = entry.table;
+            return new SparseNumericDocValues(disi) {
+              @Override
+              public long longValue() throws IOException {
+                return table[(int) values.get(disi.index())];
+              }
+            };
+          } else {
+            final long mul = entry.gcd;
+            final long delta = entry.minValue;
+            return new SparseNumericDocValues(disi) {
+              @Override
+              public long longValue() throws IOException {
+                return mul * values.get(disi.index()) + delta;
+              }
+            };
+          }
         }
       }
     }
@@ -494,34 +577,75 @@ final class Lucene70DocValuesProducer extends DocValuesProducer implements Close
       };
     } else {
       final RandomAccessInput slice = data.randomAccessSlice(entry.valuesOffset, entry.valuesLength);
-      final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
-      if (entry.table != null) {
-        final long[] table = entry.table;
+      if (entry.blockShift >= 0) {
+        final int shift = entry.blockShift;
+        final long mul = entry.gcd;
+        final long mask = (1L << shift) - 1;
         return new LongValues() {
-          @Override
-          public long get(long index) {
-            return table[(int) values.get(index)];
-          }
-        };
-      } else if (entry.gcd != 1) {
-        final long gcd = entry.gcd;
-        final long minValue = entry.minValue;
-        return new LongValues() {
-          @Override
-          public long get(long index) {
-            return values.get(index) * gcd + minValue;
-          }
-        };
-      } else if (entry.minValue != 0) {
-        final long minValue = entry.minValue;
-        return new LongValues() {
-          @Override
+          long block = -1;
+          long delta;
+          long offset;
+          long blockEndOffset;
+          LongValues values;
+
           public long get(long index) {
-            return values.get(index) + minValue;
+            final long block = index >>> shift;
+            if (this.block != block) {
+              assert block > this.block : "Reading backwards is illegal: " + this.block + " < " + block;
+              int bitsPerValue;
+              do {
+                offset = blockEndOffset;
+                try {
+                  bitsPerValue = slice.readByte(offset++);
+                  delta = slice.readLong(offset);
+                  offset += Long.BYTES;
+                  if (bitsPerValue == 0) {
+                    blockEndOffset = offset;
+                  } else {
+                    final int length = slice.readInt(offset);
+                    offset += Integer.BYTES;
+                    blockEndOffset = offset + length;
+                  }
+                } catch (IOException e) {
+                  throw new RuntimeException(e);
+                }
+                this.block ++;
+              } while (this.block != block);
+              values = bitsPerValue == 0 ? LongValues.ZEROES : DirectReader.getInstance(slice, bitsPerValue, offset);
+            }
+            return mul * values.get(index & mask) + delta;
           }
         };
       } else {
-        return values;
+        final LongValues values = DirectReader.getInstance(slice, entry.bitsPerValue);
+        if (entry.table != null) {
+          final long[] table = entry.table;
+          return new LongValues() {
+            @Override
+            public long get(long index) {
+              return table[(int) values.get(index)];
+            }
+          };
+        } else if (entry.gcd != 1) {
+          final long gcd = entry.gcd;
+          final long minValue = entry.minValue;
+          return new LongValues() {
+            @Override
+            public long get(long index) {
+              return values.get(index) * gcd + minValue;
+            }
+          };
+        } else if (entry.minValue != 0) {
+          final long minValue = entry.minValue;
+          return new LongValues() {
+            @Override
+            public long get(long index) {
+              return values.get(index) + minValue;
+            }
+          };
+        } else {
+          return values;
+        }
       }
     }
   }

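All three readers above share the same lazy, forward-only decoding scheme. With NUMERIC_BLOCK_SHIFT = 14, an ordinal maps into the block stream as follows (an illustrative fragment; the assert in the LongValues variant makes the forward-only contract explicit):

  final int shift = 14;                        // NUMERIC_BLOCK_SHIFT
  long block  = index >>> shift;               // which block holds the value
  long within = index & ((1L << shift) - 1);   // position inside that block
  // On a block change the reader scans forward over block headers, then
  // caches a DirectReader positioned at the new block's packed payload.
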
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3b182aa2/lucene/core/src/java/org/apache/lucene/util/LongValues.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/LongValues.java b/lucene/core/src/java/org/apache/lucene/util/LongValues.java
index 23f4d32..04fbf81 100644
--- a/lucene/core/src/java/org/apache/lucene/util/LongValues.java
+++ b/lucene/core/src/java/org/apache/lucene/util/LongValues.java
@@ -30,6 +30,15 @@ public abstract class LongValues  {
 
   };
 
+  public static final LongValues ZEROES = new LongValues() {
+
+    @Override
+    public long get(long index) {
+      return 0;
+    }
+
+  };
+
   /** Get value at <code>index</code>. */
   public abstract long get(long index);
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3b182aa2/lucene/core/src/java/org/apache/lucene/util/packed/DirectWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/DirectWriter.java b/lucene/core/src/java/org/apache/lucene/util/packed/DirectWriter.java
index 9a7f18e..5a38445 100644
--- a/lucene/core/src/java/org/apache/lucene/util/packed/DirectWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/DirectWriter.java
@@ -21,7 +21,7 @@ import java.io.EOFException;
 import java.io.IOException;
 import java.util.Arrays;
 
-import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.DataOutput;
 
 /** 
  * Class for writing packed integers to be directly read from Directory.
@@ -44,7 +44,7 @@ import org.apache.lucene.store.IndexOutput;
 public final class DirectWriter {
   final int bitsPerValue;
   final long numValues;
-  final IndexOutput output;
+  final DataOutput output;
   
   long count;
   boolean finished;
@@ -56,7 +56,7 @@ public final class DirectWriter {
   final BulkOperation encoder;
   final int iterations;
   
-  DirectWriter(IndexOutput output, long numValues, int bitsPerValue) {
+  DirectWriter(DataOutput output, long numValues, int bitsPerValue) {
     this.output = output;
     this.numValues = numValues;
     this.bitsPerValue = bitsPerValue;
@@ -103,7 +103,7 @@ public final class DirectWriter {
   }
   
   /** Returns an instance suitable for encoding {@code numValues} using {@code bitsPerValue} */
-  public static DirectWriter getInstance(IndexOutput output, long numValues, int bitsPerValue) {
+  public static DirectWriter getInstance(DataOutput output, long numValues, int bitsPerValue) {
     if (Arrays.binarySearch(SUPPORTED_BITS_PER_VALUE, bitsPerValue) < 0) {
       throw new IllegalArgumentException("Unsupported bitsPerValue " + bitsPerValue + ". Did you use bitsRequired?");
     }

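Widening IndexOutput to DataOutput is what lets writeValuesMultipleBlocks pack a block into an in-memory GrowableByteArrayDataOutput before copying it to the data file. A minimal sketch (sizes illustrative):

  GrowableByteArrayDataOutput buffer = new GrowableByteArrayDataOutput(128);
  DirectWriter w = DirectWriter.getInstance(buffer, 3, 4); // 3 values, 4 bits each
  w.add(1);
  w.add(7);
  w.add(15);
  w.finish();
  // buffer.getBytes(), up to buffer.getPosition(), now hold the packed payload
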
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3b182aa2/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java
index 8661298..6cca55e 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene70/TestLucene70DocValuesFormat.java
@@ -25,6 +25,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.TreeSet;
+import java.util.function.LongSupplier;
 import java.util.function.Supplier;
 
 import org.apache.lucene.analysis.MockAnalyzer;
@@ -61,6 +62,7 @@ import org.apache.lucene.index.SortedSetDocValues;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum.SeekStatus;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMFile;
@@ -534,4 +536,154 @@ public class TestLucene70DocValuesFormat extends BaseCompressingDocValuesFormatT
       dir.close();
     }
   }
+
+  @Slow
+  public void testSortedNumericBlocksOfVariousBitsPerValue() throws Exception {
+    doTestSortedNumericBlocksOfVariousBitsPerValue(() -> TestUtil.nextInt(random(), 1, 3));
+  }
+
+  @Slow
+  public void testSparseSortedNumericBlocksOfVariousBitsPerValue() throws Exception {
+    doTestSortedNumericBlocksOfVariousBitsPerValue(() -> TestUtil.nextInt(random(), 0, 2));
+  }
+
+  @Slow
+  public void testNumericBlocksOfVariousBitsPerValue() throws Exception {
+    doTestSparseNumericBlocksOfVariousBitsPerValue(1);
+  }
+
+  @Slow
+  public void testSparseNumericBlocksOfVariousBitsPerValue() throws Exception {
+    doTestSparseNumericBlocksOfVariousBitsPerValue(random().nextDouble());
+  }
+
+  private static LongSupplier blocksOfVariousBPV() {
+    final long mul = TestUtil.nextInt(random(), 1, 100);
+    final long min = random().nextInt();
+    return new LongSupplier() {
+      int i = Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE;
+      int maxDelta;
+      @Override
+      public long getAsLong() {
+        if (i == Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE) {
+          maxDelta = 1 << random().nextInt(5);
+          i = 0;
+        }
+        i++;
+        return min + mul * random().nextInt(maxDelta);
+      }
+    };
+  }
+
+  private void doTestSortedNumericBlocksOfVariousBitsPerValue(LongSupplier counts) throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
+    conf.setMaxBufferedDocs(atLeast(Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE));
+    conf.setRAMBufferSizeMB(-1);
+    conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
+    IndexWriter writer = new IndexWriter(dir, conf);
+    
+    final int numDocs = atLeast(Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE*3);
+    final LongSupplier values = blocksOfVariousBPV();
+    for (int i = 0; i < numDocs; i++) {
+      Document doc = new Document();
+      
+      int valueCount = (int) counts.getAsLong();
+      long valueArray[] = new long[valueCount];
+      for (int j = 0; j < valueCount; j++) {
+        long value = values.getAsLong();
+        valueArray[j] = value;
+        doc.add(new SortedNumericDocValuesField("dv", value));
+      }
+      Arrays.sort(valueArray);
+      for (int j = 0; j < valueCount; j++) {
+        doc.add(new StoredField("stored", Long.toString(valueArray[j])));
+      }
+      writer.addDocument(doc);
+      if (random().nextInt(31) == 0) {
+        writer.commit();
+      }
+    }
+    writer.forceMerge(1);
+
+    writer.close();
+    
+    // compare
+    DirectoryReader ir = DirectoryReader.open(dir);
+    TestUtil.checkReader(ir);
+    for (LeafReaderContext context : ir.leaves()) {
+      LeafReader r = context.reader();
+      SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
+      for (int i = 0; i < r.maxDoc(); i++) {
+        if (i > docValues.docID()) {
+          docValues.nextDoc();
+        }
+        String expected[] = r.document(i).getValues("stored");
+        if (i < docValues.docID()) {
+          assertEquals(0, expected.length);
+        } else {
+          String actual[] = new String[docValues.docValueCount()];
+          for (int j = 0; j < actual.length; j++) {
+            actual[j] = Long.toString(docValues.nextValue());
+          }
+          assertArrayEquals(expected, actual);
+        }
+      }
+    }
+    ir.close();
+    dir.close();
+  }
+
+  private void doTestSparseNumericBlocksOfVariousBitsPerValue(double density) throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
+    conf.setMaxBufferedDocs(atLeast(Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE));
+    conf.setRAMBufferSizeMB(-1);
+    conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
+    IndexWriter writer = new IndexWriter(dir, conf);
+    Document doc = new Document();
+    Field storedField = newStringField("stored", "", Field.Store.YES);
+    Field dvField = new NumericDocValuesField("dv", 0);
+    doc.add(storedField);
+    doc.add(dvField);
+
+    final int numDocs = atLeast(Lucene70DocValuesFormat.NUMERIC_BLOCK_SIZE*3);
+    final LongSupplier longs = blocksOfVariousBPV();
+    for (int i = 0; i < numDocs; i++) {
+      if (random().nextDouble() > density) {
+        writer.addDocument(new Document());
+        continue;
+      }
+      long value = longs.getAsLong();
+      storedField.setStringValue(Long.toString(value));
+      dvField.setLongValue(value);
+      writer.addDocument(doc);
+    }
+
+    writer.forceMerge(1);
+
+    writer.close();
+    
+    // compare
+    DirectoryReader ir = DirectoryReader.open(dir);
+    TestUtil.checkReader(ir);
+    for (LeafReaderContext context : ir.leaves()) {
+      LeafReader r = context.reader();
+      NumericDocValues docValues = DocValues.getNumeric(r, "dv");
+      docValues.nextDoc();
+      for (int i = 0; i < r.maxDoc(); i++) {
+        String storedValue = r.document(i).get("stored");
+        if (storedValue == null) {
+          assertTrue(docValues.docID() > i);
+        } else {
+          assertEquals(i, docValues.docID());
+          assertEquals(Long.parseLong(storedValue), docValues.longValue());
+          docValues.nextDoc();
+        }
+      }
+      assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.docID());
+    }
+    ir.close();
+    dir.close();
+  }
 }


[41/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-1953: It may be possible for temporary files to accumulate until the Solr process is shut down.

Posted by kr...@apache.org.
SOLR-1953: It may be possible for temporary files to accumulate until the Solr process is shut down.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e82399d0
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e82399d0
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e82399d0

Branch: refs/heads/jira/solr-8593
Commit: e82399d0677651ad4be1d8d2bdc4777b5d90b0fa
Parents: 22d9af4
Author: markrmiller <ma...@apache.org>
Authored: Mon Dec 12 11:10:58 2016 -0500
Committer: markrmiller <ma...@apache.org>
Committed: Wed Dec 14 11:16:50 2016 -0500

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   3 +
 .../apache/solr/servlet/SolrDispatchFilter.java |  15 ++
 .../apache/solr/servlet/SolrRequestParsers.java |  31 ++--
 .../solr/util/SolrFileCleaningTracker.java      | 147 +++++++++++++++++++
 4 files changed, 182 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e82399d0/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 41af0ff..946a04e 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -244,6 +244,9 @@ Bug Fixes
 
 * SOLR-9823: CoreContainer incorrectly setting MDCLoggingContext for core (Jessica Cheng Mallet via Erick Erickson)
 
+* SOLR-1953: It may be possible for temporary files to accumulate until the Solr process is shut down.
+  (Karl Wright, Mark Miller)
+
 Other Changes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e82399d0/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java b/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java
index 5a4cfb6..e8c4657 100644
--- a/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java
+++ b/solr/core/src/java/org/apache/solr/servlet/SolrDispatchFilter.java
@@ -45,6 +45,7 @@ import java.util.concurrent.atomic.AtomicReference;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.commons.io.FileCleaningTracker;
 import org.apache.commons.io.input.CloseShieldInputStream;
 import org.apache.commons.io.output.CloseShieldOutputStream;
 import org.apache.commons.lang.StringUtils;
@@ -62,6 +63,7 @@ import org.apache.solr.core.SolrXmlConfig;
 import org.apache.solr.request.SolrRequestInfo;
 import org.apache.solr.security.AuthenticationPlugin;
 import org.apache.solr.security.PKIAuthenticationPlugin;
+import org.apache.solr.util.SolrFileCleaningTracker;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -123,6 +125,8 @@ public class SolrDispatchFilter extends BaseSolrFilter {
   {
     log.trace("SolrDispatchFilter.init(): {}", this.getClass().getClassLoader());
 
+    SolrRequestParsers.fileCleaningTracker = new SolrFileCleaningTracker();
+    
     StartupLoggingUtils.checkLogDir();
     logWelcomeBanner();
     String muteConsole = System.getProperty(SOLR_LOG_MUTECONSOLE);
@@ -240,6 +244,17 @@ public class SolrDispatchFilter extends BaseSolrFilter {
   
   @Override
   public void destroy() {
+    try {
+      FileCleaningTracker fileCleaningTracker = SolrRequestParsers.fileCleaningTracker;
+      if (fileCleaningTracker != null) {
+        fileCleaningTracker.exitWhenFinished();
+      }
+    } catch (Exception e) {
+      log.warn("Exception closing FileCleaningTracker", e);
+    } finally {
+      SolrRequestParsers.fileCleaningTracker = null;
+    }
+
     if (cores != null) {
       try {
         cores.shutdown();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e82399d0/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java b/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java
index 9d7e7d9..968320e 100644
--- a/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java
+++ b/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java
@@ -42,6 +42,7 @@ import java.util.Map;
 import org.apache.commons.fileupload.FileItem;
 import org.apache.commons.fileupload.disk.DiskFileItemFactory;
 import org.apache.commons.fileupload.servlet.ServletFileUpload;
+import org.apache.commons.io.FileCleaningTracker;
 import org.apache.commons.io.input.CloseShieldInputStream;
 import org.apache.lucene.util.IOUtils;
 import org.apache.solr.common.SolrException;
@@ -58,6 +59,7 @@ import org.apache.solr.core.SolrCore;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.request.SolrQueryRequestBase;
 import org.apache.solr.util.RTimerTree;
+import org.apache.solr.util.SolrFileCleaningTracker;
 
 import static org.apache.solr.common.params.CommonParams.PATH;
 
@@ -88,6 +90,8 @@ public class SolrRequestParsers
   /** Default instance for e.g. admin requests. Limits to 2 MB uploads and does not allow remote streams. */
   public static final SolrRequestParsers DEFAULT = new SolrRequestParsers();
   
+  public static volatile SolrFileCleaningTracker fileCleaningTracker;
+  
   /**
    * Pass in an xml configuration.  A null configuration will enable
    * everything with maximum values.
@@ -532,31 +536,30 @@ public class SolrRequestParsers
   /**
    * Extract Multipart streams
    */
-  static class MultipartRequestParser implements SolrRequestParser
-  {
+  static class MultipartRequestParser implements SolrRequestParser {
     private final int uploadLimitKB;
+    private DiskFileItemFactory factory = new DiskFileItemFactory();
     
-    public MultipartRequestParser( int limit )
-    {
+    public MultipartRequestParser(int limit) {
       uploadLimitKB = limit;
+
+      // Set factory constraints
+      FileCleaningTracker fct = fileCleaningTracker;
+      if (fct != null) {
+        factory.setFileCleaningTracker(fileCleaningTracker);
+      }
+      // TODO - configure factory.setSizeThreshold(yourMaxMemorySize);
+      // TODO - configure factory.setRepository(yourTempDirectory);
     }
     
     @Override
-    public SolrParams parseParamsAndFillStreams( 
-        final HttpServletRequest req, ArrayList<ContentStream> streams ) throws Exception
-    {
+    public SolrParams parseParamsAndFillStreams(
+        final HttpServletRequest req, ArrayList<ContentStream> streams) throws Exception {
       if( !ServletFileUpload.isMultipartContent(req) ) {
         throw new SolrException( ErrorCode.BAD_REQUEST, "Not multipart content! "+req.getContentType() );
       }
       
       MultiMapSolrParams params = parseQueryString( req.getQueryString() );
-      
-      // Create a factory for disk-based file items
-      DiskFileItemFactory factory = new DiskFileItemFactory();
-
-      // Set factory constraints
-      // TODO - configure factory.setSizeThreshold(yourMaxMemorySize);
-      // TODO - configure factory.setRepository(yourTempDirectory);
 
       // Create a new file upload handler
       ServletFileUpload upload = new ServletFileUpload(factory);

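The factory is now created once per parser and wired to the tracker, so the temp file backing each large multipart item can be deleted as soon as the item becomes unreachable. The wiring, in essence (a sketch using the commons-fileupload API shown above):

  DiskFileItemFactory factory = new DiskFileItemFactory();
  FileCleaningTracker fct = SolrRequestParsers.fileCleaningTracker;
  if (fct != null) {
    factory.setFileCleaningTracker(fct); // temp files tracked for deletion
  }
  ServletFileUpload upload = new ServletFileUpload(factory);
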
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e82399d0/solr/core/src/java/org/apache/solr/util/SolrFileCleaningTracker.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/util/SolrFileCleaningTracker.java b/solr/core/src/java/org/apache/solr/util/SolrFileCleaningTracker.java
new file mode 100644
index 0000000..9c66f0f
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/util/SolrFileCleaningTracker.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.util;
+
+import java.io.File;
+import java.lang.ref.PhantomReference;
+import java.lang.ref.ReferenceQueue;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+
+import org.apache.commons.io.FileCleaningTracker;
+import org.apache.commons.io.FileDeleteStrategy;
+
+public class SolrFileCleaningTracker extends FileCleaningTracker {
+
+  ReferenceQueue<Object> q = new ReferenceQueue<>();
+
+  final Collection<Tracker> trackers = Collections.synchronizedSet(new HashSet<Tracker>());
+
+  final List<String> deleteFailures = Collections.synchronizedList(new ArrayList<String>());
+
+  volatile boolean exitWhenFinished = false;
+
+  Thread reaper;
+
+  public void track(final File file, final Object marker) {
+    track(file, marker, null);
+  }
+
+  public void track(final File file, final Object marker, final FileDeleteStrategy deleteStrategy) {
+    if (file == null) {
+      throw new NullPointerException("The file must not be null");
+    }
+    addTracker(file.getPath(), marker, deleteStrategy);
+  }
+
+  public void track(final String path, final Object marker) {
+    track(path, marker, null);
+  }
+
+  public void track(final String path, final Object marker, final FileDeleteStrategy deleteStrategy) {
+    if (path == null) {
+      throw new NullPointerException("The path must not be null");
+    }
+    addTracker(path, marker, deleteStrategy);
+  }
+
+  private synchronized void addTracker(final String path, final Object marker,
+      final FileDeleteStrategy deleteStrategy) {
+    if (exitWhenFinished) {
+      throw new IllegalStateException("No new trackers can be added once exitWhenFinished() is called");
+    }
+    if (reaper == null) {
+      reaper = new Reaper();
+      reaper.start();
+    }
+    trackers.add(new Tracker(path, deleteStrategy, marker, q));
+  }
+
+  public int getTrackCount() {
+    return trackers.size();
+  }
+
+  public List<String> getDeleteFailures() {
+    return deleteFailures;
+  }
+
+  public synchronized void exitWhenFinished() {
+    // synchronized block protects reaper
+    exitWhenFinished = true;
+    if (reaper != null) {
+      synchronized (reaper) {
+        reaper.interrupt();
+        try {
+          reaper.join();
+        } catch (InterruptedException e) { 
+          Thread.currentThread().interrupt();
+        }
+      }
+    }
+  }
+
+  private final class Reaper extends Thread {
+    Reaper() {
+      super("MultiPart Upload Tmp File Reaper");
+      setDaemon(true);
+    }
+
+    @Override
+    public void run() {
+      while (exitWhenFinished == false || trackers.size() > 0) {
+        try {
+          // Wait for a tracker to remove.
+          final Tracker tracker = (Tracker) q.remove(); // cannot return null
+          trackers.remove(tracker);
+          if (!tracker.delete()) {
+            deleteFailures.add(tracker.getPath());
+          }
+          tracker.clear();
+        } catch (final InterruptedException e) {
+          Thread.currentThread().interrupt();
+          break;
+        }
+      }
+    }
+  }
+
+  private static final class Tracker extends PhantomReference<Object> {
+
+    private final String path;
+
+    private final FileDeleteStrategy deleteStrategy;
+
+    Tracker(final String path, final FileDeleteStrategy deleteStrategy, final Object marker,
+        final ReferenceQueue<? super Object> queue) {
+      super(marker, queue);
+      this.path = path;
+      this.deleteStrategy = deleteStrategy == null ? FileDeleteStrategy.NORMAL : deleteStrategy;
+    }
+
+    public String getPath() {
+      return path;
+    }
+
+    public boolean delete() {
+      return deleteStrategy.deleteQuietly(new File(path));
+    }
+  }
+
+}
\ No newline at end of file

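The tracker pairs each temp file with a marker object via a PhantomReference; once the marker is garbage-collected, the daemon reaper thread deletes the file. A minimal lifecycle sketch (the path is hypothetical):

  SolrFileCleaningTracker tracker = new SolrFileCleaningTracker();
  Object marker = new Object();
  tracker.track(new File("/tmp/solr-upload-1.tmp"), marker);
  marker = null;  // once GC enqueues the phantom reference, the reaper
                  // deletes the file quietly, recording any failure
  tracker.exitWhenFinished();  // at shutdown: stop the reaper and join it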

[08/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7563: use a compressed format for the in-heap BKD index

Posted by kr...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
index c06c128..9d2db89 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
@@ -20,7 +20,6 @@ package org.apache.lucene.codecs.simpletext;
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
-import java.util.function.IntFunction;
 
 import org.apache.lucene.codecs.PointsReader;
 import org.apache.lucene.codecs.PointsWriter;
@@ -33,29 +32,28 @@ import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
-import org.apache.lucene.util.bkd.BKDWriter;
 
 class SimpleTextPointsWriter extends PointsWriter {
 
-  final static BytesRef NUM_DIMS      = new BytesRef("num dims ");
-  final static BytesRef BYTES_PER_DIM = new BytesRef("bytes per dim ");
-  final static BytesRef MAX_LEAF_POINTS = new BytesRef("max leaf points ");
-  final static BytesRef INDEX_COUNT = new BytesRef("index count ");
-  final static BytesRef BLOCK_COUNT   = new BytesRef("block count ");
-  final static BytesRef BLOCK_DOC_ID  = new BytesRef("  doc ");
-  final static BytesRef BLOCK_FP      = new BytesRef("  block fp ");
-  final static BytesRef BLOCK_VALUE   = new BytesRef("  block value ");
-  final static BytesRef SPLIT_COUNT   = new BytesRef("split count ");
-  final static BytesRef SPLIT_DIM     = new BytesRef("  split dim ");
-  final static BytesRef SPLIT_VALUE   = new BytesRef("  split value ");
-  final static BytesRef FIELD_COUNT   = new BytesRef("field count ");
-  final static BytesRef FIELD_FP_NAME = new BytesRef("  field fp name ");
-  final static BytesRef FIELD_FP      = new BytesRef("  field fp ");
-  final static BytesRef MIN_VALUE     = new BytesRef("min value ");
-  final static BytesRef MAX_VALUE     = new BytesRef("max value ");
-  final static BytesRef POINT_COUNT   = new BytesRef("point count ");
-  final static BytesRef DOC_COUNT     = new BytesRef("doc count ");
-  final static BytesRef END           = new BytesRef("END");
+  public final static BytesRef NUM_DIMS      = new BytesRef("num dims ");
+  public final static BytesRef BYTES_PER_DIM = new BytesRef("bytes per dim ");
+  public final static BytesRef MAX_LEAF_POINTS = new BytesRef("max leaf points ");
+  public final static BytesRef INDEX_COUNT = new BytesRef("index count ");
+  public final static BytesRef BLOCK_COUNT   = new BytesRef("block count ");
+  public final static BytesRef BLOCK_DOC_ID  = new BytesRef("  doc ");
+  public final static BytesRef BLOCK_FP      = new BytesRef("  block fp ");
+  public final static BytesRef BLOCK_VALUE   = new BytesRef("  block value ");
+  public final static BytesRef SPLIT_COUNT   = new BytesRef("split count ");
+  public final static BytesRef SPLIT_DIM     = new BytesRef("  split dim ");
+  public final static BytesRef SPLIT_VALUE   = new BytesRef("  split value ");
+  public final static BytesRef FIELD_COUNT   = new BytesRef("field count ");
+  public final static BytesRef FIELD_FP_NAME = new BytesRef("  field fp name ");
+  public final static BytesRef FIELD_FP      = new BytesRef("  field fp ");
+  public final static BytesRef MIN_VALUE     = new BytesRef("min value ");
+  public final static BytesRef MAX_VALUE     = new BytesRef("max value ");
+  public final static BytesRef POINT_COUNT   = new BytesRef("point count ");
+  public final static BytesRef DOC_COUNT     = new BytesRef("doc count ");
+  public final static BytesRef END           = new BytesRef("END");
 
   private IndexOutput dataOut;
   final BytesRefBuilder scratch = new BytesRefBuilder();
@@ -75,105 +73,15 @@ class SimpleTextPointsWriter extends PointsWriter {
     boolean singleValuePerDoc = values.size() == values.getDocCount();
 
     // We use the normal BKDWriter, but subclass to customize how it writes the index and blocks to disk:
-    try (BKDWriter writer = new BKDWriter(writeState.segmentInfo.maxDoc(),
-                                          writeState.directory,
-                                          writeState.segmentInfo.name,
-                                          fieldInfo.getPointDimensionCount(),
-                                          fieldInfo.getPointNumBytes(),
-                                          BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
-                                          BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
-                                          values.size(),
-                                          singleValuePerDoc) {
-
-        @Override
-        protected void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException {
-          write(out, NUM_DIMS);
-          writeInt(out, numDims);
-          newline(out);
-
-          write(out, BYTES_PER_DIM);
-          writeInt(out, bytesPerDim);
-          newline(out);
-
-          write(out, MAX_LEAF_POINTS);
-          writeInt(out, maxPointsInLeafNode);
-          newline(out);
-
-          write(out, INDEX_COUNT);
-          writeInt(out, leafBlockFPs.length);
-          newline(out);
-
-          write(out, MIN_VALUE);
-          BytesRef br = new BytesRef(minPackedValue, 0, minPackedValue.length);
-          write(out, br.toString());
-          newline(out);
-
-          write(out, MAX_VALUE);
-          br = new BytesRef(maxPackedValue, 0, maxPackedValue.length);
-          write(out, br.toString());
-          newline(out);
-
-          write(out, POINT_COUNT);
-          writeLong(out, pointCount);
-          newline(out);
-
-          write(out, DOC_COUNT);
-          writeInt(out, docsSeen.cardinality());
-          newline(out);
-
-          for(int i=0;i<leafBlockFPs.length;i++) {
-            write(out, BLOCK_FP);
-            writeLong(out, leafBlockFPs[i]);
-            newline(out);
-          }
-
-          assert (splitPackedValues.length % (1 + fieldInfo.getPointNumBytes())) == 0;
-          int count = splitPackedValues.length / (1 + fieldInfo.getPointNumBytes());
-          assert count == leafBlockFPs.length;
-
-          write(out, SPLIT_COUNT);
-          writeInt(out, count);
-          newline(out);
-
-          for(int i=0;i<count;i++) {
-            write(out, SPLIT_DIM);
-            writeInt(out, splitPackedValues[i * (1 + fieldInfo.getPointNumBytes())] & 0xff);
-            newline(out);
-            write(out, SPLIT_VALUE);
-            br = new BytesRef(splitPackedValues, 1+(i * (1+fieldInfo.getPointNumBytes())), fieldInfo.getPointNumBytes());
-            write(out, br.toString());
-            newline(out);
-          }
-        }
-
-        @Override
-        protected void writeLeafBlockDocs(IndexOutput out, int[] docIDs, int start, int count) throws IOException {
-          write(out, BLOCK_COUNT);
-          writeInt(out, count);
-          newline(out);
-          for(int i=0;i<count;i++) {
-            write(out, BLOCK_DOC_ID);
-            writeInt(out, docIDs[start+i]);
-            newline(out);
-          }
-        }
-
-        @Override
-        protected void writeCommonPrefixes(IndexOutput out, int[] commonPrefixLengths, byte[] packedValue) {
-          // NOTE: we don't do prefix coding, so we ignore commonPrefixLengths
-        }
-
-        @Override
-        protected void writeLeafBlockPackedValues(IndexOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction<BytesRef> packedValues) throws IOException {
-          for (int i = 0; i < count; ++i) {
-            BytesRef packedValue = packedValues.apply(i);
-            // NOTE: we don't do prefix coding, so we ignore commonPrefixLengths
-            write(out, BLOCK_VALUE);
-            write(out, packedValue.toString());
-            newline(out);
-          }
-        }
-      }) {
+    try (SimpleTextBKDWriter writer = new SimpleTextBKDWriter(writeState.segmentInfo.maxDoc(),
+                                                              writeState.directory,
+                                                              writeState.segmentInfo.name,
+                                                              fieldInfo.getPointDimensionCount(),
+                                                              fieldInfo.getPointNumBytes(),
+                                                              SimpleTextBKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
+                                                              SimpleTextBKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
+                                                              values.size(),
+                                                              singleValuePerDoc)) {
 
       values.intersect(new IntersectVisitor() {
           @Override
@@ -198,26 +106,6 @@ class SimpleTextPointsWriter extends PointsWriter {
     }
   }
 
-  private void write(IndexOutput out, String s) throws IOException {
-    SimpleTextUtil.write(out, s, scratch);
-  }
-
-  private void writeInt(IndexOutput out, int x) throws IOException {
-    SimpleTextUtil.write(out, Integer.toString(x), scratch);
-  }
-
-  private void writeLong(IndexOutput out, long x) throws IOException {
-    SimpleTextUtil.write(out, Long.toString(x), scratch);
-  }
-
-  private void write(IndexOutput out, BytesRef b) throws IOException {
-    SimpleTextUtil.write(out, b);
-  }
-
-  private void newline(IndexOutput out) throws IOException {
-    SimpleTextUtil.writeNewline(out);
-  }
-
   @Override
   public void finish() throws IOException {
     SimpleTextUtil.write(dataOut, END);
@@ -250,4 +138,24 @@ class SimpleTextPointsWriter extends PointsWriter {
       }
     }
   }
+
+  private void write(IndexOutput out, String s) throws IOException {
+    SimpleTextUtil.write(out, s, scratch);
+  }
+
+  private void writeInt(IndexOutput out, int x) throws IOException {
+    SimpleTextUtil.write(out, Integer.toString(x), scratch);
+  }
+
+  private void writeLong(IndexOutput out, long x) throws IOException {
+    SimpleTextUtil.write(out, Long.toString(x), scratch);
+  }
+
+  private void write(IndexOutput out, BytesRef b) throws IOException {
+    SimpleTextUtil.write(out, b);
+  }
+
+  private void newline(IndexOutput out) throws IOException {
+    SimpleTextUtil.writeNewline(out);
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsFormat.java
index e558d0d..1d2285c 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/Lucene60PointsFormat.java
@@ -28,7 +28,8 @@ import org.apache.lucene.index.SegmentWriteState;
 
 /**
  * Lucene 6.0 point format, which encodes dimensional values in a block KD-tree structure
- * for fast shape intersection filtering. See <a href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a> for details.
+ * for fast 1D range and N dimensional shape intersection filtering.
+ * See <a href="https://www.cs.duke.edu/~pankaj/publications/papers/bkd-sstd.pdf">this paper</a> for details.
  *
  * <p>This data structure is written as a series of blocks on disk, with an in-memory perfectly balanced
  * binary tree of split values referencing those blocks at the leaves.
@@ -50,10 +51,13 @@ import org.apache.lucene.index.SegmentWriteState;
  *  <li> maxPointsInLeafNode (vInt)
  *  <li> bytesPerDim (vInt)
  *  <li> count (vInt)
- *  <li> byte[bytesPerDim]<sup>count</sup> (packed <code>byte[]</code> all split values)
- *  <li> delta-blockFP (vLong)<sup>count</sup> (delta-coded file pointers to the on-disk leaf blocks))
+ *  <li> packed index (byte[])
  * </ul>
  *
+ * <p>The packed index uses hierarchical delta and prefix coding to compactly encode the file pointer for
+ * all leaf blocks, once the tree is traversed, as well as the split dimension and split value for each
+ * inner node of the tree.
+ *
  * <p>After all fields blocks + index data are written, {@link CodecUtil#writeFooter} writes the checksum.
  *
  * <p>The <code>.dii</code> file records the file pointer in the <code>.dim</code> file where each field's

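To make the "hierarchical delta and prefix coding" above concrete: each inner node's split
value is written as one vInt code packing the split dimension, the length of the byte prefix
shared with the previous split value in that dimension, and the magnitude of the first
differing byte's delta (its sign is tracked separately, via the negativeDeltas flags),
followed by the raw suffix bytes. A sketch of the encode side, assuming the code layout that
PackedIndexTree.readNodeData (later in this commit) decodes; names here are illustrative only:

public class SplitValueCodeSketch {

  // Pack (splitDim, shared prefix length, first differing byte delta) into one int.
  static int encode(int splitDim, int numDims, int bytesPerDim, byte[] lastSplit, byte[] newSplit) {
    int prefix = 0;
    while (prefix < bytesPerDim && lastSplit[prefix] == newSplit[prefix]) {
      prefix++;
    }
    int firstDiffByteDelta;
    if (prefix == bytesPerDim) {
      firstDiffByteDelta = 0;  // split value == last split value in this dim (duplicates)
    } else {
      firstDiffByteDelta = Math.abs((newSplit[prefix] & 0xFF) - (lastSplit[prefix] & 0xFF));
    }
    return (firstDiffByteDelta * (1 + bytesPerDim) + prefix) * numDims + splitDim;
  }

  public static void main(String[] args) {
    int numDims = 2, bytesPerDim = 4;
    byte[] last = {0x01, 0x02, 0x03, 0x04};
    byte[] cur  = {0x01, 0x02, 0x07, 0x00};
    int code = encode(1, numDims, bytesPerDim, last, cur);

    // Decode exactly as BKDReader's PackedIndexTree does:
    int splitDim = code % numDims;
    code /= numDims;
    int prefix = code % (1 + bytesPerDim);
    int firstDiffByteDelta = code / (1 + bytesPerDim);
    System.out.println("splitDim=" + splitDim + " prefix=" + prefix + " delta=" + firstDiffByteDelta);
    // prints: splitDim=1 prefix=2 delta=4
  }
}
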
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java
index 8968a6d..a914001 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene60/package-info.java
@@ -16,7 +16,7 @@
  */
 
 /**
- * Components from the Lucene 6.0 index format.  See {@link org.apache.lucene.codecs.lucene62}
- * for an overview of the index format.
+ * Components from the Lucene 6.0 index format.  See {@link org.apache.lucene.codecs.lucene70}
+ * for an overview of the current index format.
  */
 package org.apache.lucene.codecs.lucene60;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java
index 2fe2dc7..fb55673 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/package-info.java
@@ -17,8 +17,8 @@
 
 /**
  * Components from the Lucene 6.2 index format
- * See {@link org.apache.lucene.codecs.lucene62} for an overview
- * of the index format.
+ * See {@link org.apache.lucene.codecs.lucene70} for an overview
+ * of the current index format.
  */
 
 package org.apache.lucene.codecs.lucene62;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/java/org/apache/lucene/codecs/lucene70/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/package-info.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/package-info.java
index 9b432f7..cab2859 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene70/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene70/package-info.java
@@ -185,6 +185,12 @@
  * {@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live documents}. 
  * An optional file indicating which documents are live.
  * </li>
+ * <li>
+ * {@link org.apache.lucene.codecs.lucene60.Lucene60PointsFormat Point values}.
+ * Optional pair of files, recording dimensionally indexed fields, to enable fast
+ * numeric range filtering, indexing of large numeric values like BigInteger and
+ * BigDecimal (1D), and geographic shape intersection (2D, 3D).
+ * </li>
  * </ul>
  * <p>Details on each of these are provided in their linked pages.</p>
  * </div>
@@ -300,7 +306,12 @@
  * <tr>
  * <td>{@link org.apache.lucene.codecs.lucene50.Lucene50LiveDocsFormat Live Documents}</td>
  * <td>.liv</td>
- * <td>Info about what files are live</td>
+ * <td>Info about what documents are live</td>
+ * </tr>
+ * <tr>
+ * <td>{@link org.apache.lucene.codecs.lucene60.Lucene60PointsFormat Point values}</td>
+ * <td>.dii, .dim</td>
+ * <td>Holds indexed points, if any</td>
  * </tr>
  * </table>
  * </div>
@@ -374,6 +385,8 @@
  * that is suitable for faceting/sorting/analytics.
  * <li>In version 5.4, DocValues have been improved to store more information on disk:
  * addresses for binary fields and ord indexes for multi-valued fields.
+ * <li>In version 6.0, Points were added, for multi-dimensional range/distance search.
+ * <li>In version 6.2, a new Segment info format that reads/writes the index sort was added, to support index sorting.
  * <li>In version 7.0, DocValues have been improved to better support sparse doc values
  * thanks to an iterator API.
  * </li>

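For context, points reach the .dim/.dii files through the normal document API; here is a
minimal, self-contained sketch of indexing an IntPoint and running a 1D range query over it
with the standard Lucene 6.x API:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class PointsDemo {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      for (int i = 0; i < 100; i++) {
        Document doc = new Document();
        doc.add(new IntPoint("price", i));  // lands in the point files, not the terms dict
        w.addDocument(doc);
      }
    }
    try (DirectoryReader r = DirectoryReader.open(dir)) {
      IndexSearcher s = new IndexSearcher(r);
      // Range queries on points walk the BKD tree:
      int hits = s.count(IntPoint.newRangeQuery("price", 10, 20));
      System.out.println("hits=" + hits);  // 11
    }
  }
}
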
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
index 7bc08f3..fd8011d 100644
--- a/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
+++ b/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
@@ -1801,161 +1801,32 @@ public final class CheckIndex implements Closeable {
         }
         for (FieldInfo fieldInfo : fieldInfos) {
           if (fieldInfo.getPointDimensionCount() > 0) {
-            FixedBitSet docsSeen = new FixedBitSet(reader.maxDoc());
-            status.totalValueFields++;
-            int dimCount = fieldInfo.getPointDimensionCount();
-            int bytesPerDim = fieldInfo.getPointNumBytes();
-            int packedBytesCount = dimCount * bytesPerDim;
-            byte[] lastMinPackedValue = new byte[packedBytesCount];
-            byte[] lastMaxPackedValue = new byte[packedBytesCount];
-            BytesRef scratch = new BytesRef();
-            scratch.length = bytesPerDim;
-            byte[] lastPackedValue = new byte[packedBytesCount];
-
-            long[] pointCountSeen = new long[1];
-
             PointValues values = pointsReader.getValues(fieldInfo.name);
             if (values == null) {
               continue;
             }
-            byte[] globalMinPackedValue = values.getMinPackedValue();
+
+            status.totalValueFields++;
+
             long size = values.size();
             int docCount = values.getDocCount();
 
-            if (docCount > size) {
-              throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have size=" + size + " points and inconsistent docCount=" + docCount);
-            }
+            VerifyPointsVisitor visitor = new VerifyPointsVisitor(fieldInfo.name, reader.maxDoc(), values);
+            values.intersect(visitor);
 
-            if (docCount > reader.maxDoc()) {
-              throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have docCount=" + docCount + " but that's greater than maxDoc=" + reader.maxDoc());
+            if (visitor.getPointCountSeen() != size) {
+              throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have size=" + size + " points, but in fact has " + visitor.getPointCountSeen());
             }
 
-            if (globalMinPackedValue == null) {
-              if (size != 0) {
-                throw new RuntimeException("getMinPackedValue is null points for field \"" + fieldInfo.name + "\" yet size=" + size);
-              }
-            } else if (globalMinPackedValue.length != packedBytesCount) {
-              throw new RuntimeException("getMinPackedValue for field \"" + fieldInfo.name + "\" return length=" + globalMinPackedValue.length + " array, but should be " + packedBytesCount);
-            }
-            byte[] globalMaxPackedValue = values.getMaxPackedValue();
-            if (globalMaxPackedValue == null) {
-              if (size != 0) {
-                throw new RuntimeException("getMaxPackedValue is null points for field \"" + fieldInfo.name + "\" yet size=" + size);
-              }
-            } else if (globalMaxPackedValue.length != packedBytesCount) {
-              throw new RuntimeException("getMaxPackedValue for field \"" + fieldInfo.name + "\" return length=" + globalMaxPackedValue.length + " array, but should be " + packedBytesCount);
-            }
-
-            values.intersect(new PointValues.IntersectVisitor() {
-
-                               private int lastDocID = -1;
-
-                               @Override
-                               public void visit(int docID) {
-                                 throw new RuntimeException("codec called IntersectVisitor.visit without a packed value for docID=" + docID);
-                               }
-
-                               @Override
-                               public void visit(int docID, byte[] packedValue) {
-                                 checkPackedValue("packed value", packedValue, docID);
-                                 pointCountSeen[0]++;
-                                 docsSeen.set(docID);
-
-                                 for(int dim=0;dim<dimCount;dim++) {
-                                   int offset = bytesPerDim * dim;
-
-                                   // Compare to last cell:
-                                   if (StringHelper.compare(bytesPerDim, packedValue, offset, lastMinPackedValue, offset) < 0) {
-                                     // This doc's point, in this dimension, is lower than the minimum value of the last cell checked:
-                                     throw new RuntimeException("packed points value " + Arrays.toString(packedValue) + " for field=\"" + fieldInfo.name + "\", docID=" + docID + " is out-of-bounds of the last cell min=" + Arrays.toString(lastMinPackedValue) + " max=" + Arrays.toString(lastMaxPackedValue) + " dim=" + dim);
-                                   }
-
-                                   if (StringHelper.compare(bytesPerDim, packedValue, offset, lastMaxPackedValue, offset) > 0) {
-                                     // This doc's point, in this dimension, is greater than the maximum value of the last cell checked:
-                                     throw new RuntimeException("packed points value " + Arrays.toString(packedValue) + " for field=\"" + fieldInfo.name + "\", docID=" + docID + " is out-of-bounds of the last cell min=" + Arrays.toString(lastMinPackedValue) + " max=" + Arrays.toString(lastMaxPackedValue) + " dim=" + dim);
-                                   }
-                                 }
-
-                                 // In the 1D case, PointValues must make a single in-order sweep through all values, and tie-break by
-                                 // increasing docID:
-                                 if (dimCount == 1) {
-                                   int cmp = StringHelper.compare(bytesPerDim, lastPackedValue, 0, packedValue, 0);
-                                   if (cmp > 0) {
-                                     throw new RuntimeException("packed points value " + Arrays.toString(packedValue) + " for field=\"" + fieldInfo.name + "\", for docID=" + docID + " is out-of-order vs the previous document's value " + Arrays.toString(lastPackedValue));
-                                   } else if (cmp == 0) {
-                                     if (docID < lastDocID) {
-                                       throw new RuntimeException("packed points value is the same, but docID=" + docID + " is out of order vs previous docID=" + lastDocID + ", field=\"" + fieldInfo.name + "\"");
-                                     }
-                                   }
-                                   System.arraycopy(packedValue, 0, lastPackedValue, 0, bytesPerDim);
-                                   lastDocID = docID;
-                                 }
-
-                                 status.totalValuePoints++;
-                               }
-
-                               @Override
-                               public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
-                                 checkPackedValue("min packed value", minPackedValue, -1);
-                                 System.arraycopy(minPackedValue, 0, lastMinPackedValue, 0, packedBytesCount);
-                                 checkPackedValue("max packed value", maxPackedValue, -1);
-                                 System.arraycopy(maxPackedValue, 0, lastMaxPackedValue, 0, packedBytesCount);
-
-                                 for(int dim=0;dim<dimCount;dim++) {
-                                   int offset = bytesPerDim * dim;
-
-                                   if (StringHelper.compare(bytesPerDim, minPackedValue, offset, maxPackedValue, offset) > 0) {
-                                     throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) +
-                                                                " is out-of-bounds of the cell's maxPackedValue " + Arrays.toString(maxPackedValue) + " dim=" + dim + " field=\"" + fieldInfo.name + "\"");
-                                   }
-
-                                   // Make sure this cell is not outside of the global min/max:
-                                   if (StringHelper.compare(bytesPerDim, minPackedValue, offset, globalMinPackedValue, offset) < 0) {
-                                     throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) +
-                                                                " is out-of-bounds of the global minimum " + Arrays.toString(globalMinPackedValue) + " dim=" + dim + " field=\"" + fieldInfo.name + "\"");
-                                   }
-
-                                   if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, globalMinPackedValue, offset) < 0) {
-                                     throw new RuntimeException("packed points cell maxPackedValue " + Arrays.toString(maxPackedValue) +
-                                                                " is out-of-bounds of the global minimum " + Arrays.toString(globalMinPackedValue) + " dim=" + dim + " field=\"" + fieldInfo.name + "\"");
-                                   }
-
-                                   if (StringHelper.compare(bytesPerDim, minPackedValue, offset, globalMaxPackedValue, offset) > 0) {
-                                     throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) +
-                                                                " is out-of-bounds of the global maximum " + Arrays.toString(globalMaxPackedValue) + " dim=" + dim + " field=\"" + fieldInfo.name + "\"");
-                                   }
-                                   if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, globalMaxPackedValue, offset) > 0) {
-                                     throw new RuntimeException("packed points cell maxPackedValue " + Arrays.toString(maxPackedValue) +
-                                                                " is out-of-bounds of the global maximum " + Arrays.toString(globalMaxPackedValue) + " dim=" + dim + " field=\"" + fieldInfo.name + "\"");
-                                   }
-                                 }                                   
-
-                                 // We always pretend the query shape is so complex that it crosses every cell, so
-                                 // that packedValue is passed for every document
-                                 return PointValues.Relation.CELL_CROSSES_QUERY;
-                               }
-
-                               private void checkPackedValue(String desc, byte[] packedValue, int docID) {
-                                 if (packedValue == null) {
-                                   throw new RuntimeException(desc + " is null for docID=" + docID + " field=\"" + fieldInfo.name + "\"");
-                                 }
-
-                                 if (packedValue.length != packedBytesCount) {
-                                   throw new RuntimeException(desc + " has incorrect length=" + packedValue.length + " vs expected=" + packedBytesCount + " for docID=" + docID + " field=\"" + fieldInfo.name + "\"");
-                                 }
-                               }
-                             });
-
-            if (pointCountSeen[0] != size) {
-              throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have size=" + size + " points, but in fact has " + pointCountSeen[0]);
+            if (visitor.getDocCountSeen() != docCount) {
+              throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have docCount=" + docCount + " but in fact has " + visitor.getDocCountSeen());
             }
 
-            if (docsSeen.cardinality() != docCount) {
-              throw new RuntimeException("point values for field \"" + fieldInfo.name + "\" claims to have docCount=" + docCount + " but in fact has " + docsSeen.cardinality());
-            }
+            status.totalValuePoints += visitor.getPointCountSeen();
           }
         }
       }
+
       msg(infoStream, String.format(Locale.ROOT, "OK [%d fields, %d points] [took %.3f sec]", status.totalValueFields, status.totalValuePoints, nsToSec(System.nanoTime()-startNS)));
 
     } catch (Throwable e) {
@@ -1972,6 +1843,167 @@ public final class CheckIndex implements Closeable {
     return status;
   }
 
+  /** Walks the entire N-dimensional points space, verifying that all points fall within the last cell's boundaries.
+   *
+   * @lucene.internal */
+  public static class VerifyPointsVisitor implements PointValues.IntersectVisitor {
+    private long pointCountSeen;
+    private int lastDocID = -1;
+    private final int maxDoc;
+    private final FixedBitSet docsSeen;
+    private final byte[] lastMinPackedValue;
+    private final byte[] lastMaxPackedValue;
+    private final byte[] lastPackedValue;
+    private final byte[] globalMinPackedValue;
+    private final byte[] globalMaxPackedValue;
+    private final int packedBytesCount;
+    private final int numDims;
+    private final int bytesPerDim;
+    private final String fieldName;
+
+    /** Sole constructor */
+    public VerifyPointsVisitor(String fieldName, int maxDoc, PointValues values) throws IOException {
+      this.maxDoc = maxDoc;
+      this.fieldName = fieldName;
+      numDims = values.getNumDimensions();
+      bytesPerDim = values.getBytesPerDimension();
+      packedBytesCount = numDims * bytesPerDim;
+      globalMinPackedValue = values.getMinPackedValue();
+      globalMaxPackedValue = values.getMaxPackedValue();
+      docsSeen = new FixedBitSet(maxDoc);
+      lastMinPackedValue = new byte[packedBytesCount];
+      lastMaxPackedValue = new byte[packedBytesCount];
+      lastPackedValue = new byte[packedBytesCount];
+
+      if (values.getDocCount() > values.size()) {
+        throw new RuntimeException("point values for field \"" + fieldName + "\" claims to have size=" + values.size() + " points and inconsistent docCount=" + values.getDocCount());
+      }
+
+      if (values.getDocCount() > maxDoc) {
+        throw new RuntimeException("point values for field \"" + fieldName + "\" claims to have docCount=" + values.getDocCount() + " but that's greater than maxDoc=" + maxDoc);
+      }
+
+      if (globalMinPackedValue == null) {
+        if (values.size() != 0) {
+          throw new RuntimeException("getMinPackedValue is null points for field \"" + fieldName + "\" yet size=" + values.size());
+        }
+      } else if (globalMinPackedValue.length != packedBytesCount) {
+        throw new RuntimeException("getMinPackedValue for field \"" + fieldName + "\" return length=" + globalMinPackedValue.length + " array, but should be " + packedBytesCount);
+      }
+      if (globalMaxPackedValue == null) {
+        if (values.size() != 0) {
+          throw new RuntimeException("getMaxPackedValue is null points for field \"" + fieldName + "\" yet size=" + values.size());
+        }
+      } else if (globalMaxPackedValue.length != packedBytesCount) {
+        throw new RuntimeException("getMaxPackedValue for field \"" + fieldName + "\" return length=" + globalMaxPackedValue.length + " array, but should be " + packedBytesCount);
+      }
+    }
+
+    /** Returns total number of points in this BKD tree */
+    public long getPointCountSeen() {
+      return pointCountSeen;
+    }
+
+    /** Returns total number of unique docIDs in this BKD tree */
+    public long getDocCountSeen() {
+      return docsSeen.cardinality();
+    }
+
+    @Override
+    public void visit(int docID) {
+      throw new RuntimeException("codec called IntersectVisitor.visit without a packed value for docID=" + docID);
+    }
+
+    @Override
+    public void visit(int docID, byte[] packedValue) {
+      checkPackedValue("packed value", packedValue, docID);
+      pointCountSeen++;
+      docsSeen.set(docID);
+
+      for(int dim=0;dim<numDims;dim++) {
+        int offset = bytesPerDim * dim;
+
+        // Compare to last cell:
+        if (StringHelper.compare(bytesPerDim, packedValue, offset, lastMinPackedValue, offset) < 0) {
+          // This doc's point, in this dimension, is lower than the minimum value of the last cell checked:
+          throw new RuntimeException("packed points value " + Arrays.toString(packedValue) + " for field=\"" + fieldName + "\", docID=" + docID + " is out-of-bounds of the last cell min=" + Arrays.toString(lastMinPackedValue) + " max=" + Arrays.toString(lastMaxPackedValue) + " dim=" + dim);
+        }
+
+        if (StringHelper.compare(bytesPerDim, packedValue, offset, lastMaxPackedValue, offset) > 0) {
+          // This doc's point, in this dimension, is greater than the maximum value of the last cell checked:
+          throw new RuntimeException("packed points value " + Arrays.toString(packedValue) + " for field=\"" + fieldName + "\", docID=" + docID + " is out-of-bounds of the last cell min=" + Arrays.toString(lastMinPackedValue) + " max=" + Arrays.toString(lastMaxPackedValue) + " dim=" + dim);
+        }
+      }
+
+      // In the 1D case, PointValues must make a single in-order sweep through all values, and tie-break by
+      // increasing docID:
+      if (numDims == 1) {
+        int cmp = StringHelper.compare(bytesPerDim, lastPackedValue, 0, packedValue, 0);
+        if (cmp > 0) {
+          throw new RuntimeException("packed points value " + Arrays.toString(packedValue) + " for field=\"" + fieldName + "\", for docID=" + docID + " is out-of-order vs the previous document's value " + Arrays.toString(lastPackedValue));
+        } else if (cmp == 0) {
+          if (docID < lastDocID) {
+            throw new RuntimeException("packed points value is the same, but docID=" + docID + " is out of order vs previous docID=" + lastDocID + ", field=\"" + fieldName + "\"");
+          }
+        }
+        System.arraycopy(packedValue, 0, lastPackedValue, 0, bytesPerDim);
+        lastDocID = docID;
+      }
+    }
+
+    @Override
+    public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+      checkPackedValue("min packed value", minPackedValue, -1);
+      System.arraycopy(minPackedValue, 0, lastMinPackedValue, 0, packedBytesCount);
+      checkPackedValue("max packed value", maxPackedValue, -1);
+      System.arraycopy(maxPackedValue, 0, lastMaxPackedValue, 0, packedBytesCount);
+
+      for(int dim=0;dim<numDims;dim++) {
+        int offset = bytesPerDim * dim;
+
+        if (StringHelper.compare(bytesPerDim, minPackedValue, offset, maxPackedValue, offset) > 0) {
+          throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) +
+                                     " is out-of-bounds of the cell's maxPackedValue " + Arrays.toString(maxPackedValue) + " dim=" + dim + " field=\"" + fieldName + "\"");
+        }
+
+        // Make sure this cell is not outside of the global min/max:
+        if (StringHelper.compare(bytesPerDim, minPackedValue, offset, globalMinPackedValue, offset) < 0) {
+          throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) +
+                                     " is out-of-bounds of the global minimum " + Arrays.toString(globalMinPackedValue) + " dim=" + dim + " field=\"" + fieldName + "\"");
+        }
+
+        if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, globalMinPackedValue, offset) < 0) {
+          throw new RuntimeException("packed points cell maxPackedValue " + Arrays.toString(maxPackedValue) +
+                                     " is out-of-bounds of the global minimum " + Arrays.toString(globalMinPackedValue) + " dim=" + dim + " field=\"" + fieldName + "\"");
+        }
+
+        if (StringHelper.compare(bytesPerDim, minPackedValue, offset, globalMaxPackedValue, offset) > 0) {
+          throw new RuntimeException("packed points cell minPackedValue " + Arrays.toString(minPackedValue) +
+                                     " is out-of-bounds of the global maximum " + Arrays.toString(globalMaxPackedValue) + " dim=" + dim + " field=\"" + fieldName + "\"");
+        }
+        if (StringHelper.compare(bytesPerDim, maxPackedValue, offset, globalMaxPackedValue, offset) > 0) {
+          throw new RuntimeException("packed points cell maxPackedValue " + Arrays.toString(maxPackedValue) +
+                                     " is out-of-bounds of the global maximum " + Arrays.toString(globalMaxPackedValue) + " dim=" + dim + " field=\"" + fieldName + "\"");
+        }
+      }                                   
+
+      // We always pretend the query shape is so complex that it crosses every cell, so
+      // that packedValue is passed for every document
+      return PointValues.Relation.CELL_CROSSES_QUERY;
+    }
+
+    private void checkPackedValue(String desc, byte[] packedValue, int docID) {
+      if (packedValue == null) {
+        throw new RuntimeException(desc + " is null for docID=" + docID + " field=\"" + fieldName + "\"");
+      }
+
+      if (packedValue.length != packedBytesCount) {
+        throw new RuntimeException(desc + " has incorrect length=" + packedValue.length + " vs expected=" + packedBytesCount + " for docID=" + docID + " field=\"" + fieldName + "\"");
+      }
+    }
+  }
+
+  
   /**
    * Test stored fields.
    * @lucene.experimental

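Because VerifyPointsVisitor is now public, the same consistency check CheckIndex runs can be
replayed against any PointValues instance. A minimal usage sketch, assuming a PointValues
already obtained from a codec's PointsReader:

import java.io.IOException;

import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.index.PointValues;

public class VerifyPointsSketch {
  /** Replays CheckIndex's per-field points check. */
  static void verify(String fieldName, int maxDoc, PointValues values) throws IOException {
    CheckIndex.VerifyPointsVisitor visitor = new CheckIndex.VerifyPointsVisitor(fieldName, maxDoc, values);
    values.intersect(visitor);  // compare() always answers CELL_CROSSES_QUERY, so every point is visited
    if (visitor.getPointCountSeen() != values.size()) {
      throw new RuntimeException("size=" + values.size() + " but saw " + visitor.getPointCountSeen());
    }
    if (visitor.getDocCountSeen() != values.getDocCount()) {
      throw new RuntimeException("docCount=" + values.getDocCount() + " but saw " + visitor.getDocCountSeen());
    }
  }
}
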
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
index 6bf7dfc..6cccf4c 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
@@ -17,14 +17,15 @@
 package org.apache.lucene.util.bkd;
 
 import java.io.IOException;
-import java.util.Arrays;
 
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.PointValues;
+import org.apache.lucene.store.ByteArrayDataInput;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.MathUtil;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.StringHelper;
 
@@ -32,14 +33,12 @@ import org.apache.lucene.util.StringHelper;
  *
  * @lucene.experimental */
 
-public class BKDReader extends PointValues implements Accountable {
+public final class BKDReader extends PointValues implements Accountable {
   // Packed array of byte[] holding all split values in the full binary tree:
-  final private byte[] splitPackedValues; 
-  final long[] leafBlockFPs;
-  final private int leafNodeOffset;
+  final int leafNodeOffset;
   final int numDims;
   final int bytesPerDim;
-  final int bytesPerIndexEntry;
+  final int numLeaves;
   final IndexInput in;
   final int maxPointsInLeafNode;
   final byte[] minPackedValue;
@@ -49,6 +48,14 @@ public class BKDReader extends PointValues implements Accountable {
   final int version;
   protected final int packedBytesLength;
 
+  // Used for 6.4.0+ index format:
+  final byte[] packedIndex;
+
+  // Used for Legacy (pre-6.4.0) index format, to hold a compact form of the index:
+  final private byte[] splitPackedValues; 
+  final int bytesPerIndexEntry;
+  final long[] leafBlockFPs;
+
   /** Caller must pre-seek the provided {@link IndexInput} to the index location that {@link BKDWriter#finish} returned */
   public BKDReader(IndexInput in) throws IOException {
     version = CodecUtil.checkHeader(in, BKDWriter.CODEC_NAME, BKDWriter.VERSION_START, BKDWriter.VERSION_CURRENT);
@@ -59,7 +66,7 @@ public class BKDReader extends PointValues implements Accountable {
     packedBytesLength = numDims * bytesPerDim;
 
     // Read index:
-    int numLeaves = in.readVInt();
+    numLeaves = in.readVInt();
     assert numLeaves > 0;
     leafNodeOffset = numLeaves;
 
@@ -78,205 +85,380 @@ public class BKDReader extends PointValues implements Accountable {
     pointCount = in.readVLong();
     docCount = in.readVInt();
 
-    splitPackedValues = new byte[bytesPerIndexEntry*numLeaves];
-
-    // TODO: don't write split packed values[0]!
-    in.readBytes(splitPackedValues, 0, splitPackedValues.length);
-
-    // Read the file pointers to the start of each leaf block:
-    long[] leafBlockFPs = new long[numLeaves];
-    long lastFP = 0;
-    for(int i=0;i<numLeaves;i++) {
-      long delta = in.readVLong();
-      leafBlockFPs[i] = lastFP + delta;
-      lastFP += delta;
-    }
-
-    // Possibly rotate the leaf block FPs, if the index not fully balanced binary tree (only happens
-    // if it was created by BKDWriter.merge).  In this case the leaf nodes may straddle the two bottom
-    // levels of the binary tree:
-    if (numDims == 1 && numLeaves > 1) {
-      //System.out.println("BKDR: numLeaves=" + numLeaves);
-      int levelCount = 2;
-      while (true) {
-        //System.out.println("  cycle levelCount=" + levelCount);
-        if (numLeaves >= levelCount && numLeaves <= 2*levelCount) {
-          int lastLevel = 2*(numLeaves - levelCount);
-          assert lastLevel >= 0;
-          /*
-          System.out.println("BKDR: lastLevel=" + lastLevel + " vs " + levelCount);
-          System.out.println("FPs before:");
-          for(int i=0;i<leafBlockFPs.length;i++) {
-            System.out.println("  " + i + " " + leafBlockFPs[i]);
-          }
-          */
-          if (lastLevel != 0) {
-            // Last level is partially filled, so we must rotate the leaf FPs to match.  We do this here, after loading
-            // at read-time, so that we can still delta code them on disk at write:
-            //System.out.println("BKDR: now rotate index");
-            long[] newLeafBlockFPs = new long[numLeaves];
-            System.arraycopy(leafBlockFPs, lastLevel, newLeafBlockFPs, 0, leafBlockFPs.length - lastLevel);
-            System.arraycopy(leafBlockFPs, 0, newLeafBlockFPs, leafBlockFPs.length - lastLevel, lastLevel);
-            leafBlockFPs = newLeafBlockFPs;
-          }
-          /*
-          System.out.println("FPs:");
-          for(int i=0;i<leafBlockFPs.length;i++) {
-            System.out.println("  " + i + " " + leafBlockFPs[i]);
+    if (version >= BKDWriter.VERSION_PACKED_INDEX) {
+      int numBytes = in.readVInt();
+      packedIndex = new byte[numBytes];
+      in.readBytes(packedIndex, 0, numBytes);
+      leafBlockFPs = null;
+      splitPackedValues = null;
+    } else {
+      // legacy un-packed index
+
+      splitPackedValues = new byte[bytesPerIndexEntry*numLeaves];
+
+      in.readBytes(splitPackedValues, 0, splitPackedValues.length);
+
+      // Read the file pointers to the start of each leaf block:
+      long[] leafBlockFPs = new long[numLeaves];
+      long lastFP = 0;
+      for(int i=0;i<numLeaves;i++) {
+        long delta = in.readVLong();
+        leafBlockFPs[i] = lastFP + delta;
+        lastFP += delta;
+      }
+
+      // Possibly rotate the leaf block FPs, if the index is not a fully balanced binary tree (this only happens
+      // if it was created by BKDWriter.merge or OneDimWriter).  In this case the leaf nodes may straddle the two bottom
+      // levels of the binary tree:
+      if (numDims == 1 && numLeaves > 1) {
+        int levelCount = 2;
+        while (true) {
+          if (numLeaves >= levelCount && numLeaves <= 2*levelCount) {
+            int lastLevel = 2*(numLeaves - levelCount);
+            assert lastLevel >= 0;
+            if (lastLevel != 0) {
+              // Last level is partially filled, so we must rotate the leaf FPs to match.  We do this here, after loading
+              // at read-time, so that we can still delta code them on disk at write:
+              long[] newLeafBlockFPs = new long[numLeaves];
+              System.arraycopy(leafBlockFPs, lastLevel, newLeafBlockFPs, 0, leafBlockFPs.length - lastLevel);
+              System.arraycopy(leafBlockFPs, 0, newLeafBlockFPs, leafBlockFPs.length - lastLevel, lastLevel);
+              leafBlockFPs = newLeafBlockFPs;
+            }
+            break;
           }
-          */
-          break;
-        }
 
-        levelCount *= 2;
+          levelCount *= 2;
+        }
       }
+      
+      this.leafBlockFPs = leafBlockFPs;
+      packedIndex = null;
     }
 
-    this.leafBlockFPs = leafBlockFPs;
     this.in = in;
   }
 
-  /** Called by consumers that have their own on-disk format for the index (e.g. SimpleText) */
-  protected BKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues,
-                      byte[] minPackedValue, byte[] maxPackedValue, long pointCount, int docCount) throws IOException {
-    this.in = in;
-    this.numDims = numDims;
-    this.maxPointsInLeafNode = maxPointsInLeafNode;
-    this.bytesPerDim = bytesPerDim;
-    // no version check here because callers of this API (SimpleText) have no back compat:
-    bytesPerIndexEntry = numDims == 1 ? bytesPerDim : bytesPerDim + 1;
-    packedBytesLength = numDims * bytesPerDim;
-    this.leafNodeOffset = leafBlockFPs.length;
-    this.leafBlockFPs = leafBlockFPs;
-    this.splitPackedValues = splitPackedValues;
-    this.minPackedValue = minPackedValue;
-    this.maxPackedValue = maxPackedValue;
-    this.pointCount = pointCount;
-    this.docCount = docCount;
-    this.version = BKDWriter.VERSION_CURRENT;
-    assert minPackedValue.length == packedBytesLength;
-    assert maxPackedValue.length == packedBytesLength;
+  long getMinLeafBlockFP() {
+    if (packedIndex != null) {
+      return new ByteArrayDataInput(packedIndex).readVLong();
+    } else {
+      long minFP = Long.MAX_VALUE;
+      for(long fp : leafBlockFPs) {
+        minFP = Math.min(minFP, fp);
+      }
+      return minFP;
+    }
   }
 
-  private static class VerifyVisitor implements IntersectVisitor {
-    byte[] cellMinPacked;
-    byte[] cellMaxPacked;
-    byte[] lastPackedValue;
-    final int numDims;
-    final int bytesPerDim;
-    final int maxDoc;
+  /** Used to walk the in-heap index
+   *
+   * @lucene.internal */
+  public abstract class IndexTree implements Cloneable {
+    protected int nodeID;
+    // level is 1-based so that we can do level-1 w/o checking each time:
+    protected int level;
+    protected int splitDim;
+    protected final byte[][] splitPackedValueStack;
+
+    protected IndexTree() {
+      int treeDepth = getTreeDepth();
+      splitPackedValueStack = new byte[treeDepth+1][];
+      nodeID = 1;
+      level = 1;
+      splitPackedValueStack[level] = new byte[packedBytesLength];
+    }      
+
+    public void pushLeft() {
+      nodeID *= 2;
+      level++;
+      if (splitPackedValueStack[level] == null) {
+        splitPackedValueStack[level] = new byte[packedBytesLength];
+      }
+    }
+
+    /** Clone, but you are not allowed to pop up past the point where the clone happened. */
+    public abstract IndexTree clone();
+    
+    public void pushRight() {
+      nodeID = nodeID * 2 + 1;
+      level++;
+      if (splitPackedValueStack[level] == null) {
+        splitPackedValueStack[level] = new byte[packedBytesLength];
+      }
+    }
+
+    public void pop() {
+      nodeID /= 2;
+      level--;
+      splitDim = -1;
+      //System.out.println("  pop nodeID=" + nodeID);
+    }
 
-    public VerifyVisitor(int numDims, int bytesPerDim, int maxDoc) {
-      this.numDims = numDims;
-      this.bytesPerDim = bytesPerDim;
-      this.maxDoc = maxDoc;
+    public boolean isLeafNode() {
+      return nodeID >= leafNodeOffset;
     }
 
-    @Override
-    public void visit(int docID) {
-      throw new UnsupportedOperationException();
+    public boolean nodeExists() {
+      return nodeID - leafNodeOffset < leafNodeOffset;
+    }
+
+    public int getNodeID() {
+      return nodeID;
+    }
+
+    public byte[] getSplitPackedValue() {
+      assert isLeafNode() == false;
+      assert splitPackedValueStack[level] != null: "level=" + level;
+      return splitPackedValueStack[level];
+    }
+                                                       
+    /** Only valid after pushLeft or pushRight, not pop! */
+    public int getSplitDim() {
+      assert isLeafNode() == false;
+      return splitDim;
+    }
+
+    /** Only valid after pushLeft or pushRight, not pop! */
+    public abstract BytesRef getSplitDimValue();
+    
+    /** Only valid after pushLeft or pushRight, not pop! */
+    public abstract long getLeafBlockFP();
+  }
+
+  /** Reads the original simple yet heap-heavy index format */
+  private final class LegacyIndexTree extends IndexTree {
+
+    private long leafBlockFP;
+    private final byte[] splitDimValue = new byte[bytesPerDim];
+    private final BytesRef scratch = new BytesRef();
+
+    public LegacyIndexTree() {
+      setNodeData();
+      scratch.bytes = splitDimValue;
+      scratch.length = bytesPerDim;
     }
 
     @Override
-    public void visit(int docID, byte[] packedValue) {
-      if (docID < 0 || docID >= maxDoc) {
-        throw new RuntimeException("docID=" + docID + " is out of bounds of 0.." + maxDoc);
-      }
-      for(int dim=0;dim<numDims;dim++) {
-        if (StringHelper.compare(bytesPerDim, cellMinPacked, dim*bytesPerDim, packedValue, dim*bytesPerDim) > 0) {
-          throw new RuntimeException("value=" + new BytesRef(packedValue, dim*bytesPerDim, bytesPerDim) + " for docID=" + docID + " dim=" + dim + " is less than this leaf block's minimum=" + new BytesRef(cellMinPacked, dim*bytesPerDim, bytesPerDim));
-        }
-        if (StringHelper.compare(bytesPerDim, cellMaxPacked, dim*bytesPerDim, packedValue, dim*bytesPerDim) < 0) {
-          throw new RuntimeException("value=" + new BytesRef(packedValue, dim*bytesPerDim, bytesPerDim) + " for docID=" + docID + " dim=" + dim + " is greater than this leaf block's maximum=" + new BytesRef(cellMaxPacked, dim*bytesPerDim, bytesPerDim));
-        }
-      }
+    public LegacyIndexTree clone() {
+      LegacyIndexTree index = new LegacyIndexTree();
+      index.nodeID = nodeID;
+      index.level = level;
+      index.splitDim = splitDim;
+      index.leafBlockFP = leafBlockFP;
+      index.splitPackedValueStack[index.level] = splitPackedValueStack[index.level].clone();
+
+      return index;
+    }
+    
+    @Override
+    public void pushLeft() {
+      super.pushLeft();
+      setNodeData();
+    }
+    
+    @Override
+    public void pushRight() {
+      super.pushRight();
+      setNodeData();
+    }
 
-      if (numDims == 1) {
-        // With only 1D, all values should always be in sorted order
-        if (lastPackedValue == null) {
-          lastPackedValue = Arrays.copyOf(packedValue, packedValue.length);
-        } else if (StringHelper.compare(bytesPerDim, lastPackedValue, 0, packedValue, 0) > 0) {
-          throw new RuntimeException("value=" + new BytesRef(packedValue) + " for docID=" + docID + " dim=0" + " sorts before last value=" + new BytesRef(lastPackedValue));
+    private void setNodeData() {
+      if (isLeafNode()) {
+        leafBlockFP = leafBlockFPs[nodeID - leafNodeOffset];
+        splitDim = -1;
+      } else {
+        leafBlockFP = -1;
+        int address = nodeID * bytesPerIndexEntry;
+        if (numDims == 1) {
+          splitDim = 0;
+          if (version < BKDWriter.VERSION_IMPLICIT_SPLIT_DIM_1D) {
+            // skip over wastefully encoded 0 splitDim:
+            assert splitPackedValues[address] == 0;
+            address++;
+          }
         } else {
-          System.arraycopy(packedValue, 0, lastPackedValue, 0, bytesPerDim);
+          splitDim = splitPackedValues[address++] & 0xff;
         }
+        System.arraycopy(splitPackedValues, address, splitDimValue, 0, bytesPerDim);
       }
     }
 
     @Override
-    public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
-      throw new UnsupportedOperationException();
+    public long getLeafBlockFP() {
+      assert isLeafNode();
+      return leafBlockFP;
+    }
+
+    @Override
+    public BytesRef getSplitDimValue() {
+      assert isLeafNode() == false;
+      return scratch;
     }
-  }
 
-  /** Only used for debugging, to make sure all values in each leaf block fall within the range expected by the index */
-  // TODO: maybe we can get this into CheckIndex?
-  public void verify(int maxDoc) throws IOException {
-    //System.out.println("BKDR.verify this=" + this);
-    // Visits every doc in every leaf block and confirms that
-    // their values agree with the index:
-    byte[] rootMinPacked = new byte[packedBytesLength];
-    byte[] rootMaxPacked = new byte[packedBytesLength];
-    Arrays.fill(rootMaxPacked, (byte) 0xff);
-    verify(getIntersectState(new VerifyVisitor(numDims, bytesPerDim, maxDoc)), 1, rootMinPacked, rootMaxPacked);
+    @Override
+    public void pop() {
+      super.pop();
+      leafBlockFP = -1;
+    }
   }
 
-  private void verify(IntersectState state, int nodeID, byte[] cellMinPacked, byte[] cellMaxPacked) throws IOException {
+  /** Reads the new packed byte[] index format, which can be up to ~63% smaller than the legacy index format in tests on 20M NYC taxi data.  This
+   *  format takes advantage of the limited access pattern to the BKD tree at search time, i.e. starting at the root node and recursing
+   *  downwards one child at a time. */
+  private final class PackedIndexTree extends IndexTree {
+    // used to read the packed byte[]
+    private final ByteArrayDataInput in;
+    // holds the minimum (left-most) leaf block file pointer for each level we've recursed to:
+    private final long[] leafBlockFPStack;
+    // holds the address, in the packed byte[] index, of the left-node of each level:
+    private final int[] leftNodePositions;
+    // holds the address, in the packed byte[] index, of the right-node of each level:
+    private final int[] rightNodePositions;
+    // holds the splitDim for each level:
+    private final int[] splitDims;
+    // true if the per-dim delta we read for the node at this level is a negative offset vs. the last split on this dim; this is a packed
+    // 2D array, i.e. to access array[level][dim] you read from negativeDeltas[level*numDims+dim].  this will be true if the last time we
+    // split on this dimension, we next pushed to the left sub-tree:
+    private final boolean[] negativeDeltas;
+    // holds the packed per-level split values; the intersect method uses this to save the cell min/max as it recurses:
+    private final byte[][] splitValuesStack;
+    // scratch value to return from getPackedValue:
+    private final BytesRef scratch;
+
+    public PackedIndexTree() {
+      int treeDepth = getTreeDepth();
+      leafBlockFPStack = new long[treeDepth+1];
+      leftNodePositions = new int[treeDepth+1];
+      rightNodePositions = new int[treeDepth+1];
+      splitValuesStack = new byte[treeDepth+1][];
+      splitDims = new int[treeDepth+1];
+      negativeDeltas = new boolean[numDims*(treeDepth+1)];
+
+      in = new ByteArrayDataInput(packedIndex);
+      splitValuesStack[0] = new byte[packedBytesLength];
+      readNodeData(false);
+      scratch = new BytesRef();
+      scratch.length = bytesPerDim;
+    }
 
-    if (nodeID >= leafNodeOffset) {
-      int leafID = nodeID - leafNodeOffset;
+    @Override
+    public PackedIndexTree clone() {
+      PackedIndexTree index = new PackedIndexTree();
+      index.nodeID = nodeID;
+      index.level = level;
+      index.splitDim = splitDim;
+      System.arraycopy(negativeDeltas, level*numDims, index.negativeDeltas, level*numDims, numDims);
+      index.leafBlockFPStack[level] = leafBlockFPStack[level];
+      index.leftNodePositions[level] = leftNodePositions[level];
+      index.rightNodePositions[level] = rightNodePositions[level];
+      index.splitValuesStack[index.level] = splitValuesStack[index.level].clone();
+      System.arraycopy(negativeDeltas, level*numDims, index.negativeDeltas, level*numDims, numDims);
+      index.splitDims[level] = splitDims[level];
+      return index;
+    }
 
-      // In the unbalanced case it's possible the left most node only has one child:
-      if (leafID < leafBlockFPs.length) {
-        //System.out.println("CHECK nodeID=" + nodeID + " leaf=" + (nodeID-leafNodeOffset) + " offset=" + leafNodeOffset + " fp=" + leafBlockFPs[leafID]);
-        //System.out.println("BKDR.verify leafID=" + leafID + " nodeID=" + nodeID + " fp=" + leafBlockFPs[leafID] + " min=" + new BytesRef(cellMinPacked) + " max=" + new BytesRef(cellMaxPacked));
+    @Override
+    public void pushLeft() {
+      int nodePosition = leftNodePositions[level];
+      super.pushLeft();
+      System.arraycopy(negativeDeltas, (level-1)*numDims, negativeDeltas, level*numDims, numDims);
+      assert splitDim != -1;
+      negativeDeltas[level*numDims+splitDim] = true;
+      in.setPosition(nodePosition);
+      readNodeData(true);
+    }
+    
+    @Override
+    public void pushRight() {
+      int nodePosition = rightNodePositions[level];
+      super.pushRight();
+      System.arraycopy(negativeDeltas, (level-1)*numDims, negativeDeltas, level*numDims, numDims);
+      assert splitDim != -1;
+      negativeDeltas[level*numDims+splitDim] = false;
+      in.setPosition(nodePosition);
+      readNodeData(false);
+    }
 
-        // Leaf node: check that all values are in fact in bounds:
-        VerifyVisitor visitor = (VerifyVisitor) state.visitor;
-        visitor.cellMinPacked = cellMinPacked;
-        visitor.cellMaxPacked = cellMaxPacked;
+    @Override
+    public void pop() {
+      super.pop();
+      splitDim = splitDims[level];
+    }
 
-        int count = readDocIDs(state.in, leafBlockFPs[leafID], state.scratchDocIDs);
-        visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, count, state.visitor);
-      } else {
-        //System.out.println("BKDR.verify skip leafID=" + leafID);
+    @Override
+    public long getLeafBlockFP() {
+      assert isLeafNode(): "nodeID=" + nodeID + " is not a leaf";
+      return leafBlockFPStack[level];
+    }
+
+    @Override
+    public BytesRef getSplitDimValue() {
+      assert isLeafNode() == false;
+      scratch.bytes = splitValuesStack[level];
+      scratch.offset = splitDim * bytesPerDim;
+      return scratch;
+    }
+
+    private void readNodeData(boolean isLeft) {
+
+      leafBlockFPStack[level] = leafBlockFPStack[level-1];
+
+      // read leaf block FP delta
+      if (isLeft == false) {
+        leafBlockFPStack[level] += in.readVLong();
       }
-    } else {
-      // Non-leaf node:
-
-      int address = nodeID * bytesPerIndexEntry;
-      int splitDim;
-      if (numDims == 1) {
-        splitDim = 0;
-        if (version < BKDWriter.VERSION_IMPLICIT_SPLIT_DIM_1D) {
-          // skip over wastefully encoded 0 splitDim:
-          assert splitPackedValues[address] == 0;
-          address++;
-        }
+
+      if (isLeafNode()) {
+        splitDim = -1;
       } else {
-        splitDim = splitPackedValues[address++] & 0xff;
-      }
-      
-      assert splitDim < numDims;
 
-      byte[] splitPackedValue = new byte[packedBytesLength];
+        // read split dim, prefix, firstDiffByteDelta encoded as int:
+        int code = in.readVInt();
+        splitDim = code % numDims;
+        splitDims[level] = splitDim;
+        code /= numDims;
+        int prefix = code % (1+bytesPerDim);
+        int suffix = bytesPerDim - prefix;
 
-      // Recurse on left sub-tree:
-      System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
-      System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
-      verify(state,
-             2*nodeID,
-             cellMinPacked, splitPackedValue);
+        if (splitValuesStack[level] == null) {
+          splitValuesStack[level] = new byte[packedBytesLength];
+        }
+        System.arraycopy(splitValuesStack[level-1], 0, splitValuesStack[level], 0, packedBytesLength);
+        if (suffix > 0) {
+          int firstDiffByteDelta = code / (1+bytesPerDim);
+          if (negativeDeltas[level*numDims + splitDim]) {
+            firstDiffByteDelta = -firstDiffByteDelta;
+          }
+          int oldByte = splitValuesStack[level][splitDim*bytesPerDim+prefix] & 0xFF;
+          splitValuesStack[level][splitDim*bytesPerDim+prefix] = (byte) (oldByte + firstDiffByteDelta);
+          in.readBytes(splitValuesStack[level], splitDim*bytesPerDim+prefix+1, suffix-1);
+        } else {
+          // our split value is == last split value in this dim, which can happen when there are many duplicate values
+        }
 
-      // Recurse on right sub-tree:
-      System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
-      System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
-      verify(state,
-             2*nodeID+1,
-             splitPackedValue, cellMaxPacked);
+        int leftNumBytes;
+        if (nodeID * 2 < leafNodeOffset) {
+          leftNumBytes = in.readVInt();
+        } else {
+          leftNumBytes = 0;
+        }
+
+        leftNodePositions[level] = in.getPosition();
+        rightNodePositions[level] = leftNodePositions[level] + leftNumBytes;
+      }
     }
   }
 
+  private int getTreeDepth() {
+    // First +1 because all the non-leaf nodes make another power
+    // of 2; e.g. to have a fully balanced tree with 4 leaves you
+    // need a depth=3 tree:
+
+    // Second +1 because MathUtil.log computes floor of the logarithm; e.g.
+    // with 5 leaves you need a depth=4 tree:
+    return MathUtil.log(numLeaves, 2) + 2;
+  }
+
   /** Used to track all state for a single call to {@link #intersect}. */
   public static final class IntersectState {
     final IndexInput in;
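
Both tree readers above walk an implicit, heap-numbered binary tree: the root is node 1, the
children of node n are 2*n and 2*n+1, and node IDs at or beyond leafNodeOffset are leaves. A
standalone sketch of just that navigation, with the depth bound of getTreeDepth() reproduced
via an equivalent bit trick (an assumption of this sketch; the real code uses MathUtil.log):

public class ImplicitTreeSketch {
  final int numLeaves;  // plays the role of leafNodeOffset
  int nodeID = 1;
  int level = 1;        // 1-based, as in IndexTree

  ImplicitTreeSketch(int numLeaves) {
    this.numLeaves = numLeaves;
  }

  boolean isLeafNode() { return nodeID >= numLeaves; }
  void pushLeft()      { nodeID *= 2; level++; }
  void pushRight()     { nodeID = nodeID * 2 + 1; level++; }
  void pop()           { nodeID /= 2; level--; }

  // floor(log2(numLeaves)) + 2, matching BKDReader.getTreeDepth():
  int treeDepth() {
    return (31 - Integer.numberOfLeadingZeros(numLeaves)) + 2;
  }

  public static void main(String[] args) {
    ImplicitTreeSketch t = new ImplicitTreeSketch(4);  // leaves are nodes 4..7
    t.pushLeft();   // node 2
    t.pushLeft();   // node 4: a leaf
    System.out.println("nodeID=" + t.nodeID + " leaf=" + t.isLeafNode() + " depthBound=" + t.treeDepth());
  }
}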
@@ -285,57 +467,73 @@ public class BKDReader extends PointValues implements Accountable {
     final int[] commonPrefixLengths;
 
     final IntersectVisitor visitor;
+    public final IndexTree index;
 
     public IntersectState(IndexInput in, int numDims,
                           int packedBytesLength,
                           int maxPointsInLeafNode,
-                          IntersectVisitor visitor) {
+                          IntersectVisitor visitor,
+                          IndexTree indexVisitor) {
       this.in = in;
       this.visitor = visitor;
       this.commonPrefixLengths = new int[numDims];
       this.scratchDocIDs = new int[maxPointsInLeafNode];
       this.scratchPackedValue = new byte[packedBytesLength];
+      this.index = indexVisitor;
     }
   }
 
   public void intersect(IntersectVisitor visitor) throws IOException {
-    intersect(getIntersectState(visitor), 1, minPackedValue, maxPackedValue);
+    intersect(getIntersectState(visitor), minPackedValue, maxPackedValue);
   }
 
   /** Fast path: this is called when the query box fully encompasses all cells under this node. */
-  private void addAll(IntersectState state, int nodeID) throws IOException {
+  private void addAll(IntersectState state) throws IOException {
     //System.out.println("R: addAll nodeID=" + nodeID);
 
-    if (nodeID >= leafNodeOffset) {
+    if (state.index.isLeafNode()) {
       //System.out.println("ADDALL");
-      visitDocIDs(state.in, leafBlockFPs[nodeID-leafNodeOffset], state.visitor);
+      if (state.index.nodeExists()) {
+        visitDocIDs(state.in, state.index.getLeafBlockFP(), state.visitor);
+      }
       // TODO: we can assert that the first value here in fact matches what the index claimed?
     } else {
-      addAll(state, 2*nodeID);
-      addAll(state, 2*nodeID+1);
+      state.index.pushLeft();
+      addAll(state);
+      state.index.pop();
+
+      state.index.pushRight();
+      addAll(state);
+      state.index.pop();
     }
   }
 
   /** Create a new {@link IntersectState} */
   public IntersectState getIntersectState(IntersectVisitor visitor) {
+    IndexTree index;
+    if (packedIndex != null) {
+      index = new PackedIndexTree();
+    } else {
+      index = new LegacyIndexTree();
+    }
     return new IntersectState(in.clone(), numDims,
                               packedBytesLength,
                               maxPointsInLeafNode,
-                              visitor);
+                              visitor,
+                              index);
   }
 
   /** Visits all docIDs and packed values in a single leaf block */
-  public void visitLeafBlockValues(int nodeID, IntersectState state) throws IOException {
-    int leafID = nodeID - leafNodeOffset;
+  public void visitLeafBlockValues(IndexTree index, IntersectState state) throws IOException {
 
     // Leaf node; scan and filter all points in this block:
-    int count = readDocIDs(state.in, leafBlockFPs[leafID], state.scratchDocIDs);
+    int count = readDocIDs(state.in, index.getLeafBlockFP(), state.scratchDocIDs);
 
     // Again, this time reading values and checking with the visitor
     visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, count, state.visitor);
   }
 
-  protected void visitDocIDs(IndexInput in, long blockFP, IntersectVisitor visitor) throws IOException {
+  private void visitDocIDs(IndexInput in, long blockFP, IntersectVisitor visitor) throws IOException {
     // Leaf node
     in.seek(blockFP);
 
@@ -350,7 +548,7 @@ public class BKDReader extends PointValues implements Accountable {
     }
   }
 
-  protected int readDocIDs(IndexInput in, long blockFP, int[] docIDs) throws IOException {
+  int readDocIDs(IndexInput in, long blockFP, int[] docIDs) throws IOException {
     in.seek(blockFP);
 
     // How many points are stored in this leaf cell:
@@ -365,7 +563,7 @@ public class BKDReader extends PointValues implements Accountable {
     return count;
   }
 
-  protected void visitDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException {
+  void visitDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException {
     visitor.grow(count);
 
     readCommonPrefixes(commonPrefixLengths, scratchPackedValue, in);
@@ -434,13 +632,10 @@ public class BKDReader extends PointValues implements Accountable {
     }
   }
 
-  private void intersect(IntersectState state,
-                         int nodeID,
-                         byte[] cellMinPacked, byte[] cellMaxPacked)
-    throws IOException {
+  private void intersect(IntersectState state, byte[] cellMinPacked, byte[] cellMaxPacked) throws IOException {
 
     /*
-    System.out.println("\nR: intersect nodeID=" + nodeID);
+    System.out.println("\nR: intersect nodeID=" + state.index.getNodeID());
     for(int dim=0;dim<numDims;dim++) {
       System.out.println("  dim=" + dim + "\n    cellMin=" + new BytesRef(cellMinPacked, dim*bytesPerDim, bytesPerDim) + "\n    cellMax=" + new BytesRef(cellMaxPacked, dim*bytesPerDim, bytesPerDim));
     }
@@ -450,24 +645,18 @@ public class BKDReader extends PointValues implements Accountable {
 
     if (r == Relation.CELL_OUTSIDE_QUERY) {
       // This cell is fully outside of the query shape: stop recursing
-      return;
     } else if (r == Relation.CELL_INSIDE_QUERY) {
       // This cell is fully inside of the query shape: recursively add all points in this cell without filtering
-      addAll(state, nodeID);
-      return;
-    } else {
-      // The cell crosses the shape boundary, or the cell fully contains the query, so we fall through and do full filtering
-    }
-
-    if (nodeID >= leafNodeOffset) {
+      addAll(state);
+      // The cell crosses the shape boundary, or the cell fully contains the query, so we fall through and do full filtering:
+    } else if (state.index.isLeafNode()) {
+      
       // TODO: we can assert that the first value here in fact matches what the index claimed?
-
-      int leafID = nodeID - leafNodeOffset;
       
       // In the unbalanced case it's possible the left most node only has one child:
-      if (leafID < leafBlockFPs.length) {
+      if (state.index.nodeExists()) {
         // Leaf node; scan and filter all points in this block:
-        int count = readDocIDs(state.in, leafBlockFPs[leafID], state.scratchDocIDs);
+        int count = readDocIDs(state.in, state.index.getLeafBlockFP(), state.scratchDocIDs);
 
         // Again, this time reading values and checking with the visitor
         visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, count, state.visitor);
@@ -476,65 +665,45 @@ public class BKDReader extends PointValues implements Accountable {
     } else {
       
       // Non-leaf node: recurse on the split left and right nodes
-
-      int address = nodeID * bytesPerIndexEntry;
-      int splitDim;
-      if (numDims == 1) {
-        splitDim = 0;
-        if (version < BKDWriter.VERSION_IMPLICIT_SPLIT_DIM_1D) {
-          // skip over wastefully encoded 0 splitDim:
-          assert splitPackedValues[address] == 0;
-          address++;
-        }
-      } else {
-        splitDim = splitPackedValues[address++] & 0xff;
-      }
-      
+      int splitDim = state.index.getSplitDim();
+      assert splitDim >= 0: "splitDim=" + splitDim;
       assert splitDim < numDims;
 
-      // TODO: can we alloc & reuse this up front?
+      byte[] splitPackedValue = state.index.getSplitPackedValue();
+      BytesRef splitDimValue = state.index.getSplitDimValue();
+      assert splitDimValue.length == bytesPerDim;
+      //System.out.println("  splitDimValue=" + splitDimValue + " splitDim=" + splitDim);
 
-      byte[] splitPackedValue = new byte[packedBytesLength];
+      // make sure cellMin <= splitValue <= cellMax:
+      assert StringHelper.compare(bytesPerDim, cellMinPacked, splitDim*bytesPerDim, splitDimValue.bytes, splitDimValue.offset) <= 0: "bytesPerDim=" + bytesPerDim + " splitDim=" + splitDim + " numDims=" + numDims;
+      assert StringHelper.compare(bytesPerDim, cellMaxPacked, splitDim*bytesPerDim, splitDimValue.bytes, splitDimValue.offset) >= 0: "bytesPerDim=" + bytesPerDim + " splitDim=" + splitDim + " numDims=" + numDims;
 
       // Recurse on left sub-tree:
       System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
-      System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
-      intersect(state,
-                2*nodeID,
-                cellMinPacked, splitPackedValue);
+      System.arraycopy(splitDimValue.bytes, splitDimValue.offset, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+      state.index.pushLeft();
+      intersect(state, cellMinPacked, splitPackedValue);
+      state.index.pop();
+
+      // Restore the split dim value since it may have been overwritten while recursing:
+      System.arraycopy(splitPackedValue, splitDim*bytesPerDim, splitDimValue.bytes, splitDimValue.offset, bytesPerDim);
 
       // Recurse on right sub-tree:
       System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
-      System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
-      intersect(state,
-                2*nodeID+1,
-                splitPackedValue, cellMaxPacked);
+      System.arraycopy(splitDimValue.bytes, splitDimValue.offset, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+      state.index.pushRight();
+      intersect(state, splitPackedValue, cellMaxPacked);
+      state.index.pop();
     }
   }
 
-  /** Copies the split value for this node into the provided byte array */
-  public void copySplitValue(int nodeID, byte[] splitPackedValue) {
-    int address = nodeID * bytesPerIndexEntry;
-    int splitDim;
-    if (numDims == 1) {
-      splitDim = 0;
-      if (version < BKDWriter.VERSION_IMPLICIT_SPLIT_DIM_1D) {
-        // skip over wastefully encoded 0 splitDim:
-        assert splitPackedValues[address] == 0;
-        address++;
-      }
-    } else {
-      splitDim = splitPackedValues[address++] & 0xff;
-    }
-    
-    assert splitDim < numDims;
-    System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
-  }
-
   @Override
   public long ramBytesUsed() {
-    return RamUsageEstimator.sizeOf(splitPackedValues) +
-        RamUsageEstimator.sizeOf(leafBlockFPs);
+    if (packedIndex != null) {
+      return packedIndex.length;
+    } else {
+      return RamUsageEstimator.sizeOf(splitPackedValues) + RamUsageEstimator.sizeOf(leafBlockFPs);
+    }
   }
 
   @Override


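The refactoring above replaces explicit nodeID arithmetic (2*nodeID and 2*nodeID+1 passed down the call stack) with a single reusable index-tree cursor that is pushed left or right before recursing and popped on the way back up, mirroring the addAll(state) / intersect(state, ...) calls in the diff. A minimal, self-contained sketch of that cursor pattern (class and method names here are illustrative, not the actual BKDReader API; it assumes the heap-style numbering in which the children of node n are 2n and 2n+1 and leaves start at leafNodeOffset):

  import java.util.ArrayDeque;
  import java.util.Deque;

  class IndexCursorSketch {
    private final int leafNodeOffset;              // first leaf node id in the heap-ordered tree
    private final Deque<Integer> path = new ArrayDeque<>();
    private int nodeID = 1;                        // start at the root

    IndexCursorSketch(int leafNodeOffset) {
      this.leafNodeOffset = leafNodeOffset;
    }

    boolean isLeafNode() { return nodeID >= leafNodeOffset; }

    void pushLeft()  { path.push(nodeID); nodeID = 2 * nodeID; }
    void pushRight() { path.push(nodeID); nodeID = 2 * nodeID + 1; }
    void pop()       { nodeID = path.pop(); }

    // Depth-first visit mirroring addAll(state) in the diff: descend into both
    // children, restoring the cursor with pop() after each subtree.
    void visitAll() {
      if (isLeafNode()) {
        System.out.println("leaf node " + nodeID);
      } else {
        pushLeft();  visitAll(); pop();
        pushRight(); visitAll(); pop();
      }
    }

    public static void main(String[] args) {
      new IndexCursorSketch(4).visitAll(); // full binary tree with 4 leaves: prints 4, 5, 6, 7
    }
  }

The save/restore of splitDimValue around the left recursion in the diff exists because the cursor reuses one scratch buffer per level; once the left subtree returns, the split value must be copied back before descending right.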
[32/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7591 - approximate with the number of terms when DVs are not available

Posted by kr...@apache.org.
LUCENE-7591 - approximate with the number of terms when DVs are not available


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/87d8b545
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/87d8b545
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/87d8b545

Branch: refs/heads/jira/solr-8593
Commit: 87d8b5450a6d75fdd4b724b24a3722054b6d00f8
Parents: 25c7855
Author: Tommaso Teofili <to...@apache.org>
Authored: Mon Dec 12 10:00:21 2016 +0100
Committer: Tommaso Teofili <to...@apache.org>
Committed: Mon Dec 12 10:00:21 2016 +0100

----------------------------------------------------------------------
 .../org/apache/lucene/classification/utils/DatasetSplitter.java   | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87d8b545/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java
----------------------------------------------------------------------
diff --git a/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java b/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java
index 8bb0b1d..7ab674e 100644
--- a/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java
+++ b/lucene/classification/src/java/org/apache/lucene/classification/utils/DatasetSplitter.java
@@ -94,7 +94,8 @@ public class DatasetSplitter {
         }
       }
       if (classValues == null) {
-        throw new IllegalStateException("field \"" + classFieldName + "\" must have sorted (set) doc values");
+        // approximate with no. of terms
+        noOfClasses += leave.reader().terms(classFieldName).size();
       }
       noOfClasses += valueCount;
     }


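For context on the fallback above: LeafReader.terms(field) returns the segment's Terms for that field, and Terms.size() is the number of distinct terms, which stands in for the number of distinct class values when sorted (set) doc values are absent. Note that Terms.size() is allowed to return -1 when the codec cannot compute it cheaply, and terms(field) may return null for a missing field, so a defensive version of the same idea (the helper class and method here are hypothetical, not part of the patch) looks like:

  import java.io.IOException;
  import org.apache.lucene.index.LeafReader;
  import org.apache.lucene.index.Terms;

  final class ClassCountApprox {
    // Approximates the number of distinct classes with the field's term count.
    // Terms.size() may be -1 when the codec does not store it, and terms() may
    // be null for a field absent from the segment; both cases fall back to 0.
    static long approximateClassCount(LeafReader reader, String field) throws IOException {
      Terms terms = reader.terms(field);
      return terms == null ? 0 : Math.max(terms.size(), 0);
    }
  }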
[23/50] [abbrv] lucene-solr:jira/solr-8593: fix stale comment

Posted by kr...@apache.org.
fix stale comment


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/93c11462
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/93c11462
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/93c11462

Branch: refs/heads/jira/solr-8593
Commit: 93c11462bbe2c442f20a6d090911c5a1a4546564
Parents: 1055209
Author: Mike McCandless <mi...@apache.org>
Authored: Thu Dec 8 18:17:25 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Thu Dec 8 18:17:25 2016 -0500

----------------------------------------------------------------------
 .../apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/93c11462/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
index 9d2db89..c3217f3 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsWriter.java
@@ -72,7 +72,7 @@ class SimpleTextPointsWriter extends PointsWriter {
     PointValues values = reader.getValues(fieldInfo.name);
     boolean singleValuePerDoc = values.size() == values.getDocCount();
 
-    // We use the normal BKDWriter, but subclass to customize how it writes the index and blocks to disk:
+    // We use our own fork of the BKDWriter to customize how it writes the index and blocks to disk:
     try (SimpleTextBKDWriter writer = new SimpleTextBKDWriter(writeState.segmentInfo.maxDoc(),
                                                               writeState.directory,
                                                               writeState.segmentInfo.name,


[46/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7590: add sum, variance and stdev stats to NumericDVStats

Posted by kr...@apache.org.
LUCENE-7590: add sum, variance and stdev stats to NumericDVStats


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/295cab72
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/295cab72
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/295cab72

Branch: refs/heads/jira/solr-8593
Commit: 295cab7216ca76debaf4d354409741058a8641a1
Parents: e4f31fa
Author: Shai Erera <sh...@apache.org>
Authored: Thu Dec 15 12:52:37 2016 +0200
Committer: Shai Erera <sh...@apache.org>
Committed: Thu Dec 15 14:42:08 2016 +0200

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  3 +
 .../apache/lucene/search/DocValuesStats.java    | 39 +++++++++++-
 .../search/TestDocValuesStatsCollector.java     | 62 +++++++++++++++++---
 3 files changed, 95 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/295cab72/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index f38c0d5..0e327d2 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -67,6 +67,9 @@ New features
 
 * LUCENE-7466: Added AxiomaticSimilarity. (Peilin Yang via Tommaso Teofili)
 
+* LUCENE-7590: Added DocValuesStatsCollector to compute statistics on DocValues
+  fields. (Shai Erera)
+
 Bug Fixes
 
 * LUCENE-7547: JapaneseTokenizerFactory was failing to close the

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/295cab72/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
----------------------------------------------------------------------
diff --git a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
index 998bef4..c8b7752 100644
--- a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
+++ b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
@@ -98,6 +98,7 @@ public abstract class DocValuesStats<T> {
   public static abstract class NumericDocValuesStats<T extends Number> extends DocValuesStats<T> {
 
     protected double mean = 0.0;
+    protected double variance = 0.0;
 
     protected NumericDocValues ndv;
 
@@ -116,15 +117,32 @@ public abstract class DocValuesStats<T> {
       return ndv.advanceExact(doc);
     }
 
-    /** The mean of all values of the field. Undefined when {@link #count} is zero. */
+    /** The mean of all values of the field. */
     public final double mean() {
       return mean;
     }
+
+    /** Returns the variance of all values of the field. */
+    public final double variance() {
+      int count = count();
+      return count > 0 ? variance / count : 0;
+    }
+
+    /** Returns the stdev of all values of the field. */
+    public final double stdev() {
+      return Math.sqrt(variance());
+    }
+
+    /** Returns the sum of values of the field. Note that if the values are large, the {@code sum} might overflow. */
+    public abstract T sum();
   }
 
   /** Holds DocValues statistics for a numeric field storing {@code long} values. */
   public static final class LongDocValuesStats extends NumericDocValuesStats<Long> {
 
+    // To avoid boxing 'long' to 'Long' while the sum is computed, declare it as a private variable.
+    private long sum = 0;
+
     public LongDocValuesStats(String field) {
       super(field, Long.MAX_VALUE, Long.MIN_VALUE);
     }
@@ -138,13 +156,24 @@ public abstract class DocValuesStats<T> {
       if (val < min) {
         min = val;
       }
+      sum += val;
+      double oldMean = mean;
       mean += (val - mean) / count;
+      variance += (val - mean) * (val - oldMean);
+    }
+
+    @Override
+    public Long sum() {
+      return sum;
     }
   }
 
   /** Holds DocValues statistics for a numeric field storing {@code double} values. */
   public static final class DoubleDocValuesStats extends NumericDocValuesStats<Double> {
 
+    // To avoid boxing 'double' to 'Double' while the sum is computed, declare it as a private variable.
+    private double sum = 0;
+
     public DoubleDocValuesStats(String field) {
       super(field, Double.MAX_VALUE, Double.MIN_VALUE);
     }
@@ -158,7 +187,15 @@ public abstract class DocValuesStats<T> {
       if (Double.compare(val, min) < 0) {
         min = val;
       }
+      sum += val;
+      double oldMean = mean;
       mean += (val - mean) / count;
+      variance += (val - mean) * (val - oldMean);
+    }
+
+    @Override
+    public Double sum() {
+      return sum;
     }
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/295cab72/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java
----------------------------------------------------------------------
diff --git a/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java b/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java
index 65f82e6..8f8b09e 100644
--- a/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java
+++ b/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java
@@ -18,6 +18,8 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.DoubleSummaryStatistics;
+import java.util.LongSummaryStatistics;
 import java.util.stream.DoubleStream;
 import java.util.stream.LongStream;
 
@@ -57,7 +59,33 @@ public class TestDocValuesStatsCollector extends LuceneTestCase {
     }
   }
 
-  public void testRandomDocsWithLongValues() throws IOException {
+  public void testOneDoc() throws IOException {
+    try (Directory dir = newDirectory();
+        IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
+      String field = "numeric";
+      Document doc = new Document();
+      doc.add(new NumericDocValuesField(field, 1));
+      doc.add(new StringField("id", "doc1", Store.NO));
+      indexWriter.addDocument(doc);
+
+      try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
+        IndexSearcher searcher = new IndexSearcher(reader);
+        LongDocValuesStats stats = new LongDocValuesStats(field);
+        searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));
+
+        assertEquals(1, stats.count());
+        assertEquals(0, stats.missing());
+        assertEquals(1, stats.max().longValue());
+        assertEquals(1, stats.min().longValue());
+        assertEquals(1, stats.sum().longValue());
+        assertEquals(1, stats.mean(), 0.0001);
+        assertEquals(0, stats.variance(), 0.0001);
+        assertEquals(0, stats.stdev(), 0.0001);
+      }
+    }
+  }
+
+  public void testDocsWithLongValues() throws IOException {
     try (Directory dir = newDirectory();
         IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
       String field = "numeric";
@@ -94,15 +122,20 @@ public class TestDocValuesStatsCollector extends LuceneTestCase {
         assertEquals(expCount, stats.count());
         assertEquals(getZeroValues(docValues).count() - reader.numDeletedDocs(), stats.missing());
         if (stats.count() > 0) {
-          assertEquals(getPositiveValues(docValues).max().getAsLong(), stats.max().longValue());
-          assertEquals(getPositiveValues(docValues).min().getAsLong(), stats.min().longValue());
-          assertEquals(getPositiveValues(docValues).average().getAsDouble(), stats.mean(), 0.00001);
+          LongSummaryStatistics sumStats = getPositiveValues(docValues).summaryStatistics();
+          assertEquals(sumStats.getMax(), stats.max().longValue());
+          assertEquals(sumStats.getMin(), stats.min().longValue());
+          assertEquals(sumStats.getAverage(), stats.mean(), 0.00001);
+          assertEquals(sumStats.getSum(), stats.sum().longValue());
+          double variance = computeVariance(docValues, stats.mean, stats.count());
+          assertEquals(variance, stats.variance(), 0.00001);
+          assertEquals(Math.sqrt(variance), stats.stdev(), 0.00001);
         }
       }
     }
   }
 
-  public void testRandomDocsWithDoubleValues() throws IOException {
+  public void testDocsWithDoubleValues() throws IOException {
     try (Directory dir = newDirectory();
         IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
       String field = "numeric";
@@ -139,9 +172,14 @@ public class TestDocValuesStatsCollector extends LuceneTestCase {
         assertEquals(expCount, stats.count());
         assertEquals(getZeroValues(docValues).count() - reader.numDeletedDocs(), stats.missing());
         if (stats.count() > 0) {
-          assertEquals(getPositiveValues(docValues).max().getAsDouble(), stats.max().doubleValue(), 0.00001);
-          assertEquals(getPositiveValues(docValues).min().getAsDouble(), stats.min().doubleValue(), 0.00001);
-          assertEquals(getPositiveValues(docValues).average().getAsDouble(), stats.mean(), 0.00001);
+          DoubleSummaryStatistics sumStats = getPositiveValues(docValues).summaryStatistics();
+          assertEquals(sumStats.getMax(), stats.max().doubleValue(), 0.00001);
+          assertEquals(sumStats.getMin(), stats.min().doubleValue(), 0.00001);
+          assertEquals(sumStats.getAverage(), stats.mean(), 0.00001);
+          assertEquals(sumStats.getSum(), stats.sum(), 0.00001);
+          double variance = computeVariance(docValues, stats.mean, stats.count());
+          assertEquals(variance, stats.variance(), 0.00001);
+          assertEquals(Math.sqrt(variance), stats.stdev(), 0.00001);
         }
       }
     }
@@ -163,4 +201,12 @@ public class TestDocValuesStatsCollector extends LuceneTestCase {
     return Arrays.stream(docValues).filter(v -> v == 0);
   }
 
+  private static double computeVariance(long[] values, double mean, int count) {
+    return getPositiveValues(values).mapToDouble(v -> (v - mean) * (v-mean)).sum() / count;
+  }
+
+  private static double computeVariance(double[] values, double mean, int count) {
+    return getPositiveValues(values).map(v -> (v - mean) * (v-mean)).sum() / count;
+  }
+
 }


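The mean and variance updates added above are Welford's online algorithm: each new value updates the running mean in place, and the 'variance' field accumulates the sum of squared deviations so that variance/count is the population variance of the values seen so far, without a second pass over the data. A standalone sketch of the same update (names are illustrative; this is not the DocValuesStats class itself):

  final class RunningStats {
    private long count;
    private double mean, m2, sum; // m2 plays the role of the patch's 'variance' field

    void accept(double val) {
      count++;
      sum += val;
      double oldMean = mean;
      mean += (val - mean) / count;          // incremental mean update
      m2 += (val - mean) * (val - oldMean);  // Welford's sum-of-squared-deviations update
    }

    double mean()     { return mean; }
    double variance() { return count > 0 ? m2 / count : 0; }
    double stdev()    { return Math.sqrt(variance()); }
    double sum()      { return sum; }

    public static void main(String[] args) {
      RunningStats s = new RunningStats();
      for (double v : new double[] {1, 2, 3, 4}) s.accept(v);
      System.out.println(s.mean());     // 2.5
      System.out.println(s.variance()); // 1.25 (population variance)
      System.out.println(s.stdev());    // ~1.118
    }
  }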
[40/50] [abbrv] lucene-solr:jira/solr-8593: Rename constructor parameter

Posted by kr...@apache.org.
Rename constructor parameter


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/22d9af41
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/22d9af41
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/22d9af41

Branch: refs/heads/jira/solr-8593
Commit: 22d9af41a435feaa3307880b7c7ed4f5860faa21
Parents: 85582da
Author: Shai Erera <sh...@apache.org>
Authored: Wed Dec 14 13:49:42 2016 +0200
Committer: Shai Erera <sh...@apache.org>
Committed: Wed Dec 14 13:49:42 2016 +0200

----------------------------------------------------------------------
 .../src/java/org/apache/lucene/search/DocValuesStats.java    | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/22d9af41/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
----------------------------------------------------------------------
diff --git a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
index 38158cf..998bef4 100644
--- a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
+++ b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
@@ -125,8 +125,8 @@ public abstract class DocValuesStats<T> {
   /** Holds DocValues statistics for a numeric field storing {@code long} values. */
   public static final class LongDocValuesStats extends NumericDocValuesStats<Long> {
 
-    public LongDocValuesStats(String description) {
-      super(description, Long.MAX_VALUE, Long.MIN_VALUE);
+    public LongDocValuesStats(String field) {
+      super(field, Long.MAX_VALUE, Long.MIN_VALUE);
     }
 
     @Override
@@ -145,8 +145,8 @@ public abstract class DocValuesStats<T> {
   /** Holds DocValues statistics for a numeric field storing {@code double} values. */
   public static final class DoubleDocValuesStats extends NumericDocValuesStats<Double> {
 
-    public DoubleDocValuesStats(String description) {
-      super(description, Double.MAX_VALUE, Double.MIN_VALUE);
+    public DoubleDocValuesStats(String field) {
+      super(field, Double.MAX_VALUE, Double.MIN_VALUE);
     }
 
     @Override


[38/50] [abbrv] lucene-solr:jira/solr-8593: Fix LeafReader.getNumericDocValues javadoc

Posted by kr...@apache.org.
Fix LeafReader.getNumericDocValues javadoc


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/770f1eb8
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/770f1eb8
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/770f1eb8

Branch: refs/heads/jira/solr-8593
Commit: 770f1eb8ad6af5cce55d1bdf52f1288216c9691f
Parents: ad7152a
Author: Shai Erera <sh...@apache.org>
Authored: Wed Dec 14 13:07:19 2016 +0200
Committer: Shai Erera <sh...@apache.org>
Committed: Wed Dec 14 13:07:19 2016 +0200

----------------------------------------------------------------------
 lucene/core/src/java/org/apache/lucene/index/LeafReader.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/770f1eb8/lucene/core/src/java/org/apache/lucene/index/LeafReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/LeafReader.java b/lucene/core/src/java/org/apache/lucene/index/LeafReader.java
index acdd0d8..73394f2 100644
--- a/lucene/core/src/java/org/apache/lucene/index/LeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/LeafReader.java
@@ -242,7 +242,7 @@ public abstract class LeafReader extends IndexReader {
   /** Returns {@link NumericDocValues} for this field, or
    *  null if no numeric doc values were indexed for
    *  this field.  The returned instance should only be
-   *  used by a single thread.  This will never return null. */
+   *  used by a single thread. */
   public abstract NumericDocValues getNumericDocValues(String field) throws IOException;
 
   /** Returns {@link BinaryDocValues} for this field, or


[36/50] [abbrv] lucene-solr:jira/solr-8593: fix RangeFieldQuery.scorer to return null if no docs in a segment indexed the field

Posted by kr...@apache.org.
fix RangeFieldQuery.scorer to return null if no docs in a segment indexed the field


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/9aa5b734
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/9aa5b734
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/9aa5b734

Branch: refs/heads/jira/solr-8593
Commit: 9aa5b734c38ed0b9327577bd2b1413d448230eab
Parents: 8c79ab2
Author: Nicholas Knize <nk...@gmail.com>
Authored: Tue Dec 13 15:07:06 2016 -0600
Committer: Nicholas Knize <nk...@gmail.com>
Committed: Tue Dec 13 15:07:06 2016 -0600

----------------------------------------------------------------------
 .../src/java/org/apache/lucene/document/RangeFieldQuery.java       | 1 +
 .../test/org/apache/lucene/search/BaseRangeFieldQueryTestCase.java | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9aa5b734/lucene/sandbox/src/java/org/apache/lucene/document/RangeFieldQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/RangeFieldQuery.java b/lucene/sandbox/src/java/org/apache/lucene/document/RangeFieldQuery.java
index 7ebdec4..5249191 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/RangeFieldQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/RangeFieldQuery.java
@@ -165,6 +165,7 @@ abstract class RangeFieldQuery extends Query {
         FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field);
         if (fieldInfo == null) {
           // no docs in this segment indexed this field
+          return null;
         }
         checkFieldInfo(fieldInfo);
         boolean allDocsMatch = true;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/9aa5b734/lucene/sandbox/src/test/org/apache/lucene/search/BaseRangeFieldQueryTestCase.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/BaseRangeFieldQueryTestCase.java b/lucene/sandbox/src/test/org/apache/lucene/search/BaseRangeFieldQueryTestCase.java
index ff61ff6..ceafd53 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/BaseRangeFieldQueryTestCase.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/BaseRangeFieldQueryTestCase.java
@@ -93,7 +93,7 @@ public abstract class BaseRangeFieldQueryTestCase extends LuceneTestCase {
         ranges[id] = new Range[] {nextRange(dimensions)};
       }
       if (x == 17) {
-        // dome docs don't have a box:
+        // some docs don't have a box:
         ranges[id][0].isMissing = true;
         if (VERBOSE) {
           System.out.println("  id=" + id + " is missing");


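The one-line fix above relies on a general Lucene contract: Weight.scorer(LeafReaderContext) may return null to tell the searcher that no document in that segment can possibly match; without the early return, the code falls through to checkFieldInfo with a null fieldInfo. A compilable sketch of the guard (the class and method are illustrative stand-ins, not the real RangeFieldQuery weight):

  import java.io.IOException;
  import org.apache.lucene.index.FieldInfo;
  import org.apache.lucene.index.LeafReaderContext;
  import org.apache.lucene.search.Scorer;

  final class SegmentGuardSketch {
    static Scorer scorerOrNull(LeafReaderContext context, String field) throws IOException {
      FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field);
      if (fieldInfo == null) {
        return null; // no document in this segment indexed the field: nothing can match
      }
      // ... a real implementation would validate fieldInfo and build the Scorer here ...
      return null; // placeholder only in this sketch
    }
  }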
[44/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-8959: Refactored TestSegmentSorting out of TestMiniSolrCloudCluster

Posted by kr...@apache.org.
SOLR-8959: Refactored TestSegmentSorting out of TestMiniSolrCloudCluster


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/6525bb56
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/6525bb56
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/6525bb56

Branch: refs/heads/jira/solr-8593
Commit: 6525bb56f027655e5a01f028fa373305c0d01caa
Parents: 5123743
Author: Chris Hostetter <ho...@apache.org>
Authored: Wed Dec 14 13:18:56 2016 -0700
Committer: Chris Hostetter <ho...@apache.org>
Committed: Wed Dec 14 13:18:56 2016 -0700

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   3 +
 .../cloud/SegmentTerminateEarlyTestState.java   |  12 +-
 .../solr/cloud/TestMiniSolrCloudCluster.java    |  50 -------
 .../apache/solr/cloud/TestSegmentSorting.java   | 133 +++++++++++++++++++
 4 files changed, 145 insertions(+), 53 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6525bb56/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 5f0357b..73b0e9b 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -290,6 +290,9 @@ Other Changes
 
 * SOLR-9846: OverseerAutoReplicaFailoverThread can take too long to stop and leak out of unit tests. (Mark Miller)
 
+* SOLR-8959: Refactored TestSegmentSorting out of TestMiniSolrCloudCluster (hossman)
+
+
 ==================  6.3.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6525bb56/solr/core/src/test/org/apache/solr/cloud/SegmentTerminateEarlyTestState.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/SegmentTerminateEarlyTestState.java b/solr/core/src/test/org/apache/solr/cloud/SegmentTerminateEarlyTestState.java
index 199423b..b3df9e7 100644
--- a/solr/core/src/test/org/apache/solr/cloud/SegmentTerminateEarlyTestState.java
+++ b/solr/core/src/test/org/apache/solr/cloud/SegmentTerminateEarlyTestState.java
@@ -22,6 +22,7 @@ import java.time.ZonedDateTime;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
+import java.util.Random;
 
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
@@ -47,7 +48,12 @@ class SegmentTerminateEarlyTestState {
   Integer maxTimestampMM = null;
 
   int numDocs = 0;
+  final Random rand;
 
+  public SegmentTerminateEarlyTestState(Random rand) {
+    this.rand = rand;
+  }
+  
   void addDocuments(CloudSolrClient cloudSolrClient,
       int numCommits, int numDocsPerCommit, boolean optimize) throws Exception {
     for (int cc = 1; cc <= numCommits; ++cc) {
@@ -56,7 +62,7 @@ class SegmentTerminateEarlyTestState {
         final Integer docKey = new Integer(numDocs);
         SolrInputDocument doc = new SolrInputDocument();
         doc.setField(keyField, ""+docKey);
-        final int MM = TestMiniSolrCloudCluster.random().nextInt(60); // minutes
+        final int MM = rand.nextInt(60); // minutes
         if (minTimestampMM == null || MM <= minTimestampMM.intValue()) {
           if (minTimestampMM != null && MM < minTimestampMM.intValue()) {
             minTimestampDocKeys.clear();
@@ -116,7 +122,7 @@ class SegmentTerminateEarlyTestState {
     query.setFields(keyField, oddField, timestampField);
     final int rowsWanted = 1;
     query.setRows(rowsWanted);
-    final Boolean shardsInfoWanted = (TestMiniSolrCloudCluster.random().nextBoolean() ? null : new Boolean(TestMiniSolrCloudCluster.random().nextBoolean()));
+    final Boolean shardsInfoWanted = (rand.nextBoolean() ? null : new Boolean(rand.nextBoolean()));
     if (shardsInfoWanted != null) {
       query.set(ShardParams.SHARDS_INFO, shardsInfoWanted.booleanValue());
     }
@@ -163,7 +169,7 @@ class SegmentTerminateEarlyTestState {
     query.setSort(timestampField, SolrQuery.ORDER.desc);
     query.setFields(keyField, oddField, timestampField);
     query.setRows(1);
-    final Boolean shardsInfoWanted = (TestMiniSolrCloudCluster.random().nextBoolean() ? null : new Boolean(TestMiniSolrCloudCluster.random().nextBoolean()));
+    final Boolean shardsInfoWanted = (rand.nextBoolean() ? null : new Boolean(rand.nextBoolean()));
     if (shardsInfoWanted != null) {
       query.set(ShardParams.SHARDS_INFO, shardsInfoWanted.booleanValue());
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6525bb56/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudCluster.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudCluster.java b/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudCluster.java
index 97ecb67..de18875 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudCluster.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestMiniSolrCloudCluster.java
@@ -16,7 +16,6 @@
  */
 package org.apache.solr.cloud;
 
-import java.io.File;
 import java.lang.invoke.MethodHandles;
 import java.net.URL;
 import java.util.ArrayList;
@@ -384,53 +383,4 @@ public class TestMiniSolrCloudCluster extends LuceneTestCase {
     }
   }
 
-  @Test
-  public void testSegmentTerminateEarly() throws Exception {
-
-    final String collectionName = "testSegmentTerminateEarlyCollection";
-
-    final SegmentTerminateEarlyTestState tstes = new SegmentTerminateEarlyTestState();
-
-    File solrXml = new File(SolrTestCaseJ4.TEST_HOME(), "solr.xml");
-    Builder jettyConfig = JettyConfig.builder();
-    jettyConfig.waitForLoadingCoresToFinish(null);
-    final MiniSolrCloudCluster miniCluster = createMiniSolrCloudCluster();
-    final CloudSolrClient cloudSolrClient = miniCluster.getSolrClient();
-    cloudSolrClient.setDefaultCollection(collectionName);
-
-    try {
-      // create collection
-      {
-        final String asyncId = (random().nextBoolean() ? null : "asyncId("+collectionName+".create)="+random().nextInt());
-        final Map<String, String> collectionProperties = new HashMap<>();
-        collectionProperties.put(CoreDescriptor.CORE_CONFIG, "solrconfig-sortingmergepolicyfactory.xml");
-        createCollection(miniCluster, collectionName, null, asyncId, Boolean.TRUE, collectionProperties);
-      }
-
-      ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader();
-      AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);
-
-      // add some documents, then optimize to get merged-sorted segments
-      tstes.addDocuments(cloudSolrClient, 10, 10, true);
-
-      // CommonParams.SEGMENT_TERMINATE_EARLY parameter intentionally absent
-      tstes.queryTimestampDescending(cloudSolrClient);
-
-      // add a few more documents, but don't optimize to have some not-merge-sorted segments
-      tstes.addDocuments(cloudSolrClient, 2, 10, false);
-
-      // CommonParams.SEGMENT_TERMINATE_EARLY parameter now present
-      tstes.queryTimestampDescendingSegmentTerminateEarlyYes(cloudSolrClient);
-      tstes.queryTimestampDescendingSegmentTerminateEarlyNo(cloudSolrClient);
-
-      // CommonParams.SEGMENT_TERMINATE_EARLY parameter present but it won't be used
-      tstes.queryTimestampDescendingSegmentTerminateEarlyYesGrouped(cloudSolrClient);
-      tstes.queryTimestampAscendingSegmentTerminateEarlyYes(cloudSolrClient); // uses a sort order that is _not_ compatible with the merge sort order
-
-    }
-    finally {
-      miniCluster.shutdown();
-    }
-  }
-
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6525bb56/solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java b/solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java
new file mode 100644
index 0000000..016b63e
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/cloud/TestSegmentSorting.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.cloud;
+
+import java.lang.invoke.MethodHandles;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.index.TieredMergePolicy;
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.core.CoreDescriptor;
+import org.apache.solr.index.TieredMergePolicyFactory;
+
+import org.junit.After;
+import org.junit.BeforeClass;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class TestSegmentSorting extends SolrCloudTestCase {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+  private static final int NUM_SERVERS = 5;
+  private static final int NUM_SHARDS = 2;
+  private static final int REPLICATION_FACTOR = 2;
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    configureCluster(NUM_SERVERS).configure();
+  }
+
+  @After
+  public void ensureClusterEmpty() throws Exception {
+    cluster.deleteAllCollections();
+    cluster.getSolrClient().setDefaultCollection(null);
+  }
+  
+  private void createCollection(MiniSolrCloudCluster miniCluster, String collectionName, String createNodeSet, String asyncId,
+      Boolean indexToPersist, Map<String,String> collectionProperties) throws Exception {
+    String configName = "solrCloudCollectionConfig";
+    miniCluster.uploadConfigSet(SolrTestCaseJ4.TEST_PATH().resolve("collection1").resolve("conf"), configName);
+
+    final boolean persistIndex = (indexToPersist != null ? indexToPersist.booleanValue() : random().nextBoolean());
+    if (collectionProperties == null) {
+      collectionProperties = new HashMap<>();
+    }
+    collectionProperties.putIfAbsent(CoreDescriptor.CORE_CONFIG, "solrconfig-tlog.xml");
+    collectionProperties.putIfAbsent("solr.tests.maxBufferedDocs", "100000");
+    collectionProperties.putIfAbsent("solr.tests.ramBufferSizeMB", "100");
+    // use non-test classes so RandomizedRunner isn't necessary
+    if (random().nextBoolean()) {
+      collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_MERGEPOLICY, TieredMergePolicy.class.getName());
+      collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICY, "true");
+      collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICYFACTORY, "false");
+    } else {
+      collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_MERGEPOLICYFACTORY, TieredMergePolicyFactory.class.getName());
+      collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICYFACTORY, "true");
+      collectionProperties.putIfAbsent(SolrTestCaseJ4.SYSTEM_PROPERTY_SOLR_TESTS_USEMERGEPOLICY, "false");
+    }
+    collectionProperties.putIfAbsent("solr.tests.mergeScheduler", "org.apache.lucene.index.ConcurrentMergeScheduler");
+    collectionProperties.putIfAbsent("solr.directoryFactory", (persistIndex ? "solr.StandardDirectoryFactory" : "solr.RAMDirectoryFactory"));
+
+    if (asyncId == null) {
+      CollectionAdminRequest.createCollection(collectionName, configName, NUM_SHARDS, REPLICATION_FACTOR)
+          .setCreateNodeSet(createNodeSet)
+          .setProperties(collectionProperties)
+          .process(miniCluster.getSolrClient());
+    }
+    else {
+      CollectionAdminRequest.createCollection(collectionName, configName, NUM_SHARDS, REPLICATION_FACTOR)
+          .setCreateNodeSet(createNodeSet)
+          .setProperties(collectionProperties)
+          .processAndWait(miniCluster.getSolrClient(), 30);
+    }
+  }
+
+
+  public void testSegmentTerminateEarly() throws Exception {
+
+    final String collectionName = "testSegmentTerminateEarlyCollection";
+
+    final SegmentTerminateEarlyTestState tstes = new SegmentTerminateEarlyTestState(random());
+    
+    final CloudSolrClient cloudSolrClient = cluster.getSolrClient();
+    cloudSolrClient.setDefaultCollection(collectionName);
+
+    // create collection
+    {
+      final String asyncId = (random().nextBoolean() ? null : "asyncId("+collectionName+".create)="+random().nextInt());
+      final Map<String, String> collectionProperties = new HashMap<>();
+      collectionProperties.put(CoreDescriptor.CORE_CONFIG, "solrconfig-sortingmergepolicyfactory.xml");
+      createCollection(cluster, collectionName, null, asyncId, Boolean.TRUE, collectionProperties);
+    }
+    
+    ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader();
+    AbstractDistribZkTestBase.waitForRecoveriesToFinish(collectionName, zkStateReader, true, true, 330);
+    
+    // add some documents, then optimize to get merged-sorted segments
+    tstes.addDocuments(cloudSolrClient, 10, 10, true);
+    
+    // CommonParams.SEGMENT_TERMINATE_EARLY parameter intentionally absent
+    tstes.queryTimestampDescending(cloudSolrClient);
+    
+    // add a few more documents, but don't optimize to have some not-merge-sorted segments
+    tstes.addDocuments(cloudSolrClient, 2, 10, false);
+    
+    // CommonParams.SEGMENT_TERMINATE_EARLY parameter now present
+    tstes.queryTimestampDescendingSegmentTerminateEarlyYes(cloudSolrClient);
+    tstes.queryTimestampDescendingSegmentTerminateEarlyNo(cloudSolrClient);
+    
+    // CommonParams.SEGMENT_TERMINATE_EARLY parameter present but it won't be used
+    tstes.queryTimestampDescendingSegmentTerminateEarlyYesGrouped(cloudSolrClient);
+    tstes.queryTimestampAscendingSegmentTerminateEarlyYes(cloudSolrClient); // uses a sort order that is _not_ compatible with the merge sort order
+    
+  }
+}


[04/50] [abbrv] lucene-solr:jira/solr-8593: improve IW javadocs

Posted by kr...@apache.org.
improve IW javadocs


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/58476b16
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/58476b16
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/58476b16

Branch: refs/heads/jira/solr-8593
Commit: 58476b1675befd88776c72fb7b178c294a39edae
Parents: fcccd31
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Dec 2 15:30:37 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Dec 2 15:31:01 2016 -0500

----------------------------------------------------------------------
 .../org/apache/lucene/index/IndexWriter.java    | 31 ++++++++++++--------
 1 file changed, 18 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/58476b16/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index 68f3b3b..9868785 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -133,19 +133,24 @@ import org.apache.lucene.util.Version;
   
   <a name="deletionPolicy"></a>
   <p>Expert: <code>IndexWriter</code> allows an optional
-  {@link IndexDeletionPolicy} implementation to be
-  specified.  You can use this to control when prior commits
-  are deleted from the index.  The default policy is {@link
-  KeepOnlyLastCommitDeletionPolicy} which removes all prior
-  commits as soon as a new commit is done (this matches
-  behavior before 2.2).  Creating your own policy can allow
-  you to explicitly keep previous "point in time" commits
-  alive in the index for some time, to allow readers to
-  refresh to the new commit without having the old commit
-  deleted out from under them.  This is necessary on
-  filesystems like NFS that do not support "delete on last
-  close" semantics, which Lucene's "point in time" search
-  normally relies on. </p>
+  {@link IndexDeletionPolicy} implementation to be specified.  You
+  can use this to control when prior commits are deleted from
+  the index.  The default policy is {@link KeepOnlyLastCommitDeletionPolicy}
+  which removes all prior commits as soon as a new commit is
+  done.  Creating your own policy can allow you to explicitly
+  keep previous "point in time" commits alive in the index for
+  some time, either because this is useful for your application,
+  or to give readers enough time to refresh to the new commit
+  without having the old commit deleted out from under them.
+  The latter is necessary when multiple computers take turns opening
+  their own {@code IndexWriter} and {@code IndexReader}s
+  against a single shared index mounted via remote filesystems
+  like NFS which do not support "delete on last close" semantics.
+  A single computer accessing an index via NFS is fine with the
+  default deletion policy since NFS clients emulate "delete on
+  last close" locally.  That said, accessing an index via NFS
+  will likely result in poor performance compared to a local IO
+  device. </p>
 
   <a name="mergePolicy"></a> <p>Expert:
   <code>IndexWriter</code> allows you to separately change


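The rewritten paragraph above describes keeping "point in time" commits alive so readers are not broken by later commits. One concrete way to do that in Lucene (a sketch assuming a Lucene 6.x classpath, not part of this commit) is to wrap the default policy in SnapshotDeletionPolicy:

  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.index.IndexCommit;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
  import org.apache.lucene.index.SnapshotDeletionPolicy;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.store.RAMDirectory;

  public class DeletionPolicySketch {
    public static void main(String[] args) throws Exception {
      Directory dir = new RAMDirectory();
      SnapshotDeletionPolicy policy =
          new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
      IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer())
          .setIndexDeletionPolicy(policy);
      try (IndexWriter writer = new IndexWriter(dir, config)) {
        writer.commit();
        IndexCommit snapshot = policy.snapshot(); // this commit survives later commits
        try {
          // a reader opened on 'snapshot' stays valid even after further commits
        } finally {
          policy.release(snapshot); // let the deletion policy reclaim it
        }
      }
    }
  }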
[15/50] [abbrv] lucene-solr:jira/solr-8593: added an extra testcase

Posted by kr...@apache.org.
added an extra testcase


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/3f6164c7
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/3f6164c7
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/3f6164c7

Branch: refs/heads/jira/solr-8593
Commit: 3f6164c76e2fc581abe4408066e08cf9fc817260
Parents: bd8b191
Author: Noble Paul <no...@apache.org>
Authored: Wed Dec 7 18:42:07 2016 +0530
Committer: Noble Paul <no...@apache.org>
Committed: Wed Dec 7 18:42:07 2016 +0530

----------------------------------------------------------------------
 .../TestPlainTextEntityProcessor.java           | 108 +++++++++++++++++++
 1 file changed, 108 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3f6164c7/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java
----------------------------------------------------------------------
diff --git a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java
index 82b757e..a286d84 100644
--- a/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java
+++ b/solr/contrib/dataimporthandler/src/test/org/apache/solr/handler/dataimport/TestPlainTextEntityProcessor.java
@@ -16,12 +16,23 @@
  */
 package org.apache.solr.handler.dataimport;
 
+import java.io.ByteArrayInputStream;
 import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
 import java.io.StringReader;
+import java.nio.charset.StandardCharsets;
+import java.sql.Blob;
+import java.sql.SQLException;
+import java.util.Arrays;
+import java.util.Collections;
 import java.util.Properties;
 
+import org.apache.solr.common.util.Utils;
 import org.junit.Test;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 /**
  * Test for PlainTextEntityProcessor
  *
@@ -42,6 +53,103 @@ public class TestPlainTextEntityProcessor extends AbstractDataImportHandlerTestC
     assertEquals(DS.s, sw.docs.get(0).getFieldValue("x"));
   }
 
+  static class BlobImpl implements Blob{
+    private final byte[] bytes;
+
+    BlobImpl(byte[] bytes) {
+      this.bytes = bytes;
+    }
+
+    @Override
+    public long length() throws SQLException {
+      return 0;
+    }
+
+    @Override
+    public byte[] getBytes(long pos, int length) throws SQLException {
+      return bytes;
+    }
+
+    @Override
+    public InputStream getBinaryStream() throws SQLException {
+      return new ByteArrayInputStream(bytes);
+    }
+
+    @Override
+    public long position(byte[] pattern, long start) throws SQLException {
+      return 0;
+    }
+
+    @Override
+    public long position(Blob pattern, long start) throws SQLException {
+      return 0;
+    }
+
+    @Override
+    public int setBytes(long pos, byte[] bytes) throws SQLException {
+      return 0;
+    }
+
+    @Override
+    public int setBytes(long pos, byte[] bytes, int offset, int len) throws SQLException {
+      return 0;
+    }
+
+    @Override
+    public OutputStream setBinaryStream(long pos) throws SQLException {
+      return null;
+    }
+
+    @Override
+    public void truncate(long len) throws SQLException {
+
+    }
+
+    @Override
+    public void free() throws SQLException {
+
+    }
+
+    @Override
+    public InputStream getBinaryStream(long pos, long length) throws SQLException {
+      return new ByteArrayInputStream(bytes);
+    }
+  }
+
+  @Test
+  public void testSimple2() throws IOException {
+    DataImporter di = new DataImporter();
+    MockDataSource.setIterator("select id, name, blob_field from lw_table4", Collections.singletonList(Utils.makeMap("blob_field",new BlobImpl(DS.s.getBytes(UTF_8)) ) ).iterator());
+
+    String dc =
+
+        " <dataConfig>" +
+            "<dataSource name=\"ds1\" type=\"MockDataSource\"/>\n" +
+        " <!-- dataSource for FieldReaderDataSource -->\n" +
+        " <dataSource dataField=\"root.blob_field\" name=\"fr\" type=\"FieldReaderDataSource\"/>\n" +
+        "\n" +
+        " <document name=\"items\">\n" +
+        "   <entity dataSource=\"ds1\" name=\"root\" pk=\"id\"  query=\"select id, name, blob_field from lw_table4\" transformer=\"TemplateTransformer\">\n" +
+        "           <field column=\"id\" name=\"id\"/>\n" +
+        "\n" +
+        "        <entity dataField=\"root.blob_field\" dataSource=\"fr\" format=\"text\" name=\"n1\" processor=\"PlainTextEntityProcessor\" url=\"blob_field\">\n" +
+        "                       <field column=\"plainText\" name=\"plainText\"/>\n" +
+        "           </entity>\n" +
+        "\n" +
+        "   </entity>\n" +
+        " </document>\n" +
+        "</dataConfig>";
+    System.out.println(dc);
+    di.loadAndInit(dc);
+    redirectTempProperties(di);
+
+    TestDocBuilder.SolrWriterImpl sw = new TestDocBuilder.SolrWriterImpl();
+    RequestInfo rp = new RequestInfo(null, createMap("command", "full-import"), null);
+    di.runCmd(rp, sw);
+    assertEquals(DS.s, sw.docs.get(0).getFieldValue("plainText"));
+  }
+
+
   public static class DS extends DataSource {
     static String s = "hello world";
 


[34/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-9707: Don't forward DeleteByQuery requests to down replicas

Posted by kr...@apache.org.
SOLR-9707: Don't forward DeleteByQuery requests to down replicas


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/fecbbe08
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/fecbbe08
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/fecbbe08

Branch: refs/heads/jira/solr-8593
Commit: fecbbe081fd4a777f01517fdd8631e69797def38
Parents: 39ba130
Author: Varun Thacker <va...@apache.org>
Authored: Mon Dec 12 15:28:22 2016 -0800
Committer: Varun Thacker <va...@apache.org>
Committed: Mon Dec 12 15:28:22 2016 -0800

----------------------------------------------------------------------
 solr/CHANGES.txt                                             | 2 ++
 .../solr/update/processor/DistributedUpdateProcessor.java    | 8 +++-----
 2 files changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fecbbe08/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 37cccae..a8a3f97 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -240,6 +240,8 @@ Bug Fixes
 * SOLR-9834: A variety of spots in the code can create a collection zk node after the collection has been 
   removed. (Mark Miller)
 
+* SOLR-9707: Don't forward DeleteByQuery requests to down replicas. (Jessica Cheng Mallet via Varun Thacker)
+
 Other Changes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fecbbe08/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java b/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
index b8bdd16..c62a90a 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
@@ -658,8 +658,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
     String shardId = cloudDesc.getShardId();
 
     try {
-      Replica leaderReplica = zkController.getZkStateReader().getLeaderRetry(
-          collection, shardId);
+      Replica leaderReplica = zkController.getZkStateReader().getLeaderRetry(collection, shardId);
       isLeader = leaderReplica.getName().equals(
           req.getCore().getCoreDescriptor().getCloudDescriptor()
               .getCoreNodeName());
@@ -668,7 +667,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
 
       forwardToLeader = false;
       List<ZkCoreNodeProps> replicaProps = zkController.getZkStateReader()
-          .getReplicaProps(collection, shardId, leaderReplica.getName());
+          .getReplicaProps(collection, shardId, leaderReplica.getName(), null, Replica.State.DOWN);
       if (replicaProps != null) {
         nodes = new ArrayList<>(replicaProps.size());
         for (ZkCoreNodeProps props : replicaProps) {
@@ -677,8 +676,7 @@ public class DistributedUpdateProcessor extends UpdateRequestProcessor {
       }
     } catch (InterruptedException e) {
       Thread.currentThread().interrupt();
-      throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "",
-          e);
+      throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e);
     }
 
     return nodes;


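The key change above is the extra Replica.State.DOWN argument to getReplicaProps, which filters DOWN replicas out of the node list so the delete-by-query is never forwarded to them. A simplified, hedged illustration of that filter (plain stand-in types, not Solr's ZkStateReader API):

  import java.util.ArrayList;
  import java.util.List;

  final class ForwardListSketch {
    enum State { ACTIVE, RECOVERING, DOWN }

    static final class ReplicaInfo {
      final String coreUrl;
      final State state;
      ReplicaInfo(String coreUrl, State state) { this.coreUrl = coreUrl; this.state = state; }
    }

    // Mirrors the effect of passing Replica.State.DOWN as the must-not-match
    // state filter: DOWN replicas never make it into the forwarding list.
    static List<String> forwardTargets(List<ReplicaInfo> replicas) {
      List<String> nodes = new ArrayList<>();
      for (ReplicaInfo r : replicas) {
        if (r.state != State.DOWN) {
          nodes.add(r.coreUrl);
        }
      }
      return nodes;
    }
  }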
[26/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-6989: Fix Exception handling in MMapDirectory's unmap hack support code to work with Java 9's new InaccessibleObjectException, which does not extend ReflectiveOperationException.

Posted by kr...@apache.org.
LUCENE-6989: Fix Exception handling in MMapDirectory's unmap hack support code to work with Java 9's new InaccessibleObjectException, which does not extend ReflectiveOperationException.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/22d04a7c
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/22d04a7c
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/22d04a7c

Branch: refs/heads/jira/solr-8593
Commit: 22d04a7c1149c1af42dc2890a416fc45e4d0aa5e
Parents: c185617
Author: Uwe Schindler <us...@apache.org>
Authored: Fri Dec 9 18:36:37 2016 +0100
Committer: Uwe Schindler <us...@apache.org>
Committed: Fri Dec 9 18:36:37 2016 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                                              | 5 +++++
 lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java | 4 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/22d04a7c/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 26a9dec..b9deb7e 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -85,6 +85,11 @@ Bug Fixes
   the incoming automaton is a special case and throw a clearer
   exception than NullPointerException (Tom Mortimer via Mike McCandless)
 
+* LUCENE-6989: Fix Exception handling in MMapDirectory's unmap hack
+  support code to work with Java 9's new InaccessibleObjectException
+  that does not extend ReflectiveOperationException.
+  (Uwe Schindler)
+
 Improvements
 
 * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/22d04a7c/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
index c0e3519..be08a16 100644
--- a/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
+++ b/lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java
@@ -385,13 +385,13 @@ public class MMapDirectory extends FSDirectory {
           }
         }
       };
-    } catch (ReflectiveOperationException e) {
-      return "Unmapping is not supported on this platform, because internal Java APIs are not compatible to this Lucene version: " + e; 
     } catch (SecurityException e) {
       return "Unmapping is not supported, because not all required permissions are given to the Lucene JAR file: " + e +
           " [Please grant at least the following permissions: RuntimePermission(\"accessClassInPackage.sun.misc\"), " +
           "RuntimePermission(\"accessClassInPackage.jdk.internal.ref\"), and " +
           "ReflectPermission(\"suppressAccessChecks\")]";
+    } catch (ReflectiveOperationException | RuntimeException e) {
+      return "Unmapping is not supported on this platform, because internal Java APIs are not compatible to this Lucene version: " + e; 
     }
   }
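
Two details make the reordering above necessary. First, Java 9's InaccessibleObjectException extends RuntimeException rather than ReflectiveOperationException, so the old single catch let it escape; adding "| RuntimeException" closes that gap. Second, SecurityException is itself a RuntimeException, so the broadened multi-catch must come after the SecurityException clause, or that clause would be unreachable and fail to compile. A self-contained sketch of the same pattern (the reflective target is arbitrary; this is not Lucene code):

import java.lang.reflect.Method;

public class CatchOrderSketch {

  // Reflective access that can fail with a checked ReflectiveOperationException,
  // a SecurityException, or (on Java 9+) an unchecked InaccessibleObjectException.
  static String tryReflectiveAccess(String className) {
    try {
      Method m = Class.forName(className).getDeclaredMethod("toString");
      m.setAccessible(true); // may throw InaccessibleObjectException on Java 9+
      return "ok: " + m;
    } catch (SecurityException e) {
      // Must come first: a broader RuntimeException clause before this one
      // would make it unreachable.
      return "not permitted: " + e;
    } catch (ReflectiveOperationException | RuntimeException e) {
      // The "| RuntimeException" alternative is what also catches Java 9's
      // InaccessibleObjectException here.
      return "not supported: " + e;
    }
  }

  public static void main(String[] args) {
    System.out.println(tryReflectiveAccess("java.lang.String"));
    System.out.println(tryReflectiveAccess("no.such.Class")); // ClassNotFoundException path
  }
}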
   


[16/50] [abbrv] lucene-solr:jira/solr-8593: Merge remote-tracking branch 'origin/master'

Posted by kr...@apache.org.
Merge remote-tracking branch 'origin/master'


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/10500c89
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/10500c89
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/10500c89

Branch: refs/heads/jira/solr-8593
Commit: 10500c894da171b6826e5101a981283ec434b9de
Parents: 3f6164c 8b98b15
Author: Noble Paul <no...@apache.org>
Authored: Wed Dec 7 18:42:39 2016 +0530
Committer: Noble Paul <no...@apache.org>
Committed: Wed Dec 7 18:42:39 2016 +0530

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   4 +
 .../uhighlight/MemoryIndexOffsetStrategy.java   |  10 +-
 .../uhighlight/MultiTermHighlighting.java       |  37 +--
 .../lucene/search/uhighlight/PhraseHelper.java  | 158 ++++++++---
 .../search/uhighlight/UnifiedHighlighter.java   |  64 +++--
 .../uhighlight/TestUnifiedHighlighter.java      | 275 +++++++++++++++++++
 .../TestUnifiedHighlighterExtensibility.java    |   3 +-
 solr/CHANGES.txt                                |   9 +
 .../src/java/org/apache/solr/core/SolrCore.java |   8 -
 .../solr/handler/admin/SystemInfoHandler.java   |  51 +++-
 .../solr/schema/ManagedIndexSchemaFactory.java  |  12 +
 .../org/apache/solr/schema/SchemaManager.java   |   2 +-
 .../ManagedSchemaRoundRobinCloudTest.java       |  98 +++++++
 .../solrj/impl/ConcurrentUpdateSolrClient.java  |  16 +-
 .../solr/client/solrj/SolrExampleTests.java     |   7 +-
 15 files changed, 649 insertions(+), 105 deletions(-)
----------------------------------------------------------------------



[30/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-9843: Fix up DocValuesNotIndexedTest failures. Debugging code

Posted by kr...@apache.org.
SOLR-9843: Fix up DocValuesNotIndexedTest failures. Debugging code


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d75abe1a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d75abe1a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d75abe1a

Branch: refs/heads/jira/solr-8593
Commit: d75abe1a3022b5d596b7fca4c7e8623782010a88
Parents: 2b073a2
Author: Erick Erickson <er...@apache.org>
Authored: Sat Dec 10 14:03:15 2016 -0800
Committer: Erick Erickson <er...@apache.org>
Committed: Sat Dec 10 14:03:15 2016 -0800

----------------------------------------------------------------------
 .../solr/cloud/DocValuesNotIndexedTest.java     | 64 ++++++++++++--------
 1 file changed, 39 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d75abe1a/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java b/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java
index b8507b1..f5257f8 100644
--- a/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java
@@ -18,15 +18,15 @@
 package org.apache.solr.cloud;
 
 import java.io.IOException;
-import java.text.SimpleDateFormat;
+import java.lang.invoke.MethodHandles;
+import java.time.Instant;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
 
 import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
 import org.apache.solr.client.solrj.SolrQuery;
@@ -50,11 +50,16 @@ import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.RuleChain;
 import org.junit.rules.TestRule;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
+import static org.apache.lucene.util.LuceneTestCase.random;
 import static org.apache.solr.client.solrj.request.schema.SchemaRequest.*;
 
 public class DocValuesNotIndexedTest extends SolrCloudTestCase {
 
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+  
   @Rule
   public TestRule solrTestRules = RuleChain.outerRule(new SystemPropertiesRestoreRule());
 
@@ -79,7 +84,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
         .process(cluster.getSolrClient());
 
     fieldsToTestSingle =
-        Collections.unmodifiableList(Stream.of(
+        Collections.unmodifiableList(Arrays.asList(
             new FieldProps("intField", "int", 1),
             new FieldProps("longField", "long", 1),
             new FieldProps("doubleField", "double", 1),
@@ -87,10 +92,10 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
             new FieldProps("dateField", "date", 1),
             new FieldProps("stringField", "string", 1),
             new FieldProps("boolField", "boolean", 1)
-        ).collect(Collectors.toList()));
+        ));
 
     fieldsToTestMulti =
-        Collections.unmodifiableList(Stream.of(
+        Collections.unmodifiableList(Arrays.asList(
             new FieldProps("intFieldMulti", "int", 5),
             new FieldProps("longFieldMulti", "long", 5),
             new FieldProps("doubleFieldMulti", "double", 5),
@@ -98,11 +103,11 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
             new FieldProps("dateFieldMulti", "date", 5),
             new FieldProps("stringFieldMulti", "string", 5),
             new FieldProps("boolFieldMulti", "boolean", 2)
-        ).collect(Collectors.toList()));
+        ));
 
     // Fields to test for grouping and sorting with sortMissingFirst/Last.
     fieldsToTestGroupSortFirst =
-        Collections.unmodifiableList(Stream.of(
+        Collections.unmodifiableList(Arrays.asList(
             new FieldProps("intGSF", "int"),
             new FieldProps("longGSF", "long"),
             new FieldProps("doubleGSF", "double"),
@@ -110,10 +115,10 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
             new FieldProps("dateGSF", "date"),
             new FieldProps("stringGSF", "string"),
             new FieldProps("boolGSF", "boolean")
-        ).collect(Collectors.toList()));
+        ));
 
     fieldsToTestGroupSortLast =
-        Collections.unmodifiableList(Stream.of(
+        Collections.unmodifiableList(Arrays.asList(
             new FieldProps("intGSL", "int"),
             new FieldProps("longGSL", "long"),
             new FieldProps("doubleGSL", "double"),
@@ -121,7 +126,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
             new FieldProps("dateGSL", "date"),
             new FieldProps("stringGSL", "string"),
             new FieldProps("boolGSL", "boolean")
-        ).collect(Collectors.toList()));
+        ));
 
     List<Update> updateList = new ArrayList<>(fieldsToTestSingle.size() +
         fieldsToTestMulti.size() + fieldsToTestGroupSortFirst.size() + fieldsToTestGroupSortLast.size() +
@@ -235,7 +240,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
     new UpdateRequest()
         .add(docs)
         .commit(client, COLLECTION);
-
+    
     checkSortOrder(client, fieldsToTestGroupSortFirst, "asc", new String[]{"4", "2", "1", "3"}, new String[]{"4", "1", "2", "3"});
     checkSortOrder(client, fieldsToTestGroupSortFirst, "desc", new String[]{"3", "1", "2", "4"}, new String[]{"2", "3", "1", "4"});
 
@@ -251,6 +256,10 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
       solrQuery.addSort("id", SolrQuery.ORDER.asc);
       final QueryResponse rsp = client.query(COLLECTION, solrQuery);
       SolrDocumentList res = rsp.getResults();
+      //TODO remove after SOLR-9843
+      if (order.length != res.getNumFound()) {
+        log.error("(3) About to fail, response is: " + rsp.toString());
+      }
       assertEquals("Should have exactly " + order.length + " documents returned", order.length, res.getNumFound());
       String expected;
       for (int idx = 0; idx < res.size(); ++idx) {
@@ -264,7 +273,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
 
   @Test
   public void testGroupingDocAbsent() throws IOException, SolrServerException {
-    List<SolrInputDocument> docs = new ArrayList<>(3);
+    List<SolrInputDocument> docs = new ArrayList<>(4);
     docs.add(makeGSDoc(2, fieldsToTestGroupSortFirst, null));
     docs.add(makeGSDoc(1, fieldsToTestGroupSortFirst, null));
     docs.add(makeGSDoc(3, fieldsToTestGroupSortFirst, null));
@@ -296,7 +305,11 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
       if (prop.getName().startsWith("bool")) expected = 3; //true, false and null
 
       List<Group> fieldCommandGroups = fieldCommand.getValues();
-      assertEquals("Did not find the expected number of groups!", expected, fieldCommandGroups.size());
+      //TODO: remove me; this is excessive in the normal case and is only in for SOLR-9843
+      if (expected != fieldCommandGroups.size()) {
+        log.error("(1) About to fail assert, response is: " + rsp.toString());
+      }
+      assertEquals("Did not find the expected number of groups for field " + prop.getName(), expected, fieldCommandGroups.size());
     }
   }
 
@@ -316,7 +329,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
       docs.add(doc);
       if ((idx % 5) == 0) {
         doc = new SolrInputDocument();
-        doc.addField("id", idx + 100);
+        doc.addField("id", idx + 10_000);
         docs.add(doc);
       }
     }
@@ -368,6 +381,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
               break;
             
             default:
+              //TODO remove me after SOLR-9843
+              log.error("(2) About to fail, response is: " + rsp.toString());
               fail("Unexpected number of elements in the group for " + prop.getName() + ": " + grp.getResult().size());
           }
         }
@@ -450,8 +465,6 @@ class FieldProps {
   private Object base;
   private int counter = 0;
 
-  static SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ROOT);
-
   FieldProps(String name, String type, int expectedCount) {
     this.name = name;
     this.type = type;
@@ -460,22 +473,23 @@ class FieldProps {
   }
   void resetBase() {
     if (name.startsWith("int")) {
-      base = Math.abs(DocValuesNotIndexedTest.random().nextInt());
+      base = Math.abs(random().nextInt());
     } else if (name.startsWith("long")) {
-      base = Math.abs(DocValuesNotIndexedTest.random().nextLong());
+      base = Math.abs(random().nextLong());
     } else if (name.startsWith("float")) {
-      base = Math.abs(DocValuesNotIndexedTest.random().nextFloat());
+      base = Math.abs(random().nextFloat());
     } else if (name.startsWith("double")) {
-      base = Math.abs(DocValuesNotIndexedTest.random().nextDouble());
+      base = Math.abs(random().nextDouble());
     } else if (name.startsWith("date")) {
-      base = Math.abs(DocValuesNotIndexedTest.random().nextLong());
+      base = Math.abs(random().nextLong());
     } else if (name.startsWith("bool")) {
      base = true; // Must start with a known value since bools only have two values.
     } else if (name.startsWith("string")) {
-      base = "base_string_" + DocValuesNotIndexedTest.random().nextInt(1_000_000) + "_";
+      base = "base_string_" + random().nextInt(1_000_000) + "_";
     } else {
       throw new RuntimeException("Should have found a prefix for the field before now!");
     }
+    counter = 0;
   }
 
   FieldProps(String name, String type) {
@@ -496,7 +510,7 @@ class FieldProps {
 
   public String getValue(boolean incrementCounter) {
     if (incrementCounter) {
-      counter += DocValuesNotIndexedTest.random().nextInt(10) + 100;
+      counter += random().nextInt(10) + 10_000;
     }
     if (name.startsWith("int")) {
       return Integer.toString((int) base + counter);
@@ -511,7 +525,7 @@ class FieldProps {
       return Double.toString((double) base + counter);
     }
     if (name.startsWith("date")) {
-      return format.format(985_847_645 + (long) base + counter);
+      return Instant.ofEpochMilli(985_847_645 + (long) base + counter).toString();
     }
     if (name.startsWith("bool")) {
       String ret = Boolean.toString((boolean) base);
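
Dropping the shared static SimpleDateFormat is more than cosmetic: SimpleDateFormat is mutable and not thread-safe, while java.time.Instant is immutable and renders ISO-8601 directly. A small sketch of the replacement call (the epoch value is illustrative; note that Instant.toString() omits trailing zero fraction digits):

import java.time.Instant;

public class InstantFormattingSketch {
  public static void main(String[] args) {
    long epochMillis = 985_847_645L;
    // Immutable and thread-safe, unlike a shared SimpleDateFormat instance;
    // prints 1970-01-12T09:50:47.645Z in the ISO-8601 shape Solr date fields use.
    System.out.println(Instant.ofEpochMilli(epochMillis));
  }
}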


[37/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7590: add DocValuesStatsCollector

Posted by kr...@apache.org.
LUCENE-7590: add DocValuesStatsCollector


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ad7152ad
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ad7152ad
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ad7152ad

Branch: refs/heads/jira/solr-8593
Commit: ad7152ad4739a47aa2b45405ba1682b3dda18923
Parents: 9aa5b73
Author: Shai Erera <sh...@apache.org>
Authored: Sun Dec 11 12:49:50 2016 +0200
Committer: Shai Erera <sh...@apache.org>
Committed: Wed Dec 14 13:04:52 2016 +0200

----------------------------------------------------------------------
 .../apache/lucene/search/DocValuesStats.java    | 165 ++++++++++++++++++
 .../lucene/search/DocValuesStatsCollector.java  |  64 +++++++
 .../search/TestDocValuesStatsCollector.java     | 166 +++++++++++++++++++
 3 files changed, 395 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ad7152ad/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
----------------------------------------------------------------------
diff --git a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
new file mode 100644
index 0000000..fad9f97
--- /dev/null
+++ b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStats.java
@@ -0,0 +1,165 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NumericDocValues;
+
+/** Holds statistics for a DocValues field. */
+public abstract class DocValuesStats<T> {
+
+  private int missing = 0;
+  private int count = 0;
+
+  protected final String field;
+
+  protected T min;
+  protected T max;
+
+  protected DocValuesStats(String field, T initialMin, T initialMax) {
+    this.field = field;
+    this.min = initialMin;
+    this.max = initialMax;
+  }
+
+  /**
+   * Called from {@link DocValuesStats#accumulate(int)} after it has verified that the document has a value for
+   * the field. Implementations should update the statistics based on the value of the current document.
+   *
+   * @param count
+   *          the updated number of documents with a value for this field.
+   */
+  protected abstract void doAccumulate(int count) throws IOException;
+
+  /**
+   * Initializes this object with the given reader context. Returns whether stats can be computed for this segment (i.e.
+   * it does have the requested DocValues field).
+   */
+  protected abstract boolean init(LeafReaderContext context) throws IOException;
+
+  /** Returns whether the given document has a value for the requested DocValues field. */
+  protected abstract boolean hasValue(int doc) throws IOException;
+
+  final void accumulate(int doc) throws IOException {
+    if (hasValue(doc)) {
+      ++count;
+      doAccumulate(count);
+    } else {
+      ++missing;
+    }
+  }
+
+  final void addMissing() {
+    ++missing;
+  }
+
+  /** The field for which these stats were computed. */
+  public final String field() {
+    return field;
+  }
+
+  /** The number of documents which have a value for the field. */
+  public final int count() {
+    return count;
+  }
+
+  /** The number of documents which do not have a value for the field. */
+  public final int missing() {
+    return missing;
+  }
+
+  /** The minimum value of the field. Undefined when {@link #count} is zero. */
+  public final T min() {
+    return min;
+  }
+
+  /** The maximum value of the field. Undefined when {@link #count} is zero. */
+  public final T max() {
+    return max;
+  }
+
+  /** Holds statistics for a numeric DocValues field. */
+  public static abstract class NumericDocValuesStats<T extends Number> extends DocValuesStats<T> {
+
+    protected double mean = 0.0;
+
+    protected NumericDocValues ndv;
+
+    protected NumericDocValuesStats(String field, T initialMin, T initialMax) {
+      super(field, initialMin, initialMax);
+    }
+
+    @Override
+    protected final boolean init(LeafReaderContext context) throws IOException {
+      ndv = context.reader().getNumericDocValues(field);
+      return ndv != null;
+    }
+
+    @Override
+    protected boolean hasValue(int doc) throws IOException {
+      return ndv.advanceExact(doc);
+    }
+
+    /** The mean of all values of the field. Undefined when {@link #count} is zero. */
+    public final double mean() {
+      return mean;
+    }
+  }
+
+  /** Holds DocValues statistics for a numeric field storing {@code long} values. */
+  public static final class LongDocValuesStats extends NumericDocValuesStats<Long> {
+
+    public LongDocValuesStats(String field) {
+      super(field, Long.MAX_VALUE, Long.MIN_VALUE);
+    }
+
+    @Override
+    protected void doAccumulate(int count) throws IOException {
+      long val = ndv.longValue();
+      if (val > max) {
+        max = val;
+      }
+      if (val < min) {
+        min = val;
+      }
+      mean += (val - mean) / count;
+    }
+  }
+
+  /** Holds DocValues statistics for a numeric field storing {@code double} values. */
+  public static final class DoubleDocValuesStats extends NumericDocValuesStats<Double> {
+
+    public DoubleDocValuesStats(String field) {
+      super(field, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY);
+    }
+
+    @Override
+    protected void doAccumulate(int count) throws IOException {
+      double val = Double.longBitsToDouble(ndv.longValue());
+      if (Double.compare(val, max) > 0) {
+        max = val;
+      }
+      if (Double.compare(val, min) < 0) {
+        min = val;
+      }
+      mean += (val - mean) / count;
+    }
+  }
+
+}
\ No newline at end of file
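
The mean in doAccumulate is maintained with the standard incremental recurrence mean += (val - mean) / count, which equals the arithmetic mean after every value without keeping a running sum that could overflow. A tiny standalone check of that recurrence:

public class IncrementalMeanSketch {
  public static void main(String[] args) {
    long[] values = {3, 7, 20};
    double mean = 0.0;
    int count = 0;
    for (long val : values) {
      ++count;
      // After n values: mean_n = mean_{n-1} + (val - mean_{n-1}) / n
      mean += (val - mean) / count;
    }
    System.out.println(mean); // 10.0 == (3 + 7 + 20) / 3.0
  }
}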

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ad7152ad/lucene/misc/src/java/org/apache/lucene/search/DocValuesStatsCollector.java
----------------------------------------------------------------------
diff --git a/lucene/misc/src/java/org/apache/lucene/search/DocValuesStatsCollector.java b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStatsCollector.java
new file mode 100644
index 0000000..2b1fa4f
--- /dev/null
+++ b/lucene/misc/src/java/org/apache/lucene/search/DocValuesStatsCollector.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+
+import org.apache.lucene.index.LeafReaderContext;
+
+/** A {@link Collector} which computes statistics for a DocValues field. */
+public class DocValuesStatsCollector implements Collector {
+
+  private final DocValuesStats<?> stats;
+
+  /** Creates a collector to compute statistics for a DocValues field using the given {@code stats}. */
+  public DocValuesStatsCollector(DocValuesStats<?> stats) {
+    this.stats = stats;
+  }
+
+  @Override
+  public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
+    boolean shouldProcess = stats.init(context);
+    if (!shouldProcess) {
+      // Stats cannot be computed for this segment, therefore consider all matching documents as a 'miss'. 
+      return new LeafCollector() {
+        @Override public void setScorer(Scorer scorer) throws IOException {}
+
+        @Override
+        public void collect(int doc) throws IOException {
+          // All matching documents in this reader are missing a value
+          stats.addMissing();
+        }
+      };
+    }
+
+    return new LeafCollector() {
+      @Override public void setScorer(Scorer scorer) throws IOException {}
+
+      @Override
+      public void collect(int doc) throws IOException {
+        stats.accumulate(doc);
+      }
+    };
+  }
+
+  @Override
+  public boolean needsScores() {
+    return false;
+  }
+
+}
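
A minimal usage sketch for the collector, assuming an existing IndexSearcher over an index with a numeric doc-values field; the "price" field name is illustrative, and the tests that follow exercise the same pattern:

import java.io.IOException;

import org.apache.lucene.search.DocValuesStats.LongDocValuesStats;
import org.apache.lucene.search.DocValuesStatsCollector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;

public class StatsUsageSketch {

  // Collects min/max/mean of the "price" field over all matching documents.
  public static void printPriceStats(IndexSearcher searcher) throws IOException {
    LongDocValuesStats stats = new LongDocValuesStats("price");
    searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));

    System.out.println(stats.count() + " docs had a value, " + stats.missing() + " did not");
    if (stats.count() > 0) { // min/max/mean are undefined when count is zero
      System.out.println("min=" + stats.min() + " max=" + stats.max() + " mean=" + stats.mean());
    }
  }
}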

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ad7152ad/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java
----------------------------------------------------------------------
diff --git a/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java b/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java
new file mode 100644
index 0000000..65f82e6
--- /dev/null
+++ b/lucene/misc/src/test/org/apache/lucene/search/TestDocValuesStatsCollector.java
@@ -0,0 +1,166 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.stream.DoubleStream;
+import java.util.stream.LongStream;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.DoubleDocValuesField;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.NumericDocValuesField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.DocValuesStats.DoubleDocValuesStats;
+import org.apache.lucene.search.DocValuesStats.LongDocValuesStats;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+
+/** Unit tests for {@link DocValuesStatsCollector}. */
+public class TestDocValuesStatsCollector extends LuceneTestCase {
+
+  public void testNoDocsWithField() throws IOException {
+    try (Directory dir = newDirectory();
+        IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
+      int numDocs = TestUtil.nextInt(random(), 1, 100);
+      for (int i = 0; i < numDocs; i++) {
+        indexWriter.addDocument(new Document());
+      }
+
+      try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
+        IndexSearcher searcher = new IndexSearcher(reader);
+        LongDocValuesStats stats = new LongDocValuesStats("foo");
+        searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));
+
+        assertEquals(0, stats.count());
+        assertEquals(numDocs, stats.missing());
+      }
+    }
+  }
+
+  public void testRandomDocsWithLongValues() throws IOException {
+    try (Directory dir = newDirectory();
+        IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
+      String field = "numeric";
+      int numDocs = TestUtil.nextInt(random(), 1, 100);
+      long[] docValues = new long[numDocs];
+      int nextVal = 1;
+      for (int i = 0; i < numDocs; i++) {
+        Document doc = new Document();
+        if (random().nextBoolean()) { // not all documents have a value
+          doc.add(new NumericDocValuesField(field, nextVal));
+          doc.add(new StringField("id", "doc" + i, Store.NO));
+          docValues[i] = nextVal;
+          ++nextVal;
+        }
+        indexWriter.addDocument(doc);
+      }
+
+      // 20% of cases delete some docs
+      if (random().nextDouble() < 0.2) {
+        for (int i = 0; i < numDocs; i++) {
+          if (random().nextBoolean()) {
+            indexWriter.deleteDocuments(new Term("id", "doc" + i));
+            docValues[i] = 0;
+          }
+        }
+      }
+
+      try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
+        IndexSearcher searcher = new IndexSearcher(reader);
+        LongDocValuesStats stats = new LongDocValuesStats(field);
+        searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));
+
+        int expCount = (int) Arrays.stream(docValues).filter(v -> v > 0).count();
+        assertEquals(expCount, stats.count());
+        assertEquals(getZeroValues(docValues).count() - reader.numDeletedDocs(), stats.missing());
+        if (stats.count() > 0) {
+          assertEquals(getPositiveValues(docValues).max().getAsLong(), stats.max().longValue());
+          assertEquals(getPositiveValues(docValues).min().getAsLong(), stats.min().longValue());
+          assertEquals(getPositiveValues(docValues).average().getAsDouble(), stats.mean(), 0.00001);
+        }
+      }
+    }
+  }
+
+  public void testRandomDocsWithDoubleValues() throws IOException {
+    try (Directory dir = newDirectory();
+        IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
+      String field = "numeric";
+      int numDocs = TestUtil.nextInt(random(), 1, 100);
+      double[] docValues = new double[numDocs];
+      double nextVal = 1.0;
+      for (int i = 0; i < numDocs; i++) {
+        Document doc = new Document();
+        if (random().nextBoolean()) { // not all documents have a value
+          doc.add(new DoubleDocValuesField(field, nextVal));
+          doc.add(new StringField("id", "doc" + i, Store.NO));
+          docValues[i] = nextVal;
+          ++nextVal;
+        }
+        indexWriter.addDocument(doc);
+      }
+
+      // 20% of cases delete some docs
+      if (random().nextDouble() < 0.2) {
+        for (int i = 0; i < numDocs; i++) {
+          if (random().nextBoolean()) {
+            indexWriter.deleteDocuments(new Term("id", "doc" + i));
+            docValues[i] = 0;
+          }
+        }
+      }
+
+      try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
+        IndexSearcher searcher = new IndexSearcher(reader);
+        DoubleDocValuesStats stats = new DoubleDocValuesStats(field);
+        searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));
+
+        int expCount = (int) Arrays.stream(docValues).filter(v -> v > 0).count();
+        assertEquals(expCount, stats.count());
+        assertEquals(getZeroValues(docValues).count() - reader.numDeletedDocs(), stats.missing());
+        if (stats.count() > 0) {
+          assertEquals(getPositiveValues(docValues).max().getAsDouble(), stats.max().doubleValue(), 0.00001);
+          assertEquals(getPositiveValues(docValues).min().getAsDouble(), stats.min().doubleValue(), 0.00001);
+          assertEquals(getPositiveValues(docValues).average().getAsDouble(), stats.mean(), 0.00001);
+        }
+      }
+    }
+  }
+
+  private static LongStream getPositiveValues(long[] docValues) {
+    return Arrays.stream(docValues).filter(v -> v > 0);
+  }
+
+  private static DoubleStream getPositiveValues(double[] docValues) {
+    return Arrays.stream(docValues).filter(v -> v > 0);
+  }
+
+  private static LongStream getZeroValues(long[] docValues) {
+    return Arrays.stream(docValues).filter(v -> v == 0);
+  }
+
+  private static DoubleStream getZeroValues(double[] docValues) {
+    return Arrays.stream(docValues).filter(v -> v == 0);
+  }
+
+}


[06/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-9819: Add new line to the end of SHA

Posted by kr...@apache.org.
SOLR-9819: Add new line to the end of SHA


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/39c2f3d8
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/39c2f3d8
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/39c2f3d8

Branch: refs/heads/jira/solr-8593
Commit: 39c2f3d80fd585c7ae4a4a559d53a19a3f100061
Parents: 8cbcbc9
Author: Anshum Gupta <an...@apache.org>
Authored: Fri Dec 2 16:42:35 2016 -0800
Committer: Anshum Gupta <an...@apache.org>
Committed: Fri Dec 2 16:42:35 2016 -0800

----------------------------------------------------------------------
 solr/licenses/commons-fileupload-1.3.2.jar.sha1 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/39c2f3d8/solr/licenses/commons-fileupload-1.3.2.jar.sha1
----------------------------------------------------------------------
diff --git a/solr/licenses/commons-fileupload-1.3.2.jar.sha1 b/solr/licenses/commons-fileupload-1.3.2.jar.sha1
index 747b509..80f80fb 100644
--- a/solr/licenses/commons-fileupload-1.3.2.jar.sha1
+++ b/solr/licenses/commons-fileupload-1.3.2.jar.sha1
@@ -1 +1 @@
-5d7491ed6ebd02b6a8d2305f8e6b7fe5dbd95f72
\ No newline at end of file
+5d7491ed6ebd02b6a8d2305f8e6b7fe5dbd95f72


[33/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-9844: Improve FieldCache usage api response formatting and show total size information

Posted by kr...@apache.org.
SOLR-9844: Improve FieldCache usage api response formatting and show total size information


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/39ba1304
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/39ba1304
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/39ba1304

Branch: refs/heads/jira/solr-8593
Commit: 39ba13046bc48beaa139923d5f9fbf7d6fc192b2
Parents: 87d8b54
Author: Varun Thacker <va...@apache.org>
Authored: Mon Dec 12 12:38:14 2016 -0800
Committer: Varun Thacker <va...@apache.org>
Committed: Mon Dec 12 12:46:57 2016 -0800

----------------------------------------------------------------------
 .../org/apache/lucene/index/SegmentCoreReaders.java   |  8 ++++++++
 solr/CHANGES.txt                                      |  3 +++
 .../org/apache/solr/search/SolrFieldCacheMBean.java   |  2 ++
 .../java/org/apache/solr/uninverting/FieldCache.java  | 14 +++++---------
 .../solr/uninverting/FieldCacheSanityChecker.java     |  3 ++-
 .../apache/solr/uninverting/UninvertingReader.java    | 10 ++++++++++
 .../apache/solr/search/TestSolrFieldCacheMBean.java   |  1 +
 7 files changed, 31 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/39ba1304/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java
index e99c1ad..21ac4a1 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java
@@ -56,6 +56,7 @@ final class SegmentCoreReaders {
   final TermVectorsReader termVectorsReaderOrig;
   final PointsReader pointsReader;
   final Directory cfsReader;
+  final String segment;
   /** 
    * fieldinfos for this core: means gen=-1.
    * this is the exact fieldinfos these codec components saw at write.
@@ -98,6 +99,8 @@ final class SegmentCoreReaders {
         cfsDir = dir;
       }
 
+      segment = si.info.name;
+
       coreFieldInfos = codec.fieldInfosFormat().read(cfsDir, si.info, "", context);
       
       final SegmentReadState segmentReadState = new SegmentReadState(cfsDir, si.info, coreFieldInfos, context);
@@ -192,4 +195,9 @@ final class SegmentCoreReaders {
   void removeCoreClosedListener(CoreClosedListener listener) {
     coreClosedListeners.remove(listener);
   }
+
+  @Override
+  public String toString() {
+    return "SegmentCoreReader(" + segment + ")";
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/39ba1304/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 78f7f55..37cccae 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -164,6 +164,9 @@ New Features
 * SOLR-5043: New solr.dns.prevent.reverse.lookup system property that can be used to prevent long core
   (re)load delays on systems with misconfigured hostname/DNS (hossman)
 
+* SOLR-9844: FieldCache information fetched via the mbeans handler or seen via the UI now displays the total size used.
+  The individual cache entries in the response are now formatted better as well. (Varun Thacker)
+
 Optimizations
 ----------------------
 * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/39ba1304/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java b/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java
index 62bc4fa..70781e9 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java
@@ -64,11 +64,13 @@ public class SolrFieldCacheMBean implements JmxAugmentedSolrInfoMBean {
     if (listEntries) {
       String[] entries = UninvertingReader.getUninvertedStats();
       stats.add("entries_count", entries.length);
+      stats.add("total_size", UninvertingReader.getTotalSize());
       for (int i = 0; i < entries.length; i++) {
         stats.add("entry#" + i, entries[i]);
       }
     } else {
       stats.add("entries_count", UninvertingReader.getUninvertedStatsSize());
+      stats.add("total_size", UninvertingReader.getTotalSize());
     }
     return stats;
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/39ba1304/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
index 32f5615..544800e 100644
--- a/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCache.java
@@ -384,7 +384,7 @@ public interface FieldCache {
       return custom;
     }
 
-    public Object getValue() {
+    public Accountable getValue() {
       return value;
     }
 
@@ -399,15 +399,11 @@ public interface FieldCache {
     
     @Override
     public String toString() {
-      StringBuilder b = new StringBuilder(250);
-      b.append("'").append(getReaderKey()).append("'=>");
-      b.append("'").append(getFieldName()).append("',");
-      b.append(getCacheType()).append(",").append(getCustom());
-      b.append("=>").append(getValue().getClass().getName()).append("#");
-      b.append(System.identityHashCode(getValue()));
-      
+      StringBuilder b = new StringBuilder(100);
+      b.append("segment='").append(getReaderKey().toString()).append("', ");
+      b.append("field='").append(getFieldName()).append("', ");
       String s = getEstimatedSize();
-      b.append(" (size =~ ").append(s).append(')');
+      b.append("size =~ ").append(s);
 
       return b.toString();
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/39ba1304/solr/core/src/java/org/apache/solr/uninverting/FieldCacheSanityChecker.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheSanityChecker.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheSanityChecker.java
index ec398f2..3d874ce 100644
--- a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheSanityChecker.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheSanityChecker.java
@@ -27,6 +27,7 @@ import java.util.Set;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexReaderContext;
 import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.MapOfSets;
 import org.apache.solr.uninverting.FieldCache.CacheEntry;
 
@@ -103,7 +104,7 @@ final class FieldCacheSanityChecker {
     // iterate over all the cacheEntries to get the mappings we'll need
     for (int i = 0; i < cacheEntries.length; i++) {
       final CacheEntry item = cacheEntries[i];
-      final Object val = item.getValue();
+      final Accountable val = item.getValue();
 
       // It's OK to have dup entries, where one is eg
       // float[] and the other is the Bits (from

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/39ba1304/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
index 7825666..87fb7a6 100644
--- a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
@@ -37,6 +37,7 @@ import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.NumericDocValues;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.solr.uninverting.FieldCache.CacheEntry;
 
 /**
@@ -386,4 +387,13 @@ public class UninvertingReader extends FilterLeafReader {
   public static int getUninvertedStatsSize() {
     return FieldCache.DEFAULT.getCacheEntries().length;
   }
+
+  public static String getTotalSize() {
+    CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries();
+    long totalBytesUsed = 0;
+    for (int i = 0; i < entries.length; i++) {
+      totalBytesUsed += entries[i].getValue().ramBytesUsed();
+    }
+    return RamUsageEstimator.humanReadableUnits(totalBytesUsed);
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/39ba1304/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java b/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java
index 5343f73..a705e1e 100644
--- a/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java
+++ b/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java
@@ -76,6 +76,7 @@ public class TestSolrFieldCacheMBean extends SolrTestCaseJ4 {
     SolrFieldCacheMBean mbean = new SolrFieldCacheMBean();
     NamedList stats = checkJmx ? mbean.getStatisticsForJmx() : mbean.getStatistics();
     assert(new Integer(stats.get("entries_count").toString()) > 0);
+    assertNotNull(stats.get("total_size"));
     assertNull(stats.get("entry#0"));
   }
 }
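
Tightening CacheEntry.getValue() from Object to Accountable is what makes the new total computable: every cache value can now report ramBytesUsed(). A minimal sketch of the same aggregation, assuming Lucene's Accountable and RamUsageEstimator are on the classpath:

import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;

public class TotalSizeSketch {

  // Sums ramBytesUsed() over a set of cache values and renders the total in
  // human-readable units, as UninvertingReader.getTotalSize() does above.
  public static String totalSize(Accountable[] values) {
    long totalBytesUsed = 0;
    for (Accountable value : values) {
      totalBytesUsed += value.ramBytesUsed();
    }
    return RamUsageEstimator.humanReadableUnits(totalBytesUsed);
  }
}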


[07/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7563: use a compressed format for the in-heap BKD index

Posted by kr...@apache.org.
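
The packed in-heap index below prefix-codes each split value against the previous split value seen in the same dimension, and folds the split dimension, the shared-prefix length, and the delta of the first differing byte into a single vInt (the line computing 'code' in recursePackIndex). A standalone round-trip sketch of that mixed-radix encoding; the decode half illustrates what the reader side has to do and is not taken from this patch:

public class PackedCodeSketch {
  public static void main(String[] args) {
    int numDims = 2, bytesPerDim = 4;
    int splitDim = 1, prefix = 3, firstDiffByteDelta = 7;

    // Encode, as in recursePackIndex: prefix ranges over 0..bytesPerDim, so its
    // radix is bytesPerDim + 1; splitDim ranges over 0..numDims-1.
    int code = (firstDiffByteDelta * (1 + bytesPerDim) + prefix) * numDims + splitDim;

    // Decode by peeling the factors off in reverse order.
    int gotSplitDim = code % numDims;
    int rest = code / numDims;
    int gotPrefix = rest % (1 + bytesPerDim);
    int gotDelta = rest / (1 + bytesPerDim);

    System.out.println(code);        // 77
    System.out.println(gotSplitDim); // 1
    System.out.println(gotPrefix);   // 3
    System.out.println(gotDelta);    // 7
  }
}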
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index 5526624..c82a0c8 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -33,6 +33,7 @@ import org.apache.lucene.store.ChecksumIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.store.TrackingDirectoryWrapper;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
@@ -83,7 +84,8 @@ public class BKDWriter implements Closeable {
   public static final int VERSION_COMPRESSED_DOC_IDS = 1;
   public static final int VERSION_COMPRESSED_VALUES = 2;
   public static final int VERSION_IMPLICIT_SPLIT_DIM_1D = 3;
-  public static final int VERSION_CURRENT = VERSION_IMPLICIT_SPLIT_DIM_1D;
+  public static final int VERSION_PACKED_INDEX = 4;
+  public static final int VERSION_CURRENT = VERSION_PACKED_INDEX;
 
   /** How many bytes each docs takes in the fixed-width offline format */
   private final int bytesPerDoc;
@@ -325,15 +327,10 @@ public class BKDWriter implements Closeable {
                                            bkd.numDims,
                                            bkd.packedBytesLength,
                                            bkd.maxPointsInLeafNode,
+                                           null,
                                            null);
       this.docMap = docMap;
-      long minFP = Long.MAX_VALUE;
-      //System.out.println("MR.init " + this + " bkdreader=" + bkd + " leafBlockFPs.length=" + bkd.leafBlockFPs.length);
-      for(long fp : bkd.leafBlockFPs) {
-        minFP = Math.min(minFP, fp);
-        //System.out.println("  leaf fp=" + fp);
-      }
-      state.in.seek(minFP);
+      state.in.seek(bkd.getMinLeafBlockFP());
       this.packedValues = new byte[bkd.maxPointsInLeafNode * bkd.packedBytesLength];
     }
 
@@ -341,7 +338,7 @@ public class BKDWriter implements Closeable {
       //System.out.println("MR.next this=" + this);
       while (true) {
         if (docBlockUpto == docsInBlock) {
-          if (blockID == bkd.leafBlockFPs.length) {
+          if (blockID == bkd.leafNodeOffset) {
             //System.out.println("  done!");
             return false;
           }
@@ -489,7 +486,6 @@ public class BKDWriter implements Closeable {
     return indexFP;
   }
 
-
   /* In the 1D case, we can simply sort points in ascending order and use the
    * same writing logic as we use at merge time. */
   private long writeField1Dim(IndexOutput out, String fieldName, MutablePointValues reader) throws IOException {
@@ -648,6 +644,7 @@ public class BKDWriter implements Closeable {
     }
 
     private void writeLeafBlock() throws IOException {
+      //System.out.println("writeLeafBlock pos=" + out.getFilePointer());
       assert leafCount != 0;
       if (valueCount == 0) {
         System.arraycopy(leafValues, 0, minPackedValue, 0, packedBytesLength);
@@ -811,6 +808,24 @@ public class BKDWriter implements Closeable {
     }.sort(0, pointCount);
   }
 
+  // useful for debugging:
+  /*
+  private void printPathSlice(String desc, PathSlice slice, int dim) throws IOException {
+    System.out.println("    " + desc + " dim=" + dim + " count=" + slice.count + ":");    
+    try(PointReader r = slice.writer.getReader(slice.start, slice.count)) {
+      int count = 0;
+      while (r.next()) {
+        byte[] v = r.packedValue();
+        System.out.println("      " + count + ": " + new BytesRef(v, dim*bytesPerDim, bytesPerDim));
+        count++;
+        if (count == slice.count) {
+          break;
+        }
+      }
+    }
+  }
+  */
+
   private PointWriter sort(int dim) throws IOException {
     assert dim >= 0 && dim < numDims;
 
@@ -1019,46 +1034,238 @@ public class BKDWriter implements Closeable {
     return indexFP;
   }
 
-  /** Subclass can change how it writes the index. */
-  protected void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException {
+  /** Packs the two arrays, representing a balanced binary tree, into a compact byte[] structure. */
+  private byte[] packIndex(long[] leafBlockFPs, byte[] splitPackedValues) throws IOException {
+
+    int numLeaves = leafBlockFPs.length;
+
+    // Possibly rotate the leaf block FPs, if the index is not a fully balanced binary tree (only happens
+    // if it was created by OneDimensionBKDWriter).  In this case the leaf nodes may straddle the two bottom
+    // levels of the binary tree:
+    if (numDims == 1 && numLeaves > 1) {
+      int levelCount = 2;
+      while (true) {
+        if (numLeaves >= levelCount && numLeaves <= 2*levelCount) {
+          int lastLevel = 2*(numLeaves - levelCount);
+          assert lastLevel >= 0;
+          if (lastLevel != 0) {
+            // Last level is partially filled, so we must rotate the leaf FPs to match.  We do this here, after loading
+            // at read-time, so that we can still delta code them on disk at write:
+            long[] newLeafBlockFPs = new long[numLeaves];
+            System.arraycopy(leafBlockFPs, lastLevel, newLeafBlockFPs, 0, leafBlockFPs.length - lastLevel);
+            System.arraycopy(leafBlockFPs, 0, newLeafBlockFPs, leafBlockFPs.length - lastLevel, lastLevel);
+            leafBlockFPs = newLeafBlockFPs;
+          }
+          break;
+        }
+
+        levelCount *= 2;
+      }
+    }
+
+    // Reused while packing the index:
+    RAMOutputStream writeBuffer = new RAMOutputStream();
+
+    // This is the "file" we append the byte[] to:
+    List<byte[]> blocks = new ArrayList<>();
+    byte[] lastSplitValues = new byte[bytesPerDim * numDims];
+    //System.out.println("\npack index");
+    int totalSize = recursePackIndex(writeBuffer, leafBlockFPs, splitPackedValues, 0L, blocks, 1, lastSplitValues, new boolean[numDims], false);
+
+    // Compact the byte[] blocks into single byte index:
+    byte[] index = new byte[totalSize];
+    int upto = 0;
+    for(byte[] block : blocks) {
+      System.arraycopy(block, 0, index, upto, block.length);
+      upto += block.length;
+    }
+    assert upto == totalSize;
+
+    return index;
+  }
+
+  /** Appends the current contents of writeBuffer as another block on the growing in-memory file */
+  private int appendBlock(RAMOutputStream writeBuffer, List<byte[]> blocks) throws IOException {
+    int pos = Math.toIntExact(writeBuffer.getFilePointer());
+    byte[] bytes = new byte[pos];
+    writeBuffer.writeTo(bytes, 0);
+    writeBuffer.reset();
+    blocks.add(bytes);
+    return pos;
+  }
+
+  /**
+   * lastSplitValues is per-dimension split value previously seen; we use this to prefix-code the split byte[] on each inner node
+   */
+  private int recursePackIndex(RAMOutputStream writeBuffer, long[] leafBlockFPs, byte[] splitPackedValues, long minBlockFP, List<byte[]> blocks,
+                               int nodeID, byte[] lastSplitValues, boolean[] negativeDeltas, boolean isLeft) throws IOException {
+    if (nodeID >= leafBlockFPs.length) {
+      int leafID = nodeID - leafBlockFPs.length;
+      //System.out.println("recursePack leaf nodeID=" + nodeID);
+
+      // In the unbalanced case it's possible the left most node only has one child:
+      if (leafID < leafBlockFPs.length) {
+        long delta = leafBlockFPs[leafID] - minBlockFP;
+        if (isLeft) {
+          assert delta == 0;
+          return 0;
+        } else {
+          assert nodeID == 1 || delta > 0: "nodeID=" + nodeID;
+          writeBuffer.writeVLong(delta);
+          return appendBlock(writeBuffer, blocks);
+        }
+      } else {
+        return 0;
+      }
+    } else {
+      long leftBlockFP;
+      if (isLeft == false) {
+        leftBlockFP = getLeftMostLeafBlockFP(leafBlockFPs, nodeID);
+        long delta = leftBlockFP - minBlockFP;
+        assert nodeID == 1 || delta > 0;
+        writeBuffer.writeVLong(delta);
+      } else {
+        // The left tree's left most leaf block FP is always the minimal FP:
+        leftBlockFP = minBlockFP;
+      }
+
+      int address = nodeID * (1+bytesPerDim);
+      int splitDim = splitPackedValues[address++] & 0xff;
+
+      //System.out.println("recursePack inner nodeID=" + nodeID + " splitDim=" + splitDim + " splitValue=" + new BytesRef(splitPackedValues, address, bytesPerDim));
+
+      // find common prefix with last split value in this dim:
+      int prefix = 0;
+      for(;prefix<bytesPerDim;prefix++) {
+        if (splitPackedValues[address+prefix] != lastSplitValues[splitDim * bytesPerDim + prefix]) {
+          break;
+        }
+      }
+
+      //System.out.println("writeNodeData nodeID=" + nodeID + " splitDim=" + splitDim + " numDims=" + numDims + " bytesPerDim=" + bytesPerDim + " prefix=" + prefix);
+
+      int firstDiffByteDelta;
+      if (prefix < bytesPerDim) {
+        //System.out.println("  delta byte cur=" + Integer.toHexString(splitPackedValues[address+prefix]&0xFF) + " prev=" + Integer.toHexString(lastSplitValues[splitDim * bytesPerDim + prefix]&0xFF) + " negated?=" + negativeDeltas[splitDim]);
+        firstDiffByteDelta = (splitPackedValues[address+prefix]&0xFF) - (lastSplitValues[splitDim * bytesPerDim + prefix]&0xFF);
+        if (negativeDeltas[splitDim]) {
+          firstDiffByteDelta = -firstDiffByteDelta;
+        }
+        //System.out.println("  delta=" + firstDiffByteDelta);
+        assert firstDiffByteDelta > 0;
+      } else {
+        firstDiffByteDelta = 0;
+      }
+
+      // pack the prefix, splitDim and delta first diff byte into a single vInt:
+      int code = (firstDiffByteDelta * (1+bytesPerDim) + prefix) * numDims + splitDim;
+
+      //System.out.println("  code=" + code);
+      //System.out.println("  splitValue=" + new BytesRef(splitPackedValues, address, bytesPerDim));
+
+      writeBuffer.writeVInt(code);
+
+      // write the split value, prefix coded vs. our parent's split value:
+      int suffix = bytesPerDim - prefix;
+      byte[] savSplitValue = new byte[suffix];
+      if (suffix > 1) {
+        writeBuffer.writeBytes(splitPackedValues, address+prefix+1, suffix-1);
+      }
+
+      byte[] cmp = lastSplitValues.clone();
+
+      System.arraycopy(lastSplitValues, splitDim * bytesPerDim + prefix, savSplitValue, 0, suffix);
+
+      // copy our split value into lastSplitValues for our children to prefix-code against
+      System.arraycopy(splitPackedValues, address+prefix, lastSplitValues, splitDim * bytesPerDim + prefix, suffix);
+
+      int numBytes = appendBlock(writeBuffer, blocks);
+
+      // placeholder for left-tree numBytes; we need this so that at search time if we only need to recurse into the right sub-tree we can
+      // quickly seek to its starting point
+      int idxSav = blocks.size();
+      blocks.add(null);
+
+      boolean savNegativeDelta = negativeDeltas[splitDim];
+      negativeDeltas[splitDim] = true;
+
+      int leftNumBytes = recursePackIndex(writeBuffer, leafBlockFPs, splitPackedValues, leftBlockFP, blocks, 2*nodeID, lastSplitValues, negativeDeltas, true);
+
+      if (nodeID * 2 < leafBlockFPs.length) {
+        writeBuffer.writeVInt(leftNumBytes);
+      } else {
+        assert leftNumBytes == 0: "leftNumBytes=" + leftNumBytes;
+      }
+      int numBytes2 = Math.toIntExact(writeBuffer.getFilePointer());
+      byte[] bytes2 = new byte[numBytes2];
+      writeBuffer.writeTo(bytes2, 0);
+      writeBuffer.reset();
+      // replace our placeholder:
+      blocks.set(idxSav, bytes2);
+
+      negativeDeltas[splitDim] = false;
+      int rightNumBytes = recursePackIndex(writeBuffer, leafBlockFPs, splitPackedValues, leftBlockFP, blocks, 2*nodeID+1, lastSplitValues, negativeDeltas, false);
+
+      negativeDeltas[splitDim] = savNegativeDelta;
+
+      // restore lastSplitValues to what caller originally passed us:
+      System.arraycopy(savSplitValue, 0, lastSplitValues, splitDim * bytesPerDim + prefix, suffix);
+
+      assert Arrays.equals(lastSplitValues, cmp);
+      
+      return numBytes + numBytes2 + leftNumBytes + rightNumBytes;
+    }
+  }
+
+  private long getLeftMostLeafBlockFP(long[] leafBlockFPs, int nodeID) {
+    int nodeIDIn = nodeID;
+    // TODO: can we do this cheaper, e.g. a closed form solution instead of while loop?  Or
+    // change the recursion while packing the index to return this left-most leaf block FP
+    // from each recursion instead?
+    //
+    // Still, the overall cost here is minor: this method's cost is O(log(N)), and while writing
+    // we call it O(N) times (N = number of leaf blocks)
+    while (nodeID < leafBlockFPs.length) {
+      nodeID *= 2;
+    }
+    int leafID = nodeID - leafBlockFPs.length;
+    long result = leafBlockFPs[leafID];
+    if (result < 0) {
+      throw new AssertionError(result + " for leaf " + leafID);
+    }
+    return result;
+  }
+
+  private void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException {
+    byte[] packedIndex = packIndex(leafBlockFPs, splitPackedValues);
+    writeIndex(out, leafBlockFPs.length, packedIndex);
+  }
+  
+  private void writeIndex(IndexOutput out, int numLeaves, byte[] packedIndex) throws IOException {
+    
     CodecUtil.writeHeader(out, CODEC_NAME, VERSION_CURRENT);
     out.writeVInt(numDims);
     out.writeVInt(maxPointsInLeafNode);
     out.writeVInt(bytesPerDim);
 
-    assert leafBlockFPs.length > 0;
-    out.writeVInt(leafBlockFPs.length);
+    assert numLeaves > 0;
+    out.writeVInt(numLeaves);
     out.writeBytes(minPackedValue, 0, packedBytesLength);
     out.writeBytes(maxPackedValue, 0, packedBytesLength);
 
     out.writeVLong(pointCount);
     out.writeVInt(docsSeen.cardinality());
-
-    // NOTE: splitPackedValues[0] is unused, because nodeID is 1-based:
-    if (numDims == 1) {
-      // write the index, skipping the byte used to store the split dim since it is always 0
-      for (int i = 1; i < splitPackedValues.length; i += 1 + bytesPerDim) {
-        out.writeBytes(splitPackedValues, i, bytesPerDim);
-      }
-    } else {
-      out.writeBytes(splitPackedValues, 0, splitPackedValues.length);
-    }
-
-    long lastFP = 0;
-    for (int i=0;i<leafBlockFPs.length;i++) {
-      long delta = leafBlockFPs[i]-lastFP;
-      out.writeVLong(delta);
-      lastFP = leafBlockFPs[i];
-    }
+    out.writeVInt(packedIndex.length);
+    out.writeBytes(packedIndex, 0, packedIndex.length);
   }
 
-  protected void writeLeafBlockDocs(IndexOutput out, int[] docIDs, int start, int count) throws IOException {
+  private void writeLeafBlockDocs(IndexOutput out, int[] docIDs, int start, int count) throws IOException {
     assert count > 0: "maxPointsInLeafNode=" + maxPointsInLeafNode;
     out.writeVInt(count);
     DocIdsWriter.writeDocIds(docIDs, start, count, out);
   }
 
-  protected void writeLeafBlockPackedValues(IndexOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction<BytesRef> packedValues) throws IOException {
+  private void writeLeafBlockPackedValues(IndexOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction<BytesRef> packedValues) throws IOException {
     int prefixLenSum = Arrays.stream(commonPrefixLengths).sum();
     if (prefixLenSum == packedBytesLength) {
       // all values in this block are equal
@@ -1109,7 +1316,7 @@ public class BKDWriter implements Closeable {
     return end - start;
   }
 
-  protected void writeCommonPrefixes(IndexOutput out, int[] commonPrefixes, byte[] packedValue) throws IOException {
+  private void writeCommonPrefixes(IndexOutput out, int[] commonPrefixes, byte[] packedValue) throws IOException {
     for(int dim=0;dim<numDims;dim++) {
       out.writeVInt(commonPrefixes[dim]);
       //System.out.println(commonPrefixes[dim] + " of " + bytesPerDim);
@@ -1177,7 +1384,7 @@ public class BKDWriter implements Closeable {
     // TODO: find a way to also checksum this reader?  If we changed to markLeftTree, and scanned the final chunk, it could work?
     try (PointReader reader = source.writer.getReader(source.start + source.count - rightCount, rightCount)) {
       boolean result = reader.next();
-      assert result;
+      assert result: "rightCount=" + rightCount + " source.count=" + source.count + " source.writer=" + source.writer;
       System.arraycopy(reader.packedValue(), splitDim*bytesPerDim, scratch1, 0, bytesPerDim);
       if (numDims > 1) {
         assert ordBitSet.get(reader.ord()) == false;
@@ -1244,12 +1451,12 @@ public class BKDWriter implements Closeable {
 
   /* Recursively reorders the provided reader and writes the bkd-tree on the fly. */
   private void build(int nodeID, int leafNodeOffset,
-      MutablePointValues reader, int from, int to,
-      IndexOutput out,
-      byte[] minPackedValue, byte[] maxPackedValue,
-      byte[] splitPackedValues,
-      long[] leafBlockFPs,
-      int[] spareDocIds) throws IOException {
+                     MutablePointValues reader, int from, int to,
+                     IndexOutput out,
+                     byte[] minPackedValue, byte[] maxPackedValue,
+                     byte[] splitPackedValues,
+                     long[] leafBlockFPs,
+                     int[] spareDocIds) throws IOException {
 
     if (nodeID >= leafNodeOffset) {
       // leaf node
@@ -1311,6 +1518,7 @@ public class BKDWriter implements Closeable {
       for (int i = from; i < to; ++i) {
         docIDs[i - from] = reader.getDocID(i);
       }
+      //System.out.println("writeLeafBlock pos=" + out.getFilePointer());
       writeLeafBlockDocs(out, docIDs, 0, count);
 
       // Write the common prefixes:
@@ -1344,6 +1552,7 @@ public class BKDWriter implements Closeable {
           break;
         }
       }
+
       MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
           reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
 
@@ -1381,7 +1590,7 @@ public class BKDWriter implements Closeable {
     for(PathSlice slice : slices) {
       assert slice.count == slices[0].count;
     }
-
+    
     if (numDims == 1 && slices[0].writer instanceof OfflinePointWriter && slices[0].count <= maxPointsSortInHeap) {
       // Special case for 1D, to cutover to heap once we recurse deeply enough:
       slices[0] = switchToHeap(slices[0], toCloseHeroically);

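Note on the packed index format introduced above: the single vInt folds the common-prefix length, split dimension, and first-diff-byte delta into one integer. A minimal sketch of the corresponding unpacking, assuming the same numDims and bytesPerDim as at write time (this helper is illustrative only; the real decoding lives in BKDReader):

    // encode, as in recursePackIndex above:
    //   code = (firstDiffByteDelta * (1 + bytesPerDim) + prefix) * numDims + splitDim
    static int[] decodeSplitCode(int code, int numDims, int bytesPerDim) {
      int splitDim = code % numDims;
      int rest = code / numDims;
      int prefix = rest % (1 + bytesPerDim);
      int firstDiffByteDelta = rest / (1 + bytesPerDim);
      return new int[] {splitDim, prefix, firstDiffByteDelta};
    }
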
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
index 0cd4bd2..99182cb 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointReader.java
@@ -18,7 +18,10 @@ package org.apache.lucene.util.bkd;
 
 import java.util.List;
 
-final class HeapPointReader extends PointReader {
+/** Utility class to read buffered points from in-heap arrays.
+ *
+ * @lucene.internal */
+public final class HeapPointReader extends PointReader {
   private int curRead;
   final List<byte[]> blocks;
   final int valuesPerBlock;
@@ -30,7 +33,7 @@ final class HeapPointReader extends PointReader {
   final byte[] scratch;
   final boolean singleValuePerDoc;
 
-  HeapPointReader(List<byte[]> blocks, int valuesPerBlock, int packedBytesLength, int[] ords, long[] ordsLong, int[] docIDs, int start, int end, boolean singleValuePerDoc) {
+  public HeapPointReader(List<byte[]> blocks, int valuesPerBlock, int packedBytesLength, int[] ords, long[] ordsLong, int[] docIDs, int start, int end, boolean singleValuePerDoc) {
     this.blocks = blocks;
     this.valuesPerBlock = valuesPerBlock;
     this.singleValuePerDoc = singleValuePerDoc;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
index 24d248b..e102651 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/HeapPointWriter.java
@@ -24,18 +24,21 @@ import java.util.List;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 
-final class HeapPointWriter implements PointWriter {
-  int[] docIDs;
-  long[] ordsLong;
-  int[] ords;
+/** Utility class to write new points into in-heap arrays.
+ *
+ *  @lucene.internal */
+public final class HeapPointWriter implements PointWriter {
+  public int[] docIDs;
+  public long[] ordsLong;
+  public int[] ords;
   private int nextWrite;
   private boolean closed;
   final int maxSize;
-  final int valuesPerBlock;
+  public final int valuesPerBlock;
   final int packedBytesLength;
   final boolean singleValuePerDoc;
   // NOTE: can't use ByteBlockPool because we need random-write access when sorting in heap
-  final List<byte[]> blocks = new ArrayList<>();
+  public final List<byte[]> blocks = new ArrayList<>();
 
   public HeapPointWriter(int initSize, int maxSize, int packedBytesLength, boolean longOrds, boolean singleValuePerDoc) {
     docIDs = new int[initSize];
@@ -77,7 +80,7 @@ final class HeapPointWriter implements PointWriter {
     nextWrite = other.nextWrite;
   }
 
-  void readPackedValue(int index, byte[] bytes) {
+  public void readPackedValue(int index, byte[] bytes) {
     assert bytes.length == packedBytesLength;
     int block = index / valuesPerBlock;
     int blockIndex = index % valuesPerBlock;
@@ -85,7 +88,7 @@ final class HeapPointWriter implements PointWriter {
   }
 
   /** Returns a reference, in <code>result</code>, to the byte[] slice holding this value */
-  void getPackedValueSlice(int index, BytesRef result) {
+  public void getPackedValueSlice(int index, BytesRef result) {
     int block = index / valuesPerBlock;
     int blockIndex = index % valuesPerBlock;
     result.bytes = blocks.get(block);
@@ -138,7 +141,8 @@ final class HeapPointWriter implements PointWriter {
   @Override
   public PointReader getReader(long start, long length) {
     assert start + length <= docIDs.length: "start=" + start + " length=" + length + " docIDs.length=" + docIDs.length;
-    return new HeapPointReader(blocks, valuesPerBlock, packedBytesLength, ords, ordsLong, docIDs, (int) start, nextWrite, singleValuePerDoc);
+    assert start + length <= nextWrite: "start=" + start + " length=" + length + " nextWrite=" + nextWrite;
+    return new HeapPointReader(blocks, valuesPerBlock, packedBytesLength, ords, ordsLong, docIDs, (int) start, Math.toIntExact(start+length), singleValuePerDoc);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java b/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java
index 132ad3c..1c68478 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/MutablePointsReaderUtils.java
@@ -26,13 +26,16 @@ import org.apache.lucene.util.Selector;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.packed.PackedInts;
 
-final class MutablePointsReaderUtils {
+/** Utility APIs for sorting and partitioning buffered points.
+ *
+ * @lucene.internal */
+public final class MutablePointsReaderUtils {
 
   MutablePointsReaderUtils() {}
 
   /** Sort the given {@link MutablePointValues} based on its packed value then doc ID. */
-  static void sort(int maxDoc, int packedBytesLength,
-      MutablePointValues reader, int from, int to) {
+  public static void sort(int maxDoc, int packedBytesLength,
+                          MutablePointValues reader, int from, int to) {
     final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);
     new MSBRadixSorter(packedBytesLength + (bitsPerDocId + 7) / 8) {
 
@@ -88,9 +91,9 @@ final class MutablePointsReaderUtils {
   }
 
   /** Sort points on the given dimension. */
-  static void sortByDim(int sortedDim, int bytesPerDim, int[] commonPrefixLengths,
-      MutablePointValues reader, int from, int to,
-      BytesRef scratch1, BytesRef scratch2) {
+  public static void sortByDim(int sortedDim, int bytesPerDim, int[] commonPrefixLengths,
+                               MutablePointValues reader, int from, int to,
+                               BytesRef scratch1, BytesRef scratch2) {
 
     // No need for a fancy radix sort here, this is called on the leaves only so
     // there are not many values to sort
@@ -127,9 +130,9 @@ final class MutablePointsReaderUtils {
   /** Partition points around {@code mid}. All values on the left must be less
    *  than or equal to it and all values on the right must be greater than or
    *  equal to it. */
-  static void partition(int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen,
-      MutablePointValues reader, int from, int to, int mid,
-      BytesRef scratch1, BytesRef scratch2) {
+  public static void partition(int maxDoc, int splitDim, int bytesPerDim, int commonPrefixLen,
+                               MutablePointValues reader, int from, int to, int mid,
+                               BytesRef scratch1, BytesRef scratch2) {
     final int offset = splitDim * bytesPerDim + commonPrefixLen;
     final int cmpBytes = bytesPerDim - commonPrefixLen;
     final int bitsPerDocId = PackedInts.bitsRequired(maxDoc - 1);

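A rough sketch of how BKDWriter drives these now-public helpers (mirroring the build() calls in the diff above; maxDoc, mid, and the scratch BytesRefs are assumed to be in scope):

    // full order by packed value then docID, e.g. before writing a 1D tree:
    MutablePointsReaderUtils.sort(maxDoc, packedBytesLength, reader, from, to);

    // while recursing: partition around the midpoint on the chosen split dimension:
    MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
                                       reader, from, to, mid, scratch1, scratch2);
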
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
index 17758c0..2861d59 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointReader.java
@@ -27,8 +27,10 @@ import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.LongBitSet;
 
-/** Reads points from disk in a fixed-width format, previously written with {@link OfflinePointWriter}. */
-final class OfflinePointReader extends PointReader {
+/** Reads points from disk in a fixed-width format, previously written with {@link OfflinePointWriter}.
+ *
+ * @lucene.internal */
+public final class OfflinePointReader extends PointReader {
   long countLeft;
   final IndexInput in;
   private final byte[] packedValue;
@@ -43,7 +45,7 @@ final class OfflinePointReader extends PointReader {
   // File name we are reading
   final String name;
 
-  OfflinePointReader(Directory tempDir, String tempFileName, int packedBytesLength, long start, long length,
+  public OfflinePointReader(Directory tempDir, String tempFileName, int packedBytesLength, long start, long length,
                      boolean longOrds, boolean singleValuePerDoc) throws IOException {
     this.singleValuePerDoc = singleValuePerDoc;
     int bytesPerDoc = packedBytesLength + Integer.BYTES;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
index 87637ae..7e615a6 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/OfflinePointWriter.java
@@ -26,12 +26,14 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexOutput;
 
-/** Writes points to disk in a fixed-width format. */
-final class OfflinePointWriter implements PointWriter {
+/** Writes points to disk in a fixed-width format.
+ *
+ * @lucene.internal */
+public final class OfflinePointWriter implements PointWriter {
 
   final Directory tempDir;
-  final IndexOutput out;
-  final String name;
+  public final IndexOutput out;
+  public final String name;
   final int packedBytesLength;
   final boolean singleValuePerDoc;
   long count;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
index 90de0d1..0c31275 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/PointReader.java
@@ -24,20 +24,22 @@ import org.apache.lucene.util.LongBitSet;
 
 /** One pass iterator through all points previously written with a
 *  {@link PointWriter}, abstracting away whether points are read
- *  from (offline) disk or simple arrays in heap. */
-abstract class PointReader implements Closeable {
+ *  from (offline) disk or simple arrays in heap.
+ *
+ * @lucene.internal */
+public abstract class PointReader implements Closeable {
 
   /** Returns false once iteration is done, else true. */
-  abstract boolean next() throws IOException;
+  public abstract boolean next() throws IOException;
 
   /** Returns the packed byte[] value */
-  abstract byte[] packedValue();
+  public abstract byte[] packedValue();
 
   /** Point ordinal */
-  abstract long ord();
+  public abstract long ord();
 
   /** DocID for this point */
-  abstract int docID();
+  public abstract int docID();
 
   /** Iterates through the next {@code count} ords, marking them in the provided {@code ordBitSet}. */
   public void markOrds(long count, LongBitSet ordBitSet) throws IOException {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/java/org/apache/lucene/util/bkd/PointWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/PointWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/PointWriter.java
index d19f6e5..0222d0e 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/PointWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/PointWriter.java
@@ -23,8 +23,10 @@ import java.util.List;
 
 /** Appends many points, and then at the end provides a {@link PointReader} to iterate
  *  those points.  This abstracts away whether we write to disk, or use simple arrays
- *  in heap. */
-interface PointWriter extends Closeable {
+ *  in heap.
+ *
+ *  @lucene.internal */
+public interface PointWriter extends Closeable {
   /** Add a new point */
   void append(byte[] packedValue, long ord, int docID) throws IOException;
 

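Taken together with PointReader above, a write-then-iterate round trip looks roughly like this (a sketch; numPoints, packedBytesLength, and packedValueFor are illustrative, not part of this change):

    HeapPointWriter writer = new HeapPointWriter(numPoints, numPoints, packedBytesLength,
                                                 false /* longOrds */, true /* singleValuePerDoc */);
    for (int docID = 0; docID < numPoints; docID++) {
      writer.append(packedValueFor(docID), docID /* ord */, docID);
    }
    writer.close();
    try (PointReader reader = writer.getReader(0, numPoints)) {
      while (reader.next()) {
        byte[] packedValue = reader.packedValue();
        int docID = reader.docID();
        // ... consume packedValue/docID ...
      }
    }
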
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
index 5ad71bf..73b2813 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestPointQueries.java
@@ -621,6 +621,9 @@ public class TestPointQueries extends LuceneTestCase {
     int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
 
     int sameValuePct = random().nextInt(100);
+    if (VERBOSE) {
+      System.out.println("TEST: sameValuePct=" + sameValuePct);
+    }
 
     byte[][][] docValues = new byte[numValues][][];
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java b/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java
index af2e463..e30168c 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/Test2BBKDPoints.java
@@ -16,6 +16,7 @@
  */
 package org.apache.lucene.util.bkd;
 
+import org.apache.lucene.index.CheckIndex;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.IOContext;
@@ -64,7 +65,10 @@ public class Test2BBKDPoints extends LuceneTestCase {
     IndexInput in = dir.openInput("1d.bkd", IOContext.DEFAULT);
     in.seek(indexFP);
     BKDReader r = new BKDReader(in);
-    r.verify(numDocs);
+    CheckIndex.VerifyPointsVisitor visitor = new CheckIndex.VerifyPointsVisitor("1d", numDocs, r);
+    r.intersect(visitor);
+    assertEquals(r.size(), visitor.getPointCountSeen());
+    assertEquals(r.getDocCount(), visitor.getDocCountSeen());
     in.close();
     dir.close();
   }
@@ -101,7 +105,10 @@ public class Test2BBKDPoints extends LuceneTestCase {
     IndexInput in = dir.openInput("2d.bkd", IOContext.DEFAULT);
     in.seek(indexFP);
     BKDReader r = new BKDReader(in);
-    r.verify(numDocs);
+    CheckIndex.VerifyPointsVisitor visitor = new CheckIndex.VerifyPointsVisitor("2d", numDocs, r);
+    r.intersect(visitor);
+    assertEquals(r.size(), visitor.getPointCountSeen());
+    assertEquals(r.getDocCount(), visitor.getDocCountSeen());
     in.close();
     dir.close();
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
index 9eb1fd3..8b9b7a5 100644
--- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
+++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestBKD.java
@@ -28,6 +28,7 @@ import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.PointValues.IntersectVisitor;
 import org.apache.lucene.index.PointValues.Relation;
+import org.apache.lucene.index.PointValues;
 import org.apache.lucene.store.CorruptingIndexOutput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FilterDirectory;
@@ -1010,4 +1011,57 @@ public class TestBKD extends LuceneTestCase {
     }
   }
 
+  // Claims 16 bytes per dim, but only uses the bottom 1-3 bytes; this would happen e.g. if a user indexes what are actually just short
+  // values as a LongPoint:
+  public void testWastedLeadingBytes() throws Exception {
+    int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);
+    int bytesPerDim = PointValues.MAX_NUM_BYTES;
+    int bytesUsed = TestUtil.nextInt(random(), 1, 3);
+
+    Directory dir = newFSDirectory(createTempDir());
+    int numDocs = 100000;
+    BKDWriter w = new BKDWriter(numDocs+1, dir, "tmp", numDims, bytesPerDim, 32, 1f, numDocs, true);
+    byte[] tmp = new byte[bytesUsed];
+    byte[] buffer = new byte[numDims * bytesPerDim];
+    for(int i=0;i<numDocs;i++) {
+      for(int dim=0;dim<numDims;dim++) {
+        random().nextBytes(tmp);
+        System.arraycopy(tmp, 0, buffer, dim*bytesPerDim+(bytesPerDim-bytesUsed), tmp.length);
+      }
+      w.add(buffer, i);
+    }
+    
+    IndexOutput out = dir.createOutput("bkd", IOContext.DEFAULT);
+    long fp = w.finish(out);
+    out.close();
+
+    IndexInput in = dir.openInput("bkd", IOContext.DEFAULT);
+    in.seek(fp);
+    BKDReader r = new BKDReader(in);
+    int[] count = new int[1];
+    r.intersect(new IntersectVisitor() {
+
+        @Override
+        public void visit(int docID) {
+          count[0]++;
+        }
+
+        @Override
+        public void visit(int docID, byte[] packedValue) {
+          visit(docID);
+        }
+
+        @Override
+        public Relation compare(byte[] minPacked, byte[] maxPacked) {
+          if (random().nextInt(7) == 1) {
+            return Relation.CELL_CROSSES_QUERY;
+          } else {
+            return Relation.CELL_INSIDE_QUERY;
+          }
+        }
+      });
+    assertEquals(numDocs, count[0]);
+    in.close();
+    dir.close();
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
index 6b218cf..dcce285 100644
--- a/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
+++ b/lucene/core/src/test/org/apache/lucene/util/fst/TestFSTs.java
@@ -228,7 +228,7 @@ public class TestFSTs extends LuceneTestCase {
         final long value = lastOutput + TestUtil.nextInt(random(), 1, 1000);
         lastOutput = value;
         pairs.add(new FSTTester.InputOutput<>(terms[idx],
-                                                                         outputs.newPair((long) idx, value)));
+                                              outputs.newPair((long) idx, value)));
       }
       new FSTTester<>(random(), dir, inputMode, pairs, outputs, false).doTest(true);
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/sandbox/src/java/org/apache/lucene/document/NearestNeighbor.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/document/NearestNeighbor.java b/lucene/sandbox/src/java/org/apache/lucene/document/NearestNeighbor.java
index 3b9f302..587c63f 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/document/NearestNeighbor.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/document/NearestNeighbor.java
@@ -26,7 +26,10 @@ import org.apache.lucene.geo.Rectangle;
 import org.apache.lucene.index.PointValues.IntersectVisitor;
 import org.apache.lucene.index.PointValues.Relation;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.SloppyMath;
+import org.apache.lucene.util.bkd.BKDReader.IndexTree;
+import org.apache.lucene.util.bkd.BKDReader.IntersectState;
 import org.apache.lucene.util.bkd.BKDReader;
 
 import static org.apache.lucene.geo.GeoEncodingUtils.decodeLatitude;
@@ -41,16 +44,16 @@ class NearestNeighbor {
 
   static class Cell implements Comparable<Cell> {
     final int readerIndex;
-    final int nodeID;
     final byte[] minPacked;
     final byte[] maxPacked;
+    final IndexTree index;
 
     /** The closest possible distance of all points in this cell */
     final double distanceMeters;
 
-    public Cell(int readerIndex, int nodeID, byte[] minPacked, byte[] maxPacked, double distanceMeters) {
+    public Cell(IndexTree index, int readerIndex, byte[] minPacked, byte[] maxPacked, double distanceMeters) {
+      this.index = index;
       this.readerIndex = readerIndex;
-      this.nodeID = nodeID;
       this.minPacked = minPacked.clone();
       this.maxPacked = maxPacked.clone();
       this.distanceMeters = distanceMeters;
@@ -66,7 +69,7 @@ class NearestNeighbor {
       double minLon = decodeLongitude(minPacked, Integer.BYTES);
       double maxLat = decodeLatitude(maxPacked, 0);
       double maxLon = decodeLongitude(maxPacked, Integer.BYTES);
-      return "Cell(readerIndex=" + readerIndex + " lat=" + minLat + " TO " + maxLat + ", lon=" + minLon + " TO " + maxLon + "; distanceMeters=" + distanceMeters + ")";
+      return "Cell(readerIndex=" + readerIndex + " nodeID=" + index.getNodeID() + " isLeaf=" + index.isLeafNode() + " lat=" + minLat + " TO " + maxLat + ", lon=" + minLon + " TO " + maxLon + "; distanceMeters=" + distanceMeters + ")";
     }
   }
 
@@ -219,13 +222,21 @@ class NearestNeighbor {
     List<BKDReader.IntersectState> states = new ArrayList<>();
 
     // Add root cell for each reader into the queue:
+    int bytesPerDim = -1;
+    
     for(int i=0;i<readers.size();i++) {
       BKDReader reader = readers.get(i);
+      if (bytesPerDim == -1) {
+        bytesPerDim = reader.getBytesPerDimension();
+      } else if (bytesPerDim != reader.getBytesPerDimension()) {
+        throw new IllegalStateException("bytesPerDim changed from " + bytesPerDim + " to " + reader.getBytesPerDimension() + " across readers");
+      }
       byte[] minPackedValue = reader.getMinPackedValue();
       byte[] maxPackedValue = reader.getMaxPackedValue();
-      states.add(reader.getIntersectState(visitor));
+      IntersectState state = reader.getIntersectState(visitor);
+      states.add(state);
 
-      cellQueue.offer(new Cell(i, 1, reader.getMinPackedValue(), reader.getMaxPackedValue(),
+      cellQueue.offer(new Cell(state.index, i, reader.getMinPackedValue(), reader.getMaxPackedValue(),
                                approxBestDistance(minPackedValue, maxPackedValue, pointLat, pointLon)));
     }
 
@@ -236,12 +247,12 @@ class NearestNeighbor {
       // TODO: if we replace approxBestDistance with actualBestDistance, we can put an opto here to break once this "best" cell is fully outside of the hitQueue bottom's radius:
       BKDReader reader = readers.get(cell.readerIndex);
 
-      if (reader.isLeafNode(cell.nodeID)) {
+      if (cell.index.isLeafNode()) {
         //System.out.println("    leaf");
         // Leaf block: visit all points and possibly collect them:
         visitor.curDocBase = docBases.get(cell.readerIndex);
         visitor.curLiveDocs = liveDocs.get(cell.readerIndex);
-        reader.visitLeafBlockValues(cell.nodeID, states.get(cell.readerIndex));
+        reader.visitLeafBlockValues(cell.index, states.get(cell.readerIndex));
         //System.out.println("    now " + hitQueue.size() + " hits");
       } else {
         //System.out.println("    non-leaf");
@@ -257,14 +268,23 @@ class NearestNeighbor {
           continue;
         }
         
+        BytesRef splitValue = BytesRef.deepCopyOf(cell.index.getSplitDimValue());
+        int splitDim = cell.index.getSplitDim();
+        
+        // we must clone the index so that we can recurse left and right "concurrently":
+        IndexTree newIndex = cell.index.clone();
         byte[] splitPackedValue = cell.maxPacked.clone();
-        reader.copySplitValue(cell.nodeID, splitPackedValue);
-        cellQueue.offer(new Cell(cell.readerIndex, 2*cell.nodeID, cell.minPacked, splitPackedValue,
+        System.arraycopy(splitValue.bytes, splitValue.offset, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+
+        cell.index.pushLeft();
+        cellQueue.offer(new Cell(cell.index, cell.readerIndex, cell.minPacked, splitPackedValue,
                                  approxBestDistance(cell.minPacked, splitPackedValue, pointLat, pointLon)));
 
         splitPackedValue = cell.minPacked.clone();
-        reader.copySplitValue(cell.nodeID, splitPackedValue);
-        cellQueue.offer(new Cell(cell.readerIndex, 2*cell.nodeID+1, splitPackedValue, cell.maxPacked,
+        System.arraycopy(splitValue.bytes, splitValue.offset, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+
+        newIndex.pushRight();
+        cellQueue.offer(new Cell(newIndex, cell.readerIndex, splitPackedValue, cell.maxPacked,
                                  approxBestDistance(splitPackedValue, cell.maxPacked, pointLat, pointLon)));
       }
     }


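The cloning in the NearestNeighbor change above is the crux: one IndexTree instance tracks the current recursion position, so enqueuing both children requires a clone before either side descends. In sketch form (method names per BKDReader.IndexTree in this change):

    IndexTree left = cell.index;      // reuse the current tree for the left child
    IndexTree right = left.clone();   // clone before either side moves
    left.pushLeft();                  // left now points at nodeID*2
    right.pushRight();                // right now points at nodeID*2+1
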
[21/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7583: buffer small leaf-block writes in BKDWriter

Posted by kr...@apache.org.
LUCENE-7583: buffer small leaf-block writes in BKDWriter


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/b97d9d74
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/b97d9d74
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/b97d9d74

Branch: refs/heads/jira/solr-8593
Commit: b97d9d7478f99660c1cfc91ef4461b7405254dea
Parents: cacabc9
Author: Mike McCandless <mi...@apache.org>
Authored: Wed Dec 7 18:59:23 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Wed Dec 7 18:59:23 2016 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   4 +
 .../CompressingStoredFieldsWriter.java          |  19 ++--
 .../CompressingTermVectorsWriter.java           |  11 +-
 .../GrowableByteArrayDataOutput.java            |  83 ---------------
 .../util/GrowableByteArrayDataOutput.java       | 103 +++++++++++++++++++
 .../org/apache/lucene/util/bkd/BKDWriter.java   |  85 ++++++++-------
 .../apache/lucene/util/bkd/DocIdsWriter.java    |   4 +-
 .../TestGrowableByteArrayDataOutput.java        |  80 --------------
 .../store/TestGrowableByteArrayDataOutput.java  |  80 ++++++++++++++
 9 files changed, 251 insertions(+), 218 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b97d9d74/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index c6c39ac..26a9dec 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -134,6 +134,10 @@ Optimizations
   a compressed format, using substantially less RAM in some cases
   (Adrien Grand, Mike McCandless)
 
+* LUCENE-7583: BKD writing now buffers each leaf block in heap before
+  writing to disk, giving a small speedup in points-heavy use cases.
+  (Mike McCandless)
+
 Other
 
 * LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file

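The pattern behind this CHANGES entry, as it appears in the BKDWriter and compressing-codec diffs in this commit: each leaf block is first serialized into a reusable in-heap buffer, then appended to the on-disk IndexOutput in a single sequential write. Roughly (a sketch using the names from the BKDWriter diff):

    // serialize one leaf block into heap:
    writeLeafBlockDocs(scratchOut, docIDs, 0, count);
    writeCommonPrefixes(scratchOut, commonPrefixLengths, scratch1);
    writeLeafBlockPackedValues(scratchOut, commonPrefixLengths, count, sortedDim, packedValues);
    // ... then one write to disk, and reuse the buffer:
    out.writeBytes(scratchOut.getBytes(), 0, scratchOut.getPosition());
    scratchOut.reset();
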
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b97d9d74/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
index 1956ab7..cda855d 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
@@ -33,6 +33,7 @@ import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.GrowableByteArrayDataOutput;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
@@ -157,7 +158,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
     }
     this.numStoredFields[numBufferedDocs] = numStoredFieldsInDoc;
     numStoredFieldsInDoc = 0;
-    endOffsets[numBufferedDocs] = bufferedDocs.length;
+    endOffsets[numBufferedDocs] = bufferedDocs.getPosition();
     ++numBufferedDocs;
     if (triggerFlush()) {
       flush();
@@ -210,7 +211,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
   }
 
   private boolean triggerFlush() {
-    return bufferedDocs.length >= chunkSize || // chunks of at least chunkSize bytes
+    return bufferedDocs.getPosition() >= chunkSize || // chunks of at least chunkSize bytes
         numBufferedDocs >= maxDocsPerChunk;
   }
 
@@ -223,23 +224,23 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
       lengths[i] = endOffsets[i] - endOffsets[i - 1];
       assert lengths[i] >= 0;
     }
-    final boolean sliced = bufferedDocs.length >= 2 * chunkSize;
+    final boolean sliced = bufferedDocs.getPosition() >= 2 * chunkSize;
     writeHeader(docBase, numBufferedDocs, numStoredFields, lengths, sliced);
 
     // compress stored fields to fieldsStream
     if (sliced) {
       // big chunk, slice it
-      for (int compressed = 0; compressed < bufferedDocs.length; compressed += chunkSize) {
-        compressor.compress(bufferedDocs.bytes, compressed, Math.min(chunkSize, bufferedDocs.length - compressed), fieldsStream);
+      for (int compressed = 0; compressed < bufferedDocs.getPosition(); compressed += chunkSize) {
+        compressor.compress(bufferedDocs.getBytes(), compressed, Math.min(chunkSize, bufferedDocs.getPosition() - compressed), fieldsStream);
       }
     } else {
-      compressor.compress(bufferedDocs.bytes, 0, bufferedDocs.length, fieldsStream);
+      compressor.compress(bufferedDocs.getBytes(), 0, bufferedDocs.getPosition(), fieldsStream);
     }
 
     // reset
     docBase += numBufferedDocs;
     numBufferedDocs = 0;
-    bufferedDocs.length = 0;
+    bufferedDocs.reset();
     numChunks++;
   }
   
@@ -459,7 +460,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
       flush();
       numDirtyChunks++; // incomplete: we had to force this flush
     } else {
-      assert bufferedDocs.length == 0;
+      assert bufferedDocs.getPosition() == 0;
     }
     if (docBase != numDocs) {
       throw new RuntimeException("Wrote " + docBase + " docs, finish called with numDocs=" + numDocs);
@@ -468,7 +469,7 @@ public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
     fieldsStream.writeVLong(numChunks);
     fieldsStream.writeVLong(numDirtyChunks);
     CodecUtil.writeFooter(fieldsStream);
-    assert bufferedDocs.length == 0;
+    assert bufferedDocs.getPosition() == 0;
   }
   
   // bulk merge is scary: its caused corruption bugs in the past.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b97d9d74/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java
index 46a289a..9bd2483 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java
@@ -37,6 +37,7 @@ import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.SegmentInfo;
 import org.apache.lucene.store.DataInput;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.GrowableByteArrayDataOutput;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
@@ -269,8 +270,8 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
   @Override
   public void finishDocument() throws IOException {
     // append the payload bytes of the doc after its terms
-    termSuffixes.writeBytes(payloadBytes.bytes, payloadBytes.length);
-    payloadBytes.length = 0;
+    termSuffixes.writeBytes(payloadBytes.getBytes(), payloadBytes.getPosition());
+    payloadBytes.reset();
     ++numDocs;
     if (triggerFlush()) {
       flush();
@@ -316,7 +317,7 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
   }
 
   private boolean triggerFlush() {
-    return termSuffixes.length >= chunkSize
+    return termSuffixes.getPosition() >= chunkSize
         || pendingDocs.size() >= MAX_DOCUMENTS_PER_CHUNK;
   }
 
@@ -355,14 +356,14 @@ public final class CompressingTermVectorsWriter extends TermVectorsWriter {
       flushPayloadLengths();
 
       // compress terms and payloads and write them to the output
-      compressor.compress(termSuffixes.bytes, 0, termSuffixes.length, vectorsStream);
+      compressor.compress(termSuffixes.getBytes(), 0, termSuffixes.getPosition(), vectorsStream);
     }
 
     // reset
     pendingDocs.clear();
     curDoc = null;
     curField = null;
-    termSuffixes.length = 0;
+    termSuffixes.reset();
     numChunks++;
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b97d9d74/lucene/core/src/java/org/apache/lucene/codecs/compressing/GrowableByteArrayDataOutput.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/compressing/GrowableByteArrayDataOutput.java b/lucene/core/src/java/org/apache/lucene/codecs/compressing/GrowableByteArrayDataOutput.java
deleted file mode 100644
index ec551d1..0000000
--- a/lucene/core/src/java/org/apache/lucene/codecs/compressing/GrowableByteArrayDataOutput.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.codecs.compressing;
-
-
-import java.io.IOException;
-
-import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.UnicodeUtil;
-
-/**
- * A {@link DataOutput} that can be used to build a byte[].
- * @lucene.internal
- */
-public final class GrowableByteArrayDataOutput extends DataOutput {
-
-  /** Minimum utf8 byte size of a string over which double pass over string is to save memory during encode */
-  static final int MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING = 65536;
-
-  /** The bytes */
-  public byte[] bytes;
-  /** The length */
-  public int length;
-
-  // scratch for utf8 encoding of small strings
-  byte[] scratchBytes = new byte[16];
-
-  /** Create a {@link GrowableByteArrayDataOutput} with the given initial capacity. */
-  public GrowableByteArrayDataOutput(int cp) {
-    this.bytes = new byte[ArrayUtil.oversize(cp, 1)];
-    this.length = 0;
-  }
-
-  @Override
-  public void writeByte(byte b) {
-    if (length >= bytes.length) {
-      bytes = ArrayUtil.grow(bytes);
-    }
-    bytes[length++] = b;
-  }
-
-  @Override
-  public void writeBytes(byte[] b, int off, int len) {
-    final int newLength = length + len;
-    bytes = ArrayUtil.grow(bytes, newLength);
-    System.arraycopy(b, off, bytes, length, len);
-    length = newLength;
-  }
-
-  @Override
-  public void writeString(String string) throws IOException {
-    int maxLen = UnicodeUtil.maxUTF8Length(string.length());
-    if (maxLen <= MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING)  {
-      // string is small enough that we don't need to save memory by falling back to double-pass approach
-      // this is just an optimized writeString() that re-uses scratchBytes.
-      scratchBytes = ArrayUtil.grow(scratchBytes, maxLen);
-      int len = UnicodeUtil.UTF16toUTF8(string, 0, string.length(), scratchBytes);
-      writeVInt(len);
-      writeBytes(scratchBytes, len);
-    } else  {
-      // use a double pass approach to avoid allocating a large intermediate buffer for string encoding
-      int numBytes = UnicodeUtil.calcUTF16toUTF8Length(string, 0, string.length());
-      writeVInt(numBytes);
-      bytes = ArrayUtil.grow(bytes, length + numBytes);
-      length = UnicodeUtil.UTF16toUTF8(string, 0, string.length(), bytes, length);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b97d9d74/lucene/core/src/java/org/apache/lucene/util/GrowableByteArrayDataOutput.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/GrowableByteArrayDataOutput.java b/lucene/core/src/java/org/apache/lucene/util/GrowableByteArrayDataOutput.java
new file mode 100644
index 0000000..5f00d4a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/util/GrowableByteArrayDataOutput.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.store;
+
+import java.io.IOException;
+
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.UnicodeUtil;
+
+/**
+ * A {@link DataOutput} that can be used to build a byte[].
+ *
+ * @lucene.internal
+ */
+public final class GrowableByteArrayDataOutput extends DataOutput {
+
+  /** Minimum utf8 byte size of a string over which double pass over string is to save memory during encode */
+  static final int MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING = 65536;
+
+  /** The bytes */
+  private byte[] bytes;
+
+  /** The length */
+  private int length;
+
+  // scratch for utf8 encoding of small strings
+  private byte[] scratchBytes;
+
+  /** Create a {@link GrowableByteArrayDataOutput} with the given initial capacity. */
+  public GrowableByteArrayDataOutput(int cp) {
+    this.bytes = new byte[ArrayUtil.oversize(cp, 1)];
+    this.length = 0;
+  }
+
+  @Override
+  public void writeByte(byte b) {
+    if (length >= bytes.length) {
+      bytes = ArrayUtil.grow(bytes);
+    }
+    bytes[length++] = b;
+  }
+
+  @Override
+  public void writeBytes(byte[] b, int off, int len) {
+    final int newLength = length + len;
+    if (newLength > bytes.length) {
+      bytes = ArrayUtil.grow(bytes, newLength);
+    }
+    System.arraycopy(b, off, bytes, length, len);
+    length = newLength;
+  }
+
+  @Override
+  public void writeString(String string) throws IOException {
+    int maxLen = UnicodeUtil.maxUTF8Length(string.length());
+    if (maxLen <= MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING)  {
+      // string is small enough that we don't need to save memory by falling back to the double-pass approach
+      // this is just an optimized writeString() that re-uses scratchBytes.
+      if (scratchBytes == null) {
+        scratchBytes = new byte[ArrayUtil.oversize(maxLen, Character.BYTES)];
+      } else {
+        scratchBytes = ArrayUtil.grow(scratchBytes, maxLen);
+      }
+      int len = UnicodeUtil.UTF16toUTF8(string, 0, string.length(), scratchBytes);
+      writeVInt(len);
+      writeBytes(scratchBytes, len);
+    } else  {
+      // use a double pass approach to avoid allocating a large intermediate buffer for string encoding
+      int numBytes = UnicodeUtil.calcUTF16toUTF8Length(string, 0, string.length());
+      writeVInt(numBytes);
+      bytes = ArrayUtil.grow(bytes, length + numBytes);
+      length = UnicodeUtil.UTF16toUTF8(string, 0, string.length(), bytes, length);
+    }
+  }
+
+  public byte[] getBytes() {
+    return bytes;
+  }
+
+  public int getPosition() {
+    return length;
+  }
+
+  public void reset() {
+    length = 0;
+  }
+}

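A quick usage sketch of the relocated class (the new accessors replace the previously public bytes/length fields; out is assumed to be an IndexOutput already in scope):

    GrowableByteArrayDataOutput scratch = new GrowableByteArrayDataOutput(32 * 1024);
    scratch.writeVInt(count);
    scratch.writeString("some string");
    out.writeBytes(scratch.getBytes(), 0, scratch.getPosition());
    scratch.reset();  // buffer is reused for the next block
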
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b97d9d74/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
index c82a0c8..9657578 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -30,7 +30,9 @@ import org.apache.lucene.index.MergeState;
 import org.apache.lucene.index.PointValues.IntersectVisitor;
 import org.apache.lucene.index.PointValues.Relation;
 import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.GrowableByteArrayDataOutput;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RAMOutputStream;
@@ -478,8 +480,8 @@ public class BKDWriter implements Closeable {
     }
 
     build(1, numLeaves, values, 0, Math.toIntExact(pointCount), out,
-        minPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs,
-        new int[maxPointsInLeafNode]);
+          minPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs,
+          new int[maxPointsInLeafNode]);
 
     long indexFP = out.getFilePointer();
     writeIndex(out, leafBlockFPs, splitPackedValues);
@@ -556,6 +558,9 @@ public class BKDWriter implements Closeable {
     return oneDimWriter.finish();
   }
 
+  // reused when writing leaf blocks
+  private final GrowableByteArrayDataOutput scratchOut = new GrowableByteArrayDataOutput(32*1024);
+
   private class OneDimensionBKDWriter {
 
     final IndexOutput out;
@@ -563,8 +568,8 @@ public class BKDWriter implements Closeable {
     final List<byte[]> leafBlockStartValues = new ArrayList<>();
     final byte[] leafValues = new byte[maxPointsInLeafNode * packedBytesLength];
     final int[] leafDocs = new int[maxPointsInLeafNode];
-    long valueCount;
-    int leafCount;
+    private long valueCount;
+    private int leafCount;
 
     OneDimensionBKDWriter(IndexOutput out) {
       if (numDims != 1) {
@@ -589,7 +594,7 @@ public class BKDWriter implements Closeable {
 
     // for asserts
     final byte[] lastPackedValue;
-    int lastDocID;
+    private int lastDocID;
 
     void add(byte[] packedValue, int docID) throws IOException {
       assert valueInOrder(valueCount + leafCount,
@@ -606,8 +611,7 @@ public class BKDWriter implements Closeable {
 
       if (leafCount == maxPointsInLeafNode) {
         // We write a block once we hit exactly the max count ... this is different from
-        // when we flush a new segment, where we write between max/2 and max per leaf block,
-        // so merged segments will behave differently from newly flushed segments:
+        // when we write N > 1 dimensional points where we write between max/2 and max per leaf block
         writeLeafBlock();
         leafCount = 0;
       }
@@ -644,7 +648,6 @@ public class BKDWriter implements Closeable {
     }
 
     private void writeLeafBlock() throws IOException {
-      //System.out.println("writeLeafBlock pos=" + out.getFilePointer());
       assert leafCount != 0;
       if (valueCount == 0) {
         System.arraycopy(leafValues, 0, minPackedValue, 0, packedBytesLength);
@@ -660,42 +663,39 @@ public class BKDWriter implements Closeable {
       leafBlockFPs.add(out.getFilePointer());
       checkMaxLeafNodeCount(leafBlockFPs.size());
 
-      Arrays.fill(commonPrefixLengths, bytesPerDim);
       // Find per-dim common prefix:
-      for(int dim=0;dim<numDims;dim++) {
-        int offset1 = dim * bytesPerDim;
-        int offset2 = (leafCount - 1) * packedBytesLength + offset1;
-        for(int j=0;j<commonPrefixLengths[dim];j++) {
-          if (leafValues[offset1+j] != leafValues[offset2+j]) {
-            commonPrefixLengths[dim] = j;
-            break;
-          }
+      int prefix = bytesPerDim;
+      int offset = (leafCount - 1) * packedBytesLength;
+      for(int j=0;j<bytesPerDim;j++) {
+        if (leafValues[j] != leafValues[offset+j]) {
+          prefix = j;
+          break;
         }
       }
 
-      writeLeafBlockDocs(out, leafDocs, 0, leafCount);
-      writeCommonPrefixes(out, commonPrefixLengths, leafValues);
+      commonPrefixLengths[0] = prefix;
 
-      final IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
-        final BytesRef scratch = new BytesRef();
-
-        {
-          scratch.length = packedBytesLength;
-          scratch.bytes = leafValues;
-        }
+      assert scratchOut.getPosition() == 0;
+      writeLeafBlockDocs(scratchOut, leafDocs, 0, leafCount);
+      writeCommonPrefixes(scratchOut, commonPrefixLengths, leafValues);
 
+      scratchBytesRef1.length = packedBytesLength;
+      scratchBytesRef1.bytes = leafValues;
+      
+      final IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
         @Override
         public BytesRef apply(int i) {
-          scratch.offset = packedBytesLength * i;
-          return scratch;
+          scratchBytesRef1.offset = packedBytesLength * i;
+          return scratchBytesRef1;
         }
       };
       assert valuesInOrderAndBounds(leafCount, 0, Arrays.copyOf(leafValues, packedBytesLength),
           Arrays.copyOfRange(leafValues, (leafCount - 1) * packedBytesLength, leafCount * packedBytesLength),
           packedValues, leafDocs, 0);
-      writeLeafBlockPackedValues(out, commonPrefixLengths, leafCount, 0, packedValues);
+      writeLeafBlockPackedValues(scratchOut, commonPrefixLengths, leafCount, 0, packedValues);
+      out.writeBytes(scratchOut.getBytes(), 0, scratchOut.getPosition());
+      scratchOut.reset();
     }
-
   }
 
   // TODO: there must be a simpler way?
@@ -1259,13 +1259,13 @@ public class BKDWriter implements Closeable {
     out.writeBytes(packedIndex, 0, packedIndex.length);
   }
 
-  private void writeLeafBlockDocs(IndexOutput out, int[] docIDs, int start, int count) throws IOException {
+  private void writeLeafBlockDocs(DataOutput out, int[] docIDs, int start, int count) throws IOException {
     assert count > 0: "maxPointsInLeafNode=" + maxPointsInLeafNode;
     out.writeVInt(count);
     DocIdsWriter.writeDocIds(docIDs, start, count, out);
   }
 
-  private void writeLeafBlockPackedValues(IndexOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction<BytesRef> packedValues) throws IOException {
+  private void writeLeafBlockPackedValues(DataOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction<BytesRef> packedValues) throws IOException {
     int prefixLenSum = Arrays.stream(commonPrefixLengths).sum();
     if (prefixLenSum == packedBytesLength) {
       // all values in this block are equal
@@ -1290,7 +1290,7 @@ public class BKDWriter implements Closeable {
     }
   }
 
-  private void writeLeafBlockPackedValuesRange(IndexOutput out, int[] commonPrefixLengths, int start, int end, IntFunction<BytesRef> packedValues) throws IOException {
+  private void writeLeafBlockPackedValuesRange(DataOutput out, int[] commonPrefixLengths, int start, int end, IntFunction<BytesRef> packedValues) throws IOException {
     for (int i = start; i < end; ++i) {
       BytesRef ref = packedValues.apply(i);
       assert ref.length == packedBytesLength;
@@ -1316,7 +1316,7 @@ public class BKDWriter implements Closeable {
     return end - start;
   }
 
-  private void writeCommonPrefixes(IndexOutput out, int[] commonPrefixes, byte[] packedValue) throws IOException {
+  private void writeCommonPrefixes(DataOutput out, int[] commonPrefixes, byte[] packedValue) throws IOException {
     for(int dim=0;dim<numDims;dim++) {
       out.writeVInt(commonPrefixes[dim]);
       //System.out.println(commonPrefixes[dim] + " of " + bytesPerDim);
@@ -1449,7 +1449,8 @@ public class BKDWriter implements Closeable {
     }
   }
 
-  /* Recursively reorders the provided reader and writes the bkd-tree on the fly. */
+  /* Recursively reorders the provided reader and writes the bkd-tree on the fly; this method is used
+   * when we are writing a new segment directly from IndexWriter's indexing buffer (MutablePointsReader). */
   private void build(int nodeID, int leafNodeOffset,
                      MutablePointValues reader, int from, int to,
                      IndexOutput out,
@@ -1513,18 +1514,20 @@ public class BKDWriter implements Closeable {
       // Save the block file pointer:
       leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
 
+      assert scratchOut.getPosition() == 0;
+
       // Write doc IDs
       int[] docIDs = spareDocIds;
       for (int i = from; i < to; ++i) {
         docIDs[i - from] = reader.getDocID(i);
       }
       //System.out.println("writeLeafBlock pos=" + out.getFilePointer());
-      writeLeafBlockDocs(out, docIDs, 0, count);
+      writeLeafBlockDocs(scratchOut, docIDs, 0, count);
 
       // Write the common prefixes:
       reader.getValue(from, scratchBytesRef1);
       System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, packedBytesLength);
-      writeCommonPrefixes(out, commonPrefixLengths, scratch1);
+      writeCommonPrefixes(scratchOut, commonPrefixLengths, scratch1);
 
       // Write the full values:
       IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
@@ -1536,7 +1539,10 @@ public class BKDWriter implements Closeable {
       };
       assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
           docIDs, 0);
-      writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
+      writeLeafBlockPackedValues(scratchOut, commonPrefixLengths, count, sortedDim, packedValues);
+      
+      out.writeBytes(scratchOut.getBytes(), 0, scratchOut.getPosition());
+      scratchOut.reset();
 
     } else {
       // inner node
@@ -1577,7 +1583,8 @@ public class BKDWriter implements Closeable {
     }
   }
 
-  /** The array (sized numDims) of PathSlice describe the cell we have currently recursed to. */
+  /** The array (sized numDims) of PathSlice describes the cell we have currently recursed to.
+   *  This method is used when we are merging previously written segments, in the numDims > 1 case. */
   private void build(int nodeID, int leafNodeOffset,
                      PathSlice[] slices,
                      LongBitSet ordBitSet,
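
The hunks above converge on one pattern: each leaf block is staged in a heap-resident
GrowableByteArrayDataOutput (the write* helpers now accept any DataOutput) and then copied to the
real IndexOutput with a single writeBytes call. A minimal sketch of that pattern, with an
illustrative buffer size and a simplified doc-ID encoding standing in for DocIdsWriter:

    import java.io.IOException;

    import org.apache.lucene.store.DataOutput;
    import org.apache.lucene.store.GrowableByteArrayDataOutput;
    import org.apache.lucene.store.IndexOutput;

    final class LeafBlockFlushSketch {
      // Reused across leaves; the 32 KB starting size is an illustrative choice.
      private final GrowableByteArrayDataOutput scratchOut = new GrowableByteArrayDataOutput(32 * 1024);

      // Helpers take DataOutput, so they can target either the scratch buffer or the file.
      private void writeLeafBlockDocs(DataOutput out, int[] docIDs, int start, int count) throws IOException {
        out.writeVInt(count);
        for (int i = start; i < start + count; i++) {
          out.writeVInt(docIDs[i]); // simplified stand-in for DocIdsWriter.writeDocIds
        }
      }

      void writeLeaf(IndexOutput out, int[] docIDs, int count) throws IOException {
        assert scratchOut.getPosition() == 0;
        writeLeafBlockDocs(scratchOut, docIDs, 0, count);                    // stage in heap
        out.writeBytes(scratchOut.getBytes(), 0, scratchOut.getPosition()); // one copy to disk
        scratchOut.reset();                                                 // reuse for next leaf
      }
    }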

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b97d9d74/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java
index 9dce5a8..d76c6c7 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java
@@ -19,14 +19,14 @@ package org.apache.lucene.util.bkd;
 import java.io.IOException;
 
 import org.apache.lucene.index.PointValues.IntersectVisitor;
+import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.store.IndexOutput;
 
 class DocIdsWriter {
 
   private DocIdsWriter() {}
 
-  static void writeDocIds(int[] docIds, int start, int count, IndexOutput out) throws IOException {
+  static void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOException {
     // docs can be sorted either when all docs in a block have the same value
     // or when a segment is sorted
     boolean sorted = true;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b97d9d74/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestGrowableByteArrayDataOutput.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestGrowableByteArrayDataOutput.java b/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestGrowableByteArrayDataOutput.java
deleted file mode 100644
index 37a7e4c..0000000
--- a/lucene/core/src/test/org/apache/lucene/codecs/compressing/TestGrowableByteArrayDataOutput.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.codecs.compressing;
-
-
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.UnicodeUtil;
-import org.junit.Test;
-
-/**
- * Test for {@link GrowableByteArrayDataOutput}
- */
-public class TestGrowableByteArrayDataOutput extends LuceneTestCase {
-
-  @Test
-  public void testWriteSmallStrings() throws Exception {
-    int minSizeForDoublePass = GrowableByteArrayDataOutput.MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING;
-
-    // a simple string encoding test
-    int num = atLeast(1000);
-    for (int i = 0; i < num; i++) {
-      // create a small string such that the single pass approach is used
-      int length = TestUtil.nextInt(random(), 1, minSizeForDoublePass - 1);
-      String unicode = TestUtil.randomFixedByteLengthUnicodeString(random(), length);
-      byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(unicode.length())];
-      int len = UnicodeUtil.UTF16toUTF8(unicode, 0, unicode.length(), utf8);
-
-      GrowableByteArrayDataOutput dataOutput = new GrowableByteArrayDataOutput(1 << 8);
-      //explicitly write utf8 len so that we know how many bytes it occupies
-      dataOutput.writeVInt(len);
-      int vintLen = dataOutput.length;
-      // now write the string which will internally write number of bytes as a vint and then utf8 bytes
-      dataOutput.writeString(unicode);
-
-      assertEquals("GrowableByteArrayDataOutput wrote the wrong length after encode", len + vintLen * 2, dataOutput.length);
-      for (int j = 0, k = vintLen * 2; j < len; j++, k++) {
-        assertEquals(utf8[j], dataOutput.bytes[k]);
-      }
-    }
-  }
-
-  @Test
-  public void testWriteLargeStrings() throws Exception {
-    int minSizeForDoublePass = GrowableByteArrayDataOutput.MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING;
-
-    int num = atLeast(100);
-    for (int i = 0; i < num; i++) {
-      String unicode = TestUtil.randomRealisticUnicodeString(random(), minSizeForDoublePass, 10 * minSizeForDoublePass);
-      byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(unicode.length())];
-      int len = UnicodeUtil.UTF16toUTF8(unicode, 0, unicode.length(), utf8);
-
-      GrowableByteArrayDataOutput dataOutput = new GrowableByteArrayDataOutput(1 << 8);
-      //explicitly write utf8 len so that we know how many bytes it occupies
-      dataOutput.writeVInt(len);
-      int vintLen = dataOutput.length;
-      // now write the string which will internally write number of bytes as a vint and then utf8 bytes
-      dataOutput.writeString(unicode);
-
-      assertEquals("GrowableByteArrayDataOutput wrote the wrong length after encode", len + vintLen * 2, dataOutput.length);
-      for (int j = 0, k = vintLen * 2; j < len; j++, k++) {
-        assertEquals(utf8[j], dataOutput.bytes[k]);
-      }
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b97d9d74/lucene/core/src/test/org/apache/lucene/store/TestGrowableByteArrayDataOutput.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/store/TestGrowableByteArrayDataOutput.java b/lucene/core/src/test/org/apache/lucene/store/TestGrowableByteArrayDataOutput.java
new file mode 100644
index 0000000..10992b7
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/store/TestGrowableByteArrayDataOutput.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.store;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+import org.apache.lucene.util.UnicodeUtil;
+import org.junit.Test;
+
+/**
+ * Test for {@link GrowableByteArrayDataOutput}
+ */
+public class TestGrowableByteArrayDataOutput extends LuceneTestCase {
+
+  @Test
+  public void testWriteSmallStrings() throws Exception {
+    int minSizeForDoublePass = GrowableByteArrayDataOutput.MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING;
+
+    // a simple string encoding test
+    int num = atLeast(1000);
+    for (int i = 0; i < num; i++) {
+      // create a small string such that the single pass approach is used
+      int length = TestUtil.nextInt(random(), 1, minSizeForDoublePass - 1);
+      String unicode = TestUtil.randomFixedByteLengthUnicodeString(random(), length);
+      byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(unicode.length())];
+      int len = UnicodeUtil.UTF16toUTF8(unicode, 0, unicode.length(), utf8);
+
+      GrowableByteArrayDataOutput dataOutput = new GrowableByteArrayDataOutput(1 << 8);
+      //explicitly write utf8 len so that we know how many bytes it occupies
+      dataOutput.writeVInt(len);
+      int vintLen = dataOutput.getPosition();
+      // now write the string which will internally write number of bytes as a vint and then utf8 bytes
+      dataOutput.writeString(unicode);
+
+      assertEquals("GrowableByteArrayDataOutput wrote the wrong length after encode", len + vintLen * 2, dataOutput.getPosition());
+      for (int j = 0, k = vintLen * 2; j < len; j++, k++) {
+        assertEquals(utf8[j], dataOutput.getBytes()[k]);
+      }
+    }
+  }
+
+  @Test
+  public void testWriteLargeStrings() throws Exception {
+    int minSizeForDoublePass = GrowableByteArrayDataOutput.MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING;
+
+    int num = atLeast(100);
+    for (int i = 0; i < num; i++) {
+      String unicode = TestUtil.randomRealisticUnicodeString(random(), minSizeForDoublePass, 10 * minSizeForDoublePass);
+      byte[] utf8 = new byte[UnicodeUtil.maxUTF8Length(unicode.length())];
+      int len = UnicodeUtil.UTF16toUTF8(unicode, 0, unicode.length(), utf8);
+
+      GrowableByteArrayDataOutput dataOutput = new GrowableByteArrayDataOutput(1 << 8);
+      //explicitly write utf8 len so that we know how many bytes it occupies
+      dataOutput.writeVInt(len);
+      int vintLen = dataOutput.getPosition();
+      // now write the string which will internally write number of bytes as a vint and then utf8 bytes
+      dataOutput.writeString(unicode);
+
+      assertEquals("GrowableByteArrayDataOutput wrote the wrong length after encode", len + vintLen * 2, dataOutput.getPosition());
+      for (int j = 0, k = vintLen * 2; j < len; j++, k++) {
+        assertEquals(utf8[j], dataOutput.getBytes()[k]);
+      }
+    }
+  }
+}
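
The relocated test also tracks an API change visible in the diff: the public length/bytes fields
gave way to getPosition()/getBytes(). A small round-trip sketch of the getter-based usage (the
round-trip itself is illustrative, not part of the commit):

    import org.apache.lucene.store.ByteArrayDataInput;
    import org.apache.lucene.store.GrowableByteArrayDataOutput;

    public class GrowableOutputRoundTrip {
      public static void main(String[] args) throws Exception {
        GrowableByteArrayDataOutput out = new GrowableByteArrayDataOutput(1 << 8);
        out.writeVInt(42);
        out.writeString("hello");

        // getPosition()/getBytes() replace direct access to the old public fields.
        ByteArrayDataInput in = new ByteArrayDataInput(out.getBytes(), 0, out.getPosition());
        System.out.println(in.readVInt());   // 42
        System.out.println(in.readString()); // hello
      }
    }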


[27/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7581: don't allow updating a doc values field if it's used in the index sort

Posted by kr...@apache.org.
LUCENE-7581: don't allow updating a doc values field if it's used in the index sort


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/4efbde4e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/4efbde4e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/4efbde4e

Branch: refs/heads/jira/solr-8593
Commit: 4efbde4e76277f364952866c071bb953ca2be070
Parents: 22d04a7
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Dec 9 18:05:13 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Dec 9 18:05:13 2016 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  4 +++
 .../org/apache/lucene/index/IndexWriter.java    |  6 ++++
 .../apache/lucene/index/IndexWriterConfig.java  |  3 ++
 .../lucene/index/LiveIndexWriterConfig.java     | 13 +++++++++
 .../apache/lucene/index/TestIndexSorting.java   | 30 ++++++++++++++++++--
 5 files changed, 53 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4efbde4e/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index b9deb7e..da6e3d2 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -90,6 +90,10 @@ Bug Fixes
   that does not extend ReflectiveAccessException in Java 9.
   (Uwe Schindler)
 
+* LUCENE-7581: Lucene now prevents updating a doc values field that is used
+  in the index sort, since this would lead to corruption.  (Jim
+  Ferenczi via Mike McCandless)
+
 Improvements
 
 * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4efbde4e/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index 9868785..3ee87b1 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -1619,6 +1619,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
     if (!globalFieldNumberMap.contains(field, DocValuesType.NUMERIC)) {
       throw new IllegalArgumentException("can only update existing numeric-docvalues fields!");
     }
+    if (config.getIndexSortFields().contains(field)) {
+      throw new IllegalArgumentException("cannot update docvalues field involved in the index sort, field=" + field + ", sort=" + config.getIndexSort());
+    }
     try {
       long seqNo = docWriter.updateDocValues(new NumericDocValuesUpdate(term, field, value));
       if (seqNo < 0) {
@@ -1713,6 +1716,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
       if (!globalFieldNumberMap.contains(f.name(), dvType)) {
         throw new IllegalArgumentException("can only update existing docvalues fields! field=" + f.name() + ", type=" + dvType);
       }
+      if (config.getIndexSortFields().contains(f.name())) {
+        throw new IllegalArgumentException("cannot update docvalues field involved in the index sort, field=" + f.name() + ", sort=" + config.getIndexSort());
+      }
       switch (dvType) {
         case NUMERIC:
           dvUpdates[i] = new NumericDocValuesUpdate(term, f.name(), (Long) f.numericValue());

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4efbde4e/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
index 4f642ee..ce4f0a8 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
@@ -18,7 +18,9 @@ package org.apache.lucene.index;
 
 
 import java.io.PrintStream;
+import java.util.Arrays;
 import java.util.EnumSet;
+import java.util.stream.Collectors;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -474,6 +476,7 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig {
       }
     }
     this.indexSort = sort;
+    this.indexSortFields = Arrays.stream(sort.getSort()).map((s) -> s.getField()).collect(Collectors.toSet());
     return this;
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4efbde4e/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java
index cec70c0..d9e1bc7 100644
--- a/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java
+++ b/lucene/core/src/java/org/apache/lucene/index/LiveIndexWriterConfig.java
@@ -17,6 +17,9 @@
 package org.apache.lucene.index;
 
 
+import java.util.Collections;
+import java.util.Set;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.codecs.Codec;
 import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
@@ -98,6 +101,9 @@ public class LiveIndexWriterConfig {
   /** The sort order to use to write merged segments. */
   protected Sort indexSort = null;
 
+  /** The field names involved in the index sort */
+  protected Set<String> indexSortFields = Collections.emptySet();
+
   // used by IndexWriterConfig
   LiveIndexWriterConfig(Analyzer analyzer) {
     this.analyzer = analyzer;
@@ -457,6 +463,13 @@ public class LiveIndexWriterConfig {
     return indexSort;
   }
 
+  /**
+   * Returns the field names involved in the index sort
+   */
+  public Set<String> getIndexSortFields() {
+    return indexSortFields;
+  }
+
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/4efbde4e/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
index 5ebf8f4..08a85ef 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
@@ -1700,6 +1700,29 @@ public class TestIndexSorting extends LuceneTestCase {
     dir.close();
   }
 
+
+  // docvalues fields involved in the index sort cannot be updated
+  public void testBadDVUpdate() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+    Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
+    iwc.setIndexSort(indexSort);
+    IndexWriter w = new IndexWriter(dir, iwc);
+    Document doc = new Document();
+    doc.add(new StringField("id", new BytesRef("0"), Store.NO));
+    doc.add(new NumericDocValuesField("foo", random().nextInt()));
+    w.addDocument(doc);
+    w.commit();
+    IllegalArgumentException exc = expectThrows(IllegalArgumentException.class,
+        () -> w.updateDocValues(new Term("id", "0"), new NumericDocValuesField("foo", -1)));
+    assertEquals(exc.getMessage(), "cannot update docvalues field involved in the index sort, field=foo, sort=<long: \"foo\">");
+    exc = expectThrows(IllegalArgumentException.class,
+        () -> w.updateNumericDocValue(new Term("id", "0"), "foo", -1));
+    assertEquals(exc.getMessage(), "cannot update docvalues field involved in the index sort, field=foo, sort=<long: \"foo\">");
+    w.close();
+    dir.close();
+  }
+
   static class DVUpdateRunnable implements Runnable {
 
     private final int numDocs;
@@ -1727,7 +1750,7 @@ public class TestIndexSorting extends LuceneTestCase {
           final long value = random.nextInt(20);
 
           synchronized (values) {
-            w.updateDocValues(new Term("id", Integer.toString(id)), new NumericDocValuesField("foo", value));
+            w.updateDocValues(new Term("id", Integer.toString(id)), new NumericDocValuesField("bar", value));
             values.put(id, value);
           }
 
@@ -1762,7 +1785,8 @@ public class TestIndexSorting extends LuceneTestCase {
     for (int i = 0; i < numDocs; ++i) {
       Document doc = new Document();
       doc.add(new StringField("id", Integer.toString(i), Store.NO));
-      doc.add(new NumericDocValuesField("foo", -1));
+      doc.add(new NumericDocValuesField("foo", random().nextInt()));
+      doc.add(new NumericDocValuesField("bar", -1));
       w.addDocument(doc);
       values.put(i, -1L);
     }
@@ -1786,7 +1810,7 @@ public class TestIndexSorting extends LuceneTestCase {
     for (int i = 0; i < numDocs; ++i) {
       final TopDocs topDocs = searcher.search(new TermQuery(new Term("id", Integer.toString(i))), 1);
       assertEquals(1, topDocs.totalHits);
-      NumericDocValues dvs = MultiDocValues.getNumericValues(reader, "foo");
+      NumericDocValues dvs = MultiDocValues.getNumericValues(reader, "bar");
       int hitDoc = topDocs.scoreDocs[0].doc;
       assertEquals(hitDoc, dvs.advance(hitDoc));
       assertEquals(values.get(i).longValue(), dvs.longValue());
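
Following the test above, a minimal stand-alone sketch of the new guard; the analyzer and
directory choices are illustrative:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.document.NumericDocValuesField;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class IndexSortUpdateGuard {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.LONG)));
        try (IndexWriter w = new IndexWriter(dir, iwc)) {
          Document doc = new Document();
          doc.add(new StringField("id", "0", Store.NO));
          doc.add(new NumericDocValuesField("foo", 42L));
          w.addDocument(doc);
          w.commit();
          try {
            w.updateNumericDocValue(new Term("id", "0"), "foo", -1L); // "foo" is the sort field
          } catch (IllegalArgumentException expected) {
            System.out.println(expected.getMessage()); // cannot update docvalues field involved in the index sort...
          }
        }
        dir.close();
      }
    }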


[29/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7570: don't run merges while holding the commitLock to prevent deadlock when merges are stalled and a tragic merge exception strikes

Posted by kr...@apache.org.
LUCENE-7570: don't run merges while holding the commitLock to prevent deadlock when merges are stalled and a tragic merge exception strikes


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2b073a2f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2b073a2f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2b073a2f

Branch: refs/heads/jira/solr-8593
Commit: 2b073a2f296289617bea8256d7efec06049df739
Parents: 7cffae3
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Dec 9 18:41:30 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Dec 9 18:41:30 2016 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  4 ++
 .../org/apache/lucene/index/IndexWriter.java    | 26 ++++++--
 .../index/TestTragicIndexWriterDeadlock.java    | 69 +++++++++++++++++++-
 3 files changed, 91 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2b073a2f/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index da6e3d2..15b89f0 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -94,6 +94,10 @@ Bug Fixes
   in the index sort, since this would lead to corruption.  (Jim
   Ferenczi via Mike McCandless)
 
+* LUCENE-7570: IndexWriter may deadlock if a commit is running while
+  there are too many merges running and one of the merges hits a
+  tragic exception (Joey Echeverria via Mike McCandless)
+
 Improvements
 
 * LUCENE-6824: TermAutomatonQuery now rewrites to TermQuery,

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2b073a2f/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index 3ee87b1..4789505 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -2952,11 +2952,16 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
   @Override
   public final long prepareCommit() throws IOException {
     ensureOpen();
-    pendingSeqNo = prepareCommitInternal(config.getMergePolicy());
+    boolean[] doMaybeMerge = new boolean[1];
+    pendingSeqNo = prepareCommitInternal(doMaybeMerge);
+    // we must do this outside of the commitLock else we can deadlock:
+    if (doMaybeMerge[0]) {
+      maybeMerge(config.getMergePolicy(), MergeTrigger.FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS);      
+    }
     return pendingSeqNo;
   }
 
-  private long prepareCommitInternal(MergePolicy mergePolicy) throws IOException {
+  private long prepareCommitInternal(boolean[] doMaybeMerge) throws IOException {
     startCommitTime = System.nanoTime();
     synchronized(commitLock) {
       ensureOpen(false);
@@ -3063,7 +3068,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
       boolean success = false;
       try {
         if (anySegmentsFlushed) {
-          maybeMerge(mergePolicy, MergeTrigger.FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS);
+          doMaybeMerge[0] = true;
         }
         startCommit(toCommit);
         success = true;
@@ -3184,6 +3189,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
       infoStream.message("IW", "commit: start");
     }
 
+    boolean[] doMaybeMerge = new boolean[1];
+
+    long seqNo;
+
     synchronized(commitLock) {
       ensureOpen(false);
 
@@ -3191,13 +3200,11 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
         infoStream.message("IW", "commit: enter lock");
       }
 
-      long seqNo;
-
       if (pendingCommit == null) {
         if (infoStream.isEnabled("IW")) {
           infoStream.message("IW", "commit: now prepare");
         }
-        seqNo = prepareCommitInternal(mergePolicy);
+        seqNo = prepareCommitInternal(doMaybeMerge);
       } else {
         if (infoStream.isEnabled("IW")) {
           infoStream.message("IW", "commit: already prepared");
@@ -3206,9 +3213,14 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable {
       }
 
       finishCommit();
+    }
 
-      return seqNo;
+    // we must do this outside of the commitLock else we can deadlock:
+    if (doMaybeMerge[0]) {
+      maybeMerge(mergePolicy, MergeTrigger.FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS);      
     }
+    
+    return seqNo;
   }
 
   private final void finishCommit() throws IOException {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2b073a2f/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java b/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java
index 3cce698..80f9392 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestTragicIndexWriterDeadlock.java
@@ -14,13 +14,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.lucene.index;
 
+package org.apache.lucene.index;
 
+import java.io.IOException;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.lucene.document.Document;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.util.LuceneTestCase;
 
@@ -92,4 +94,69 @@ public class TestTragicIndexWriterDeadlock extends LuceneTestCase {
     w.close();
     dir.close();
   }
+
+  // LUCENE-7570
+  public void testDeadlockStalledMerges() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig iwc = new IndexWriterConfig();
+
+    // so we merge every 2 segments:
+    LogMergePolicy mp = new LogDocMergePolicy();
+    mp.setMergeFactor(2);
+    iwc.setMergePolicy(mp);
+    CountDownLatch done = new CountDownLatch(1);
+    ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler() {
+        @Override
+        protected void doMerge(IndexWriter writer, MergePolicy.OneMerge merge) throws IOException {
+          // let the merge take forever, until the commit thread is stalled
+          try {
+            done.await();
+          } catch (InterruptedException ie) {
+            Thread.currentThread().interrupt();
+            throw new RuntimeException(ie);
+          }
+          super.doMerge(writer, merge);
+        }
+
+        @Override
+        protected synchronized void doStall() {
+          done.countDown();
+          super.doStall();
+        }
+
+        @Override
+        protected void handleMergeException(Directory dir, Throwable exc) {
+        }
+      };
+
+    // so we stall once the 2nd merge wants to run:
+    cms.setMaxMergesAndThreads(1, 1);
+    iwc.setMergeScheduler(cms);
+
+    // so we write a segment every 2 indexed docs:
+    iwc.setMaxBufferedDocs(2);
+
+    final IndexWriter w = new IndexWriter(dir, iwc) {
+      @Override
+      void mergeSuccess(MergePolicy.OneMerge merge) {
+        // tragedy strikes!
+        throw new OutOfMemoryError();
+      }
+      };
+
+    w.addDocument(new Document());
+    w.addDocument(new Document());
+    // w writes first segment
+    w.addDocument(new Document());
+    w.addDocument(new Document());
+    // w writes second segment, and kicks off a merge that takes forever (done.await)
+    w.addDocument(new Document());
+    w.addDocument(new Document());
+    // w writes third segment
+    w.addDocument(new Document());
+    w.commit();
+    // w writes fourth segment, and commit flushes and kicks off merge that stalls
+    w.close();
+    dir.close();
+  }
 }
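
The fix is an instance of a general pattern: record under the lock that follow-up work is
needed, and run that work only after the lock is released. A Lucene-independent sketch with
stand-in method bodies (hypothetical, for shape only):

    import java.io.IOException;

    public class CommitOutsideLockSketch {
      private final Object commitLock = new Object();

      public long commit() throws IOException {
        boolean[] doMaybeMerge = new boolean[1]; // filled in while the lock is held
        long seqNo;
        synchronized (commitLock) {
          seqNo = prepareCommitInternal(doMaybeMerge); // only records that a merge is wanted
          finishCommit();
        }
        // Merges run here, outside commitLock: a stalled merge thread plus a tragic
        // merge exception can otherwise wait on the lock the committer still holds.
        if (doMaybeMerge[0]) {
          maybeMerge();
        }
        return seqNo;
      }

      // Stand-ins for the real IndexWriter internals (hypothetical bodies):
      private long prepareCommitInternal(boolean[] doMaybeMerge) throws IOException {
        doMaybeMerge[0] = true; // pretend a segment was flushed during this commit
        return 1L;
      }
      private void finishCommit() {}
      private void maybeMerge() {}
    }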


[05/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7576: fix other codecs to detect when special case automaton is passed to Terms.intersect

Posted by kr...@apache.org.
LUCENE-7576: fix other codecs to detect when special case automaton is passed to Terms.intersect


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/8cbcbc9d
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/8cbcbc9d
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/8cbcbc9d

Branch: refs/heads/jira/solr-8593
Commit: 8cbcbc9d956754de1fab2c626705aa6d6ab9f910
Parents: 58476b1
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Dec 2 17:42:27 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Dec 2 17:42:50 2016 -0500

----------------------------------------------------------------------
 .../org/apache/lucene/codecs/memory/DirectPostingsFormat.java     | 3 +++
 .../java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java   | 3 +++
 .../src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java  | 3 +++
 3 files changed, 9 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8cbcbc9d/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
index 3ce2abe..00f25cf 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/DirectPostingsFormat.java
@@ -659,6 +659,9 @@ public final class DirectPostingsFormat extends PostingsFormat {
 
     @Override
     public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) {
+      if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
+        throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
+      }
       return new DirectIntersectTermsEnum(compiled, startTerm);
     }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8cbcbc9d/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
index 305c419..97bbea3 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTOrdTermsReader.java
@@ -270,6 +270,9 @@ public class FSTOrdTermsReader extends FieldsProducer {
 
     @Override
     public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
+      if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
+        throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
+      }
       return new IntersectTermsEnum(compiled, startTerm);
     }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8cbcbc9d/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
index 775f692..b120656 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/memory/FSTTermsReader.java
@@ -250,6 +250,9 @@ public class FSTTermsReader extends FieldsProducer {
 
     @Override
     public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
+      if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
+        throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
+      }
       return new IntersectTermsEnum(compiled, startTerm);
     }
 
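For callers the rule is now uniform across codecs: only NORMAL automata may go through
Terms.intersect, while the special types (NONE, ALL, SINGLE) belong to
CompiledAutomaton.getTermsEnum. A small dispatch sketch:

    import java.io.IOException;

    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.util.automaton.CompiledAutomaton;

    public final class IntersectDispatch {
      public static TermsEnum termsEnum(Terms terms, CompiledAutomaton compiled) throws IOException {
        if (compiled.type == CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
          return terms.intersect(compiled, null); // null startTerm: start from the first term
        }
        return compiled.getTermsEnum(terms); // NONE, ALL, SINGLE are handled here
      }
    }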


[43/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-9844: Display fc total size only when field entries asked for

Posted by kr...@apache.org.
SOLR-9844: Display fc total size only when field entries asked for


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/51237438
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/51237438
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/51237438

Branch: refs/heads/jira/solr-8593
Commit: 512374384a8984c56c91f47dcac4aaf0490eda54
Parents: 7dec783
Author: Varun Thacker <va...@apache.org>
Authored: Tue Dec 13 15:52:17 2016 -0800
Committer: Varun Thacker <va...@apache.org>
Committed: Wed Dec 14 10:52:26 2016 -0800

----------------------------------------------------------------------
 .../apache/solr/search/SolrFieldCacheMBean.java |  6 ++---
 .../solr/uninverting/UninvertingReader.java     | 25 +++++++++++++-------
 .../solr/search/TestSolrFieldCacheMBean.java    |  3 ++-
 3 files changed, 22 insertions(+), 12 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/51237438/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java b/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java
index 70781e9..642b708 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrFieldCacheMBean.java
@@ -62,15 +62,15 @@ public class SolrFieldCacheMBean implements JmxAugmentedSolrInfoMBean {
   private NamedList getStats(boolean listEntries) {
     NamedList stats = new SimpleOrderedMap();
     if (listEntries) {
-      String[] entries = UninvertingReader.getUninvertedStats();
+      UninvertingReader.FieldCacheStats fieldCacheStats = UninvertingReader.getUninvertedStats();
+      String[] entries = fieldCacheStats.info;
       stats.add("entries_count", entries.length);
-      stats.add("total_size", UninvertingReader.getTotalSize());
+      stats.add("total_size", fieldCacheStats.totalSize);
       for (int i = 0; i < entries.length; i++) {
         stats.add("entry#" + i, entries[i]);
       }
     } else {
       stats.add("entries_count", UninvertingReader.getUninvertedStatsSize());
-      stats.add("total_size", UninvertingReader.getTotalSize());
     }
     return stats;
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/51237438/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
index 87fb7a6..5276ca9 100644
--- a/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/UninvertingReader.java
@@ -375,25 +375,34 @@ public class UninvertingReader extends FilterLeafReader {
    * Return information about the backing cache
    * @lucene.internal 
    */
-  public static String[] getUninvertedStats() {
+  public static FieldCacheStats getUninvertedStats() {
     CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries();
+    long totalBytesUsed = 0;
     String[] info = new String[entries.length];
     for (int i = 0; i < entries.length; i++) {
       info[i] = entries[i].toString();
+      totalBytesUsed += entries[i].getValue().ramBytesUsed();
     }
-    return info;
+    String totalSize = RamUsageEstimator.humanReadableUnits(totalBytesUsed);
+    return new FieldCacheStats(totalSize, info);
   }
 
   public static int getUninvertedStatsSize() {
     return FieldCache.DEFAULT.getCacheEntries().length;
   }
 
-  public static String getTotalSize() {
-    CacheEntry[] entries = FieldCache.DEFAULT.getCacheEntries();
-    long totalBytesUsed = 0;
-    for (int i = 0; i < entries.length; i++) {
-      totalBytesUsed += entries[i].getValue().ramBytesUsed();
+  /**
+   * Return information about the backing cache
+   * @lucene.internal
+   */
+  public static class FieldCacheStats {
+    public String totalSize;
+    public String[] info;
+
+    public FieldCacheStats(String totalSize, String[] info) {
+      this.totalSize = totalSize;
+      this.info = info;
     }
-    return RamUsageEstimator.humanReadableUnits(totalBytesUsed);
+
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/51237438/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java b/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java
index a705e1e..35bdec6 100644
--- a/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java
+++ b/solr/core/src/test/org/apache/solr/search/TestSolrFieldCacheMBean.java
@@ -69,6 +69,7 @@ public class TestSolrFieldCacheMBean extends SolrTestCaseJ4 {
     SolrFieldCacheMBean mbean = new SolrFieldCacheMBean();
     NamedList stats = checkJmx ? mbean.getStatisticsForJmx() : mbean.getStatistics();
     assert(new Integer(stats.get("entries_count").toString()) > 0);
+    assertNotNull(stats.get("total_size"));
     assertNotNull(stats.get("entry#0"));
   }
 
@@ -76,7 +77,7 @@ public class TestSolrFieldCacheMBean extends SolrTestCaseJ4 {
     SolrFieldCacheMBean mbean = new SolrFieldCacheMBean();
     NamedList stats = checkJmx ? mbean.getStatisticsForJmx() : mbean.getStatistics();
     assert(new Integer(stats.get("entries_count").toString()) > 0);
-    assertNotNull(stats.get("total_size"));
+    assertNull(stats.get("total_size"));
     assertNull(stats.get("entry#0"));
   }
 }
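
A usage sketch of the reshaped API above: one getUninvertedStats() call now returns both the
entry list and their summed size, so the expensive ramBytesUsed() pass happens only when the
entries themselves are requested:

    import org.apache.solr.uninverting.UninvertingReader;

    public class FieldCacheStatsSketch {
      public static void main(String[] args) {
        UninvertingReader.FieldCacheStats stats = UninvertingReader.getUninvertedStats();
        System.out.println("entries_count=" + stats.info.length);
        System.out.println("total_size=" + stats.totalSize); // human-readable units string
        for (int i = 0; i < stats.info.length; i++) {
          System.out.println("entry#" + i + " " + stats.info[i]);
        }
      }
    }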


[02/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-9819: Upgrade Apache commons-fileupload to 1.3.2, fixing a security vulnerability

Posted by kr...@apache.org.
SOLR-9819: Upgrade Apache commons-fileupload to 1.3.2, fixing a security vulnerability


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/c61268f7
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/c61268f7
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/c61268f7

Branch: refs/heads/jira/solr-8593
Commit: c61268f7cd2c47884f98513febee6bb5f33ea6dc
Parents: 98f7572
Author: Anshum Gupta <an...@apache.org>
Authored: Fri Dec 2 12:09:10 2016 -0800
Committer: Anshum Gupta <an...@apache.org>
Committed: Fri Dec 2 12:14:35 2016 -0800

----------------------------------------------------------------------
 lucene/ivy-versions.properties                  | 2 +-
 solr/CHANGES.txt                                | 2 ++
 solr/licenses/commons-fileupload-1.3.1.jar.sha1 | 1 -
 solr/licenses/commons-fileupload-1.3.2.jar.sha1 | 1 +
 4 files changed, 4 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c61268f7/lucene/ivy-versions.properties
----------------------------------------------------------------------
diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties
index 8526105..ffc54a8 100644
--- a/lucene/ivy-versions.properties
+++ b/lucene/ivy-versions.properties
@@ -64,7 +64,7 @@ com.sun.jersey.version = 1.9
 /commons-collections/commons-collections = 3.2.2
 /commons-configuration/commons-configuration = 1.6
 /commons-digester/commons-digester = 2.1
-/commons-fileupload/commons-fileupload = 1.3.1
+/commons-fileupload/commons-fileupload = 1.3.2
 /commons-io/commons-io = 2.5
 /commons-lang/commons-lang = 2.6
 /commons-logging/commons-logging = 1.1.3

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c61268f7/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index d09ae3b..e766169 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -255,6 +255,8 @@ Other Changes
 * SOLR-9660: in GroupingSpecification factor [group](sort|offset|limit) into [group](sortSpec)
   (Judith Silverman, Christine Poerschke)
 
+* SOLR-9819: Upgrade commons-fileupload to 1.3.2, fixing a potential vulnerability CVE-2016-3092 (Anshum Gupta)
+
 ==================  6.3.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c61268f7/solr/licenses/commons-fileupload-1.3.1.jar.sha1
----------------------------------------------------------------------
diff --git a/solr/licenses/commons-fileupload-1.3.1.jar.sha1 b/solr/licenses/commons-fileupload-1.3.1.jar.sha1
deleted file mode 100644
index 32f4872..0000000
--- a/solr/licenses/commons-fileupload-1.3.1.jar.sha1
+++ /dev/null
@@ -1 +0,0 @@
-c621b54583719ac0310404463d6d99db27e1052c

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c61268f7/solr/licenses/commons-fileupload-1.3.2.jar.sha1
----------------------------------------------------------------------
diff --git a/solr/licenses/commons-fileupload-1.3.2.jar.sha1 b/solr/licenses/commons-fileupload-1.3.2.jar.sha1
new file mode 100644
index 0000000..747b509
--- /dev/null
+++ b/solr/licenses/commons-fileupload-1.3.2.jar.sha1
@@ -0,0 +1 @@
+5d7491ed6ebd02b6a8d2305f8e6b7fe5dbd95f72
\ No newline at end of file


[24/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-9837: fix redundant calculation of docsWithField for numeric fields in field cache

Posted by kr...@apache.org.
SOLR-9837: fix redundant calculation of docsWithField for numeric fields in field cache


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/1d2e440a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/1d2e440a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/1d2e440a

Branch: refs/heads/jira/solr-8593
Commit: 1d2e440a8fe3df8d3207a7428841f79f63381e4f
Parents: 93c1146
Author: yonik <yo...@apache.org>
Authored: Thu Dec 8 18:29:07 2016 -0500
Committer: yonik <yo...@apache.org>
Committed: Thu Dec 8 18:29:07 2016 -0500

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  4 ++
 .../apache/solr/uninverting/FieldCacheImpl.java | 52 +++++++++++---------
 2 files changed, 32 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1d2e440a/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index abd9997..78f7f55 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -50,6 +50,10 @@ Bug Fixes
 * SOLR-9262: Connection and read timeouts are being ignored by UpdateShardHandler after SOLR-4509.
   (Mark Miller, shalin)
 
+* SOLR-9837: Fix 55% performance regression of FieldCache uninvert time of
+  numeric fields.  (yonik)
+
+
 Optimizations
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1d2e440a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java
index 0ca687f..90be400 100644
--- a/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java
+++ b/solr/core/src/java/org/apache/solr/uninverting/FieldCacheImpl.java
@@ -365,8 +365,6 @@ public class FieldCacheImpl implements FieldCache {
       }
     }
 
-    /** @deprecated remove this when legacy numerics are removed */
-    @Deprecated
     protected abstract TermsEnum termsEnum(Terms terms) throws IOException;
     protected abstract void visitTerm(BytesRef term);
     protected abstract void visitDoc(int docID);
@@ -632,20 +630,21 @@ public class FieldCacheImpl implements FieldCache {
         }
       }
 
-      Bits docsWithField = getDocsWithField(reader, field, parser);
-      return ((LongsFromArray) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser))).iterator(docsWithField);
+      return ((LongsFromArray) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser))).iterator();
     }
   }
 
-  static class LongsFromArray implements Accountable {
+  public static class LongsFromArray implements Accountable {
     private final PackedInts.Reader values;
     private final long minValue;
+    private final Bits docsWithField;
     private final String field;
 
-    public LongsFromArray(String field, PackedInts.Reader values, long minValue) {
+    public LongsFromArray(String field, PackedInts.Reader values, long minValue, Bits docsWithField) { // TODO: accept null docsWithField?
       this.field = field;
       this.values = values;
       this.minValue = minValue;
+      this.docsWithField = docsWithField;
     }
     
     @Override
@@ -653,7 +652,7 @@ public class FieldCacheImpl implements FieldCache {
       return values.ramBytesUsed() + RamUsageEstimator.NUM_BYTES_OBJECT_REF + Long.BYTES;
     }
 
-    public NumericDocValues iterator(final Bits docsWithField) {
+    public NumericDocValues iterator() {
       return new NumericDocValues() {
         int docID = -1;
 
@@ -767,10 +766,11 @@ public class FieldCacheImpl implements FieldCache {
       u.uninvert(reader, key.field);
       wrapper.setDocsWithField(reader, key.field, u.docsWithField, parser);
       GrowableWriterAndMinValue values = valuesRef.get();
+      Bits docsWithField = u.docsWithField == null ? new Bits.MatchNoBits(reader.maxDoc()) : u.docsWithField;
       if (values == null) {
-        return new LongsFromArray(key.field, new PackedInts.NullReader(reader.maxDoc()), 0L);
+        return new LongsFromArray(key.field, new PackedInts.NullReader(reader.maxDoc()), 0L, docsWithField);
       }
-      return new LongsFromArray(key.field, values.writer.getMutable(), values.minValue);
+      return new LongsFromArray(key.field, values.writer.getMutable(), values.minValue, docsWithField);
     }
   }
 
@@ -993,16 +993,18 @@ public class FieldCacheImpl implements FieldCache {
     }
   }
 
-  private static class BinaryDocValuesImpl implements Accountable {
+  public static class BinaryDocValuesImpl implements Accountable {
     private final PagedBytes.Reader bytes;
     private final PackedInts.Reader docToOffset;
+    private final Bits docsWithField;
 
-    public BinaryDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset) {
+    public BinaryDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset, Bits docsWithField) {
       this.bytes = bytes;
       this.docToOffset = docToOffset;
+      this.docsWithField = docsWithField;
     }
     
-    public BinaryDocValues iterator(Bits docsWithField) {
+    public BinaryDocValues iterator() {
       return new BinaryDocValues() {
 
         final BytesRef term = new BytesRef();
@@ -1109,7 +1111,7 @@ public class FieldCacheImpl implements FieldCache {
     }
 
     BinaryDocValuesImpl impl = (BinaryDocValuesImpl) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio));
-    return impl.iterator(getDocsWithField(reader, field, null));
+    return impl.iterator();
   }
 
   static final class BinaryDocValuesCache extends Cache {
@@ -1188,19 +1190,21 @@ public class FieldCacheImpl implements FieldCache {
       }
 
       final PackedInts.Reader offsetReader = docToOffset.getMutable();
-      wrapper.setDocsWithField(reader, key.field, new Bits() {
-          @Override
-          public boolean get(int index) {
-            return offsetReader.get(index) != 0;
-          }
+      Bits docsWithField = new Bits() {
+        @Override
+        public boolean get(int index) {
+          return offsetReader.get(index) != 0;
+        }
 
-          @Override
-          public int length() {
-            return maxDoc;
-          }
-        }, null);
+        @Override
+        public int length() {
+          return maxDoc;
+        }
+      };
+
+      wrapper.setDocsWithField(reader, key.field, docsWithField, null);
       // maybe an int-only impl?
-      return new BinaryDocValuesImpl(bytes.freeze(true), offsetReader);
+      return new BinaryDocValuesImpl(bytes.freeze(true), offsetReader, docsWithField);
     }
   }
 


[14/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-5043: New solr.dns.prevent.reverse.lookup system property that can be used to prevent long core (re)load delays on systems with misconfigured hostname/DNS

Posted by kr...@apache.org.
SOLR-5043: New solr.dns.prevent.reverse.lookup system property that can be used to prevent long core (re)load delays on systems with misconfigured hostname/DNS


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/8b98b158
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/8b98b158
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/8b98b158

Branch: refs/heads/jira/solr-8593
Commit: 8b98b158ff9cc2a71216e12c894ca14352d31f0e
Parents: c164f7e
Author: Chris Hostetter <ho...@apache.org>
Authored: Tue Dec 6 14:47:03 2016 -0700
Committer: Chris Hostetter <ho...@apache.org>
Committed: Tue Dec 6 14:47:03 2016 -0700

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  3 ++
 .../solr/handler/admin/SystemInfoHandler.java   | 51 +++++++++++++++++---
 2 files changed, 46 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8b98b158/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 4f7377c..14dd2fa 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -152,6 +152,9 @@ New Features
 
 * SOLR-9728: Ability to specify Key Store type in solr.in.sh file for SSL (Michael Suzuki, Kevin Risden)
 
+* SOLR-5043: New solr.dns.prevent.reverse.lookup system property that can be used to prevent long core
+  (re)load delays on systems with misconfigured hostname/DNS (hossman)
+
 Optimizations
 ----------------------
 * SOLR-9704: Facet Module / JSON Facet API: Optimize blockChildren facets that have

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8b98b158/solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java
index 35ef906..a873c09 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java
@@ -31,7 +31,6 @@ import java.lang.management.PlatformManagedObject;
 import java.lang.management.RuntimeMXBean;
 import java.lang.reflect.InvocationTargetException;
 import java.net.InetAddress;
-import java.net.UnknownHostException;
 import java.nio.charset.Charset;
 import java.text.DecimalFormat;
 import java.text.DecimalFormatSymbols;
@@ -50,6 +49,8 @@ import org.apache.solr.handler.RequestHandlerBase;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.schema.IndexSchema;
+import org.apache.solr.util.RTimer;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -64,8 +65,22 @@ import static org.apache.solr.common.params.CommonParams.NAME;
 public class SystemInfoHandler extends RequestHandlerBase 
 {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-  
 
+  /**
+   * <p>
+   * Undocumented expert level system property to prevent doing a reverse lookup of our hostname.
+   * This property will be logged as a suggested workaround if any problems are noticed when doing reverse 
+   * lookup.
+   * </p>
+   *
+   * <p>
+   * TODO: should we refactor this (and the associated logic) into a helper method for any other places
+   * where DNS is used?
+   * </p>
+   * @see #initHostname
+   */
+  private static final String PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP = "solr.dns.prevent.reverse.lookup";
+  
   // on some platforms, resolving canonical hostname can cause the thread
   // to block for several seconds if nameservices aren't available
   // so resolve this once per handler instance 
@@ -75,22 +90,42 @@ public class SystemInfoHandler extends RequestHandlerBase
   private CoreContainer cc;
 
   public SystemInfoHandler() {
-    super();
-    init();
+    this(null);
   }
 
   public SystemInfoHandler(CoreContainer cc) {
     super();
     this.cc = cc;
-    init();
+    initHostname();
   }
   
-  private void init() {
+  private void initHostname() {
+    if (null != System.getProperty(PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP, null)) {
+      log.info("Resolving canonical hostname for local host prevented due to '{}' sysprop",
+               PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP);
+      hostname = null;
+      return;
+    }
+    
+    RTimer timer = new RTimer();
     try {
       InetAddress addr = InetAddress.getLocalHost();
       hostname = addr.getCanonicalHostName();
-    } catch (UnknownHostException e) {
-      //default to null
+    } catch (Exception e) {
+      log.warn("Unable to resolve canonical hostname for local host, possible DNS misconfiguration. " +
+               "Set the '"+PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP+"' sysprop to true on startup to " +
+               "prevent future lookups if DNS cannot be fixed.", e);
+      hostname = null;
+      return;
+    }
+    timer.stop();
+    
+    if (15000D < timer.getTime()) {
+      String readableTime = String.format(Locale.ROOT, "%.3f", (timer.getTime() / 1000));
+      log.warn("Resolving canonical hostname for local host took {} seconds, possible DNS misconfiguration. " +
+               "Set the '{}' sysprop to true on startup to prevent future lookups if DNS cannot be fixed.",
+               readableTime, PREVENT_REVERSE_DNS_OF_LOCALHOST_SYSPROP);
+    
     }
   }
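
A stand-alone sketch of the same guard-and-time pattern without Solr's RTimer; the sysprop name
matches the patch, while the threshold handling and log destinations are illustrative:

    import java.net.InetAddress;
    import java.util.Locale;

    public class HostnameResolver {
      private static final String SYSPROP = "solr.dns.prevent.reverse.lookup";

      public static String resolve() {
        if (System.getProperty(SYSPROP) != null) {
          return null; // operator opted out of the (possibly blocking) lookup
        }
        long startNanos = System.nanoTime();
        String hostname;
        try {
          hostname = InetAddress.getLocalHost().getCanonicalHostName();
        } catch (Exception e) {
          System.err.println("Unable to resolve canonical hostname; set -D" + SYSPROP + "=true");
          return null;
        }
        double seconds = (System.nanoTime() - startNanos) / 1_000_000_000.0;
        if (seconds > 15.0) { // same 15-second sniff test as the patch
          System.err.println(String.format(Locale.ROOT,
              "Reverse lookup took %.3f sec; set -D%s=true if DNS cannot be fixed", seconds, SYSPROP));
        }
        return hostname;
      }
    }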
 


[13/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-9827: Make ConcurrentUpdateSolrClient create RemoteSolrExceptions in case of remote errors instead of SolrException

Posted by kr...@apache.org.
SOLR-9827: Make ConcurrentUpdateSolrClient create RemoteSolrExceptions in case of remote errors instead of SolrException
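
In outline (a sketch, not the full patch: the response-parsing plumbing is elided, and the
four-argument RemoteSolrException constructor is assumed from SolrJ of this era), the client
parses the remote error body, appends the remote message, and throws
HttpSolrClient.RemoteSolrException carrying the error metadata:

    import org.apache.solr.client.solrj.impl.HttpSolrClient;
    import org.apache.solr.common.SolrException;
    import org.apache.solr.common.util.NamedList;

    public class RemoteErrorSketch {
      @SuppressWarnings("unchecked")
      static SolrException toRemoteException(String baseUrl, int statusCode,
                                             StringBuilder msg, NamedList<Object> error) {
        NamedList<String> metadata = null;
        if (error != null) {
          metadata = (NamedList<String>) error.get("metadata");
          String remoteMsg = (String) error.get("msg");
          if (remoteMsg != null) {
            msg.append("\nRemote error message: ").append(remoteMsg);
          }
        }
        SolrException exc = new HttpSolrClient.RemoteSolrException(baseUrl, statusCode, msg.toString(), null);
        if (metadata != null) {
          exc.setMetadata(metadata);
        }
        return exc;
      }
    }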


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/c164f7e3
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/c164f7e3
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/c164f7e3

Branch: refs/heads/jira/solr-8593
Commit: c164f7e35e45d0bfa844cd450ffb4865c27fc4d5
Parents: bf3a313
Author: Tomas Fernandez Lobbe <tf...@apache.org>
Authored: Tue Dec 6 10:34:22 2016 -0800
Committer: Tomas Fernandez Lobbe <tf...@apache.org>
Committed: Tue Dec 6 10:34:22 2016 -0800

----------------------------------------------------------------------
 solr/CHANGES.txt                                    |  4 ++++
 .../solrj/impl/ConcurrentUpdateSolrClient.java      | 16 +++++++++++++---
 .../apache/solr/client/solrj/SolrExampleTests.java  |  7 ++++++-
 3 files changed, 23 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c164f7e3/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index bac24e5..4f7377c 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -259,6 +259,10 @@ Other Changes
 
 * SOLR-9819: Upgrade commons-fileupload to 1.3.2, fixing a potential vulnerability CVE-2016-3092 (Anshum Gupta)
 
+* SOLR-9827: ConcurrentUpdateSolrClient creates a RemoteSolrException if the remote host responds with a non-OK
+  response (instead of a SolrException) and includes the remote error message as part of the exception message.
+  (Tomás Fernández Lóbbe)
+
 ==================  6.3.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c164f7e3/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClient.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClient.java b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClient.java
index b96cc23..5c3f289 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClient.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/impl/ConcurrentUpdateSolrClient.java
@@ -46,7 +46,6 @@ import org.apache.solr.client.solrj.request.RequestWriter;
 import org.apache.solr.client.solrj.request.UpdateRequest;
 import org.apache.solr.client.solrj.util.ClientUtils;
 import org.apache.solr.common.SolrException;
-import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
@@ -330,7 +329,8 @@ public class ConcurrentUpdateSolrClient extends SolrClient {
             msg.append("\n\n\n\n");
             msg.append("request: ").append(method.getURI());
 
-            SolrException solrExc = new SolrException(ErrorCode.getErrorCode(statusCode), msg.toString());
+            SolrException solrExc;
+            NamedList<String> metadata = null;
             // parse out the metadata from the SolrException
             try {
               String encoding = "UTF-8"; // default
@@ -343,11 +343,21 @@ public class ConcurrentUpdateSolrClient extends SolrClient {
               NamedList<Object> resp = client.parser.processResponse(rspBody, encoding);
               NamedList<Object> error = (NamedList<Object>) resp.get("error");
               if (error != null) {
-                solrExc.setMetadata((NamedList<String>) error.get("metadata"));
+                metadata = (NamedList<String>) error.get("metadata");
+                String remoteMsg = (String) error.get("msg");
+                if (remoteMsg != null) {
+                  msg.append("\nRemote error message: ");
+                  msg.append(remoteMsg);
+                }
               }
             } catch (Exception exc) {
               // don't want to fail to report error if parsing the response fails
               log.warn("Failed to parse error response from " + client.getBaseURL() + " due to: " + exc);
+            } finally {
+              solrExc = new HttpSolrClient.RemoteSolrException(client.getBaseURL(), statusCode, msg.toString(), null);
+              if (metadata != null) {
+                solrExc.setMetadata(metadata);
+              }
             }
 
             handleError(solrExc);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c164f7e3/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java
index f403f3f..d25280d 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java
@@ -36,6 +36,7 @@ import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
 import org.apache.solr.client.solrj.embedded.SolrExampleStreamingTest.ErrorTrackingConcurrentUpdateSolrClient;
 import org.apache.solr.client.solrj.impl.BinaryResponseParser;
 import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException;
 import org.apache.solr.client.solrj.impl.NoOpResponseParser;
 import org.apache.solr.client.solrj.impl.XMLResponseParser;
 import org.apache.solr.client.solrj.request.AbstractUpdateRequest;
@@ -463,7 +464,11 @@ abstract public class SolrExampleTests extends SolrExampleTestsBase
       concurrentClient.lastError = null;
       concurrentClient.add(doc);
       concurrentClient.blockUntilFinished();
-      assertNotNull("Should throw exception!", concurrentClient.lastError); 
+      assertNotNull("Should throw exception!", concurrentClient.lastError);
+      assertEquals("Unexpected exception type", 
+          RemoteSolrException.class, concurrentClient.lastError.getClass());
+      assertTrue("Unexpected exception message: " + concurrentClient.lastError.getMessage(), 
+          concurrentClient.lastError.getMessage().contains("Remote error message: Document contains multiple values for uniqueKey"));
     } else {
       log.info("Ignoring update test for client:" + client.getClass().getName());
     }

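As a usage sketch of the new behavior (the URL, core name, and constructor form below are assumptions; adjust them to your setup and SolrJ version), asynchronous failures surfaced through handleError are now RemoteSolrException instances whose message includes the remote error text:

    import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrClient;
    import org.apache.solr.client.solrj.impl.HttpSolrClient.RemoteSolrException;
    import org.apache.solr.common.SolrInputDocument;

    public class RemoteErrorDemo {
      public static void main(String[] args) throws Exception {
        try (ConcurrentUpdateSolrClient client =
                 new ConcurrentUpdateSolrClient("http://localhost:8983/solr/collection1", 10, 2) {
                   @Override
                   public void handleError(Throwable ex) {
                     if (ex instanceof RemoteSolrException) {
                       // the remote error message is now appended to getMessage()
                       System.err.println("Remote failure: " + ex.getMessage());
                     }
                   }
                 }) {
          SolrInputDocument doc = new SolrInputDocument();
          doc.addField("id", "1");
          client.add(doc);
          client.blockUntilFinished(); // errors are reported asynchronously
        }
      }
    }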

[45/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7592: if segments file is truncated, throw CorruptIndexException

Posted by kr...@apache.org.
LUCENE-7592: if segments file is truncated, throw CorruptIndexException


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e4f31fab
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e4f31fab
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e4f31fab

Branch: refs/heads/jira/solr-8593
Commit: e4f31fab2f98b7af6d2ec12a2eb3456521b446df
Parents: 6525bb5
Author: Mike McCandless <mi...@apache.org>
Authored: Wed Dec 14 18:00:51 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Wed Dec 14 18:00:51 2016 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                                            | 4 ++++
 .../core/src/java/org/apache/lucene/index/SegmentInfos.java   | 7 ++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e4f31fab/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 15b89f0..f38c0d5 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -138,6 +138,10 @@ Improvements
   necessarily refer to that field (AKA requireFieldMatch==false). Disabled by default.
   See UH get/setFieldMatcher. (Jim Ferenczi via David Smiley)
 
+* LUCENE-7592: If the segments file is truncated, we now throw
+  CorruptIndexException instead of the more confusing EOFException
+  (Mike Drob via Mike McCandless)
+
 Optimizations
 
 * LUCENE-7568: Optimize merging when index sorting is used but the

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e4f31fab/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
index 8f627cd..3e8b1f8 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
@@ -17,6 +17,7 @@
 package org.apache.lucene.index;
 
 
+import java.io.EOFException;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.util.ArrayList;
@@ -277,7 +278,11 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentCommitInfo
     long generation = generationFromSegmentsFileName(segmentFileName);
     //System.out.println(Thread.currentThread() + ": SegmentInfos.readCommit " + segmentFileName);
     try (ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ)) {
-      return readCommit(directory, input, generation);
+      try {
+        return readCommit(directory, input, generation);
+      } catch (EOFException e) {
+        throw new CorruptIndexException("Unexpected end of file while reading index.", input, e);
+      }
     }
   }
 

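From a caller's perspective, a truncated segments_N file now surfaces as CorruptIndexException rather than a bare EOFException. A minimal sketch (the index path is illustrative):

    import java.nio.file.Paths;
    import org.apache.lucene.index.CorruptIndexException;
    import org.apache.lucene.index.SegmentInfos;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class ReadCommitDemo {
      public static void main(String[] args) throws Exception {
        try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"))) {
          SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
          System.out.println("segments: " + infos.size());
        } catch (CorruptIndexException e) {
          // a truncated segments_N file now lands here instead of as EOFException
          System.err.println("index is corrupt: " + e.getMessage());
        }
      }
    }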

[22/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-9834: A variety of spots in the code can create a collection zk node after the collection has been removed.

Posted by kr...@apache.org.
SOLR-9834: A variety of spots in the code can create a collection zk node after the collection has been removed.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/10552099
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/10552099
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/10552099

Branch: refs/heads/jira/solr-8593
Commit: 1055209940faec71bd8046af3323d5982529525b
Parents: b97d9d7
Author: markrmiller <ma...@apache.org>
Authored: Thu Dec 8 12:03:55 2016 -0500
Committer: markrmiller <ma...@apache.org>
Committed: Thu Dec 8 12:03:55 2016 -0500

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   3 +
 .../solr/cloud/CloudConfigSetService.java       |  24 ++-
 .../org/apache/solr/cloud/CloudDescriptor.java  |   2 +-
 .../apache/solr/cloud/CreateCollectionCmd.java  | 146 ++++++++++++++++++-
 .../org/apache/solr/cloud/ElectionContext.java  |  20 +--
 .../org/apache/solr/cloud/LeaderElector.java    |   9 +-
 .../org/apache/solr/cloud/ZkController.java     | 128 +---------------
 .../apache/solr/cloud/ZkSolrResourceLoader.java |   2 +-
 .../org/apache/solr/core/ConfigSetService.java  |   7 +-
 .../solr/handler/admin/CollectionsHandler.java  |   2 +
 .../apache/solr/cloud/LeaderElectionTest.java   |   2 +
 ...verseerCollectionConfigSetProcessorTest.java |  33 +++--
 .../org/apache/solr/cloud/ZkSolrClientTest.java |  54 +++++++
 .../apache/solr/common/cloud/SolrZkClient.java  |  29 +++-
 .../apache/solr/common/cloud/ZkCmdExecutor.java |  15 +-
 15 files changed, 307 insertions(+), 169 deletions(-)
----------------------------------------------------------------------

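The recurring guard in this patch is easy to state on its own: before (re)creating anything under /collections/{collection}, check that the collection znode itself still exists. A hedged sketch, assuming a connected SolrZkClient (the class and method names below are illustrative, not from the commit):

    import org.apache.solr.common.cloud.SolrZkClient;
    import org.apache.solr.common.cloud.ZkStateReader;

    public class CollectionNodeGuard {
      // returns false when the collection znode is gone, in which case callers
      // should skip leader registration instead of resurrecting the node
      public static boolean collectionStillExists(SolrZkClient zkClient, String collection)
          throws Exception {
        return zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection, true);
      }
    }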

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/10552099/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 8dee837..abd9997 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -230,6 +230,9 @@ Bug Fixes
 
 * SOLR-9832: Schema modifications are not immediately visible on the coordinating node. (Steve Rowe)
 
+* SOLR-9834: A variety of spots in the code can create a collection zk node after the collection has been 
+  removed. (Mark Miller)
+
 Other Changes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/10552099/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java b/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java
index bf11e92..6e0583f 100644
--- a/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java
+++ b/solr/core/src/java/org/apache/solr/cloud/CloudConfigSetService.java
@@ -16,12 +16,20 @@
  */
 package org.apache.solr.cloud;
 
+import java.lang.invoke.MethodHandles;
+
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.cloud.ZkStateReader;
 import org.apache.solr.core.ConfigSetService;
 import org.apache.solr.core.CoreDescriptor;
 import org.apache.solr.core.SolrResourceLoader;
+import org.apache.zookeeper.KeeperException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 public class CloudConfigSetService extends ConfigSetService {
-
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+  
   private final ZkController zkController;
 
   public CloudConfigSetService(SolrResourceLoader loader, ZkController zkController) {
@@ -31,8 +39,18 @@ public class CloudConfigSetService extends ConfigSetService {
 
   @Override
   public SolrResourceLoader createCoreResourceLoader(CoreDescriptor cd) {
-    // TODO: Shouldn't the collection node be created by the Collections API?
-    zkController.createCollectionZkNode(cd.getCloudDescriptor());
+    try {
+      // for back compat with cores that can create collections without the collections API
+      if (!zkController.getZkClient().exists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + cd.getCollectionName(), true)) {
+        CreateCollectionCmd.createCollectionZkNode(zkController.getZkClient(), cd.getCollectionName(), cd.getCloudDescriptor().getParams());
+      }
+    } catch (KeeperException e) {
+      SolrException.log(log, null, e);
+    } catch (InterruptedException e) {
+      Thread.currentThread().interrupt();
+      SolrException.log(log, null, e);
+    }
+
     String configName = zkController.getZkStateReader().readConfigName(cd.getCollectionName());
     return new ZkSolrResourceLoader(cd.getInstanceDir(), configName, parentLoader.getClassLoader(),
         cd.getSubstitutableProperties(), zkController);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/10552099/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java b/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java
index 4dd1527..fdc7b02 100644
--- a/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java
+++ b/solr/core/src/java/org/apache/solr/cloud/CloudDescriptor.java
@@ -33,7 +33,7 @@ public class CloudDescriptor {
   private String roles = null;
   private Integer numShards;
   private String nodeName = null;
-  private Map<String, String> collectionParams = new HashMap<>();
+  private Map<String,String> collectionParams = new HashMap<>();
 
   private volatile boolean isLeader = false;
   

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/10552099/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java b/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java
index a067b4a..a1bb70e 100644
--- a/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/CreateCollectionCmd.java
@@ -25,19 +25,23 @@ import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Properties;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.solr.cloud.OverseerCollectionMessageHandler.Cmd;
 import org.apache.solr.cloud.overseer.ClusterStateMutator;
 import org.apache.solr.cloud.rule.ReplicaAssigner;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocRouter;
 import org.apache.solr.common.cloud.ImplicitDocRouter;
 import org.apache.solr.common.cloud.Replica;
+import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkConfigManager;
 import org.apache.solr.common.cloud.ZkNodeProps;
 import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.cloud.ZooKeeperException;
 import org.apache.solr.common.params.CoreAdminParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.NamedList;
@@ -46,7 +50,9 @@ import org.apache.solr.common.util.Utils;
 import org.apache.solr.handler.component.ShardHandler;
 import org.apache.solr.handler.component.ShardRequest;
 import org.apache.solr.util.TimeOut;
+import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;
+import org.apache.zookeeper.KeeperException.NoNodeException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -64,9 +70,11 @@ import static org.apache.solr.common.util.StrUtils.formatString;
 public class CreateCollectionCmd implements Cmd {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
   private final OverseerCollectionMessageHandler ocmh;
+  private SolrZkClient zkClient;
 
   public CreateCollectionCmd(OverseerCollectionMessageHandler ocmh) {
     this.ocmh = ocmh;
+    this.zkClient = ocmh.zkStateReader.getZkClient();
   }
 
   @Override
@@ -84,7 +92,6 @@ public class CreateCollectionCmd implements Cmd {
 
     ocmh.validateConfigOrThrowSolrException(configName);
 
-
     try {
       // look at the replication factor and see if it matches reality
       // if it does not, find best nodes to create more cores
@@ -157,10 +164,20 @@ public class CreateCollectionCmd implements Cmd {
       }
 
       ZkStateReader zkStateReader = ocmh.zkStateReader;
-      boolean isLegacyCloud =  Overseer.isLegacy(zkStateReader);
+      boolean isLegacyCloud = Overseer.isLegacy(zkStateReader);
 
       ocmh.createConfNode(configName, collectionName, isLegacyCloud);
 
+      Map<String,String> collectionParams = new HashMap<>();
+      Map<String,Object> collectionProps = message.getProperties();
+      for (String propName : collectionProps.keySet()) {
+        if (propName.startsWith(ZkController.COLLECTION_PARAM_PREFIX)) {
+          collectionParams.put(propName.substring(ZkController.COLLECTION_PARAM_PREFIX.length()), (String) collectionProps.get(propName));
+        }
+      }
+      
+      createCollectionZkNode(zkClient, collectionName, collectionParams);
+      
       Overseer.getStateUpdateQueue(zkStateReader.getZkClient()).offer(Utils.toJSON(message));
 
       // wait for a while until we don't see the collection
@@ -288,4 +305,129 @@ public class CreateCollectionCmd implements Cmd {
     }
     return configName;
   }
+  
+  public static void createCollectionZkNode(SolrZkClient zkClient, String collection, Map<String,String> params) {
+    log.debug("Check for collection zkNode:" + collection);
+    String collectionPath = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection;
+
+    try {
+      if (!zkClient.exists(collectionPath, true)) {
+        log.debug("Creating collection in ZooKeeper:" + collection);
+
+        try {
+          Map<String,Object> collectionProps = new HashMap<>();
+
+          // TODO: if collection.configName isn't set, and there isn't already a conf in zk, just use that?
+          String defaultConfigName = System.getProperty(ZkController.COLLECTION_PARAM_PREFIX + ZkController.CONFIGNAME_PROP, collection);
+
+          if (params.size() > 0) {
+            collectionProps.putAll(params);
+            // if the config name wasn't passed in, use the default
+            if (!collectionProps.containsKey(ZkController.CONFIGNAME_PROP)) {
+              // users can create the collection node and conf link ahead of time, or this may return another option
+              getConfName(zkClient, collection, collectionPath, collectionProps);
+            }
+
+          } else if (System.getProperty("bootstrap_confdir") != null) {
+            // if we are bootstrapping a collection, default the config for
+            // a new collection to the collection we are bootstrapping
+            log.info("Setting config for collection:" + collection + " to " + defaultConfigName);
+
+            Properties sysProps = System.getProperties();
+            for (String sprop : System.getProperties().stringPropertyNames()) {
+              if (sprop.startsWith(ZkController.COLLECTION_PARAM_PREFIX)) {
+                collectionProps.put(sprop.substring(ZkController.COLLECTION_PARAM_PREFIX.length()), sysProps.getProperty(sprop));
+              }
+            }
+
+            // if the config name wasn't passed in, use the default
+            if (!collectionProps.containsKey(ZkController.CONFIGNAME_PROP))
+              collectionProps.put(ZkController.CONFIGNAME_PROP, defaultConfigName);
+
+          } else if (Boolean.getBoolean("bootstrap_conf")) {
+            // the conf name should be the collection name of this core
+            collectionProps.put(ZkController.CONFIGNAME_PROP, collection);
+          } else {
+            getConfName(zkClient, collection, collectionPath, collectionProps);
+          }
+
+          collectionProps.remove(ZkStateReader.NUM_SHARDS_PROP);  // we don't put numShards in the collections properties
+
+          ZkNodeProps zkProps = new ZkNodeProps(collectionProps);
+          zkClient.makePath(collectionPath, Utils.toJSON(zkProps), CreateMode.PERSISTENT, null, true);
+
+        } catch (KeeperException e) {
+          // it's okay if the node already exists
+          if (e.code() != KeeperException.Code.NODEEXISTS) {
+            throw e;
+          }
+        }
+      } else {
+        log.debug("Collection zkNode exists");
+      }
+
+    } catch (KeeperException e) {
+      // it's okay if another beats us creating the node
+      if (e.code() == KeeperException.Code.NODEEXISTS) {
+        return;
+      }
+      throw new SolrException(ErrorCode.SERVER_ERROR, "Error creating collection node in Zookeeper", e);
+    } catch (InterruptedException e) {
+      Thread.interrupted();
+      throw new SolrException(ErrorCode.SERVER_ERROR, "Error creating collection node in Zookeeper", e);
+    }
+
+  }
+  
+  private static void getConfName(SolrZkClient zkClient, String collection, String collectionPath, Map<String,Object> collectionProps) throws KeeperException,
+      InterruptedException {
+    // check for configName
+    log.debug("Looking for collection configName");
+    if (collectionProps.containsKey("configName")) {
+      log.info("configName was passed as a param {}", collectionProps.get("configName"));
+      return;
+    }
+    
+    List<String> configNames = null;
+    int retry = 1;
+    int retryLimt = 6;
+    for (; retry < retryLimt; retry++) {
+      if (zkClient.exists(collectionPath, true)) {
+        ZkNodeProps cProps = ZkNodeProps.load(zkClient.getData(collectionPath, null, null, true));
+        if (cProps.containsKey(ZkController.CONFIGNAME_PROP)) {
+          break;
+        }
+      }
+
+      // if there is only one conf, use that
+      try {
+        configNames = zkClient.getChildren(ZkConfigManager.CONFIGS_ZKNODE, null,
+            true);
+      } catch (NoNodeException e) {
+        // just keep trying
+      }
+      if (configNames != null && configNames.size() == 1) {
+        // no config set named, but there is only 1 - use it
+        log.info("Only one config set found in zk - using it:" + configNames.get(0));
+        collectionProps.put(ZkController.CONFIGNAME_PROP, configNames.get(0));
+        break;
+      }
+
+      if (configNames != null && configNames.contains(collection)) {
+        log.info(
+            "Could not find explicit collection configName, but found config name matching collection name - using that set.");
+        collectionProps.put(ZkController.CONFIGNAME_PROP, collection);
+        break;
+      }
+
+      log.info("Could not find collection configName - pausing for 3 seconds and trying again - try: " + retry);
+      Thread.sleep(3000);
+    }
+    if (retry == retryLimt) {
+      log.error("Could not find configName for collection " + collection);
+      throw new ZooKeeperException(
+          SolrException.ErrorCode.SERVER_ERROR,
+          "Could not find configName for collection " + collection + " found:" + configNames);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/10552099/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
index 183f177..b3cd585 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ElectionContext.java
@@ -125,17 +125,6 @@ class ShardLeaderElectionContextBase extends ElectionContext {
     this.zkClient = zkStateReader.getZkClient();
     this.shardId = shardId;
     this.collection = collection;
-
-    try {
-      new ZkCmdExecutor(zkStateReader.getZkClient().getZkClientTimeout())
-          .ensureExists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection,
-              zkClient);
-    } catch (KeeperException e) {
-      throw new SolrException(ErrorCode.SERVER_ERROR, e);
-    } catch (InterruptedException e) {
-      Thread.currentThread().interrupt();
-      throw new SolrException(ErrorCode.SERVER_ERROR, e);
-    }
   }
   
   @Override
@@ -175,9 +164,16 @@ class ShardLeaderElectionContextBase extends ElectionContext {
   void runLeaderProcess(boolean weAreReplacement, int pauseBeforeStartMs)
       throws KeeperException, InterruptedException, IOException {
     // register as leader - if an ephemeral is already there, wait to see if it goes away
+    
+    if (!zkClient.exists(ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection, true)) {
+      log.info("Will not register as leader because collection appears to be gone.");
+      return;
+    }
+    
     String parent = new Path(leaderPath).getParent().toString();
     ZkCmdExecutor zcmd = new ZkCmdExecutor(30000);
-    zcmd.ensureExists(parent, zkClient);
+    // only if /collections/{collection} exists already do we succeed in creating this path
+    zcmd.ensureExists(parent, (byte[])null, CreateMode.PERSISTENT, zkClient, 2);
 
     try {
       RetryUtil.retryOnThrowable(NodeExistsException.class, 60000, 5000, () -> {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/10552099/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java b/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
index 71fdcfd..aa8943d 100644
--- a/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
+++ b/solr/core/src/java/org/apache/solr/cloud/LeaderElector.java
@@ -360,8 +360,13 @@ public  class LeaderElector {
   public void setup(final ElectionContext context) throws InterruptedException,
       KeeperException {
     String electZKPath = context.electionPath + LeaderElector.ELECTION_NODE;
-    
-    zkCmdExecutor.ensureExists(electZKPath, zkClient);
+    if (context instanceof OverseerElectionContext) {
+      zkCmdExecutor.ensureExists(electZKPath, zkClient);
+    } else {
+      // we use the skipPathParts=2 variant so that a replica won't create /collections/{collection} if it doesn't exist
+      zkCmdExecutor.ensureExists(electZKPath, (byte[])null, CreateMode.PERSISTENT, zkClient, 2);
+    }
+
     this.context = context;
   }
   

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/10552099/solr/core/src/java/org/apache/solr/cloud/ZkController.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index c0a8d55..eba7067 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -34,7 +34,6 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Locale;
 import java.util.Map;
-import java.util.Properties;
 import java.util.Set;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentHashMap;
@@ -1273,130 +1272,6 @@ public class ZkController {
     zkClient.printLayoutToStdOut();
   }
 
-  public void createCollectionZkNode(CloudDescriptor cd) {
-    String collection = cd.getCollectionName();
-
-    log.debug("Check for collection zkNode:" + collection);
-    String collectionPath = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection;
-
-    try {
-      if (!zkClient.exists(collectionPath, true)) {
-        log.debug("Creating collection in ZooKeeper:" + collection);
-
-        try {
-          Map<String, Object> collectionProps = new HashMap<>();
-
-          // TODO: if collection.configName isn't set, and there isn't already a conf in zk, just use that?
-          String defaultConfigName = System.getProperty(COLLECTION_PARAM_PREFIX + CONFIGNAME_PROP, collection);
-
-          // params passed in - currently only done via core admin (create core commmand).
-          if (cd.getParams().size() > 0) {
-            collectionProps.putAll(cd.getParams());
-            // if the config name wasn't passed in, use the default
-            if (!collectionProps.containsKey(CONFIGNAME_PROP)) {
-              // TODO: getting the configName from the collectionPath should fail since we already know it doesn't exist?
-              getConfName(collection, collectionPath, collectionProps);
-            }
-
-          } else if (System.getProperty("bootstrap_confdir") != null) {
-            // if we are bootstrapping a collection, default the config for
-            // a new collection to the collection we are bootstrapping
-            log.info("Setting config for collection:" + collection + " to " + defaultConfigName);
-
-            Properties sysProps = System.getProperties();
-            for (String sprop : System.getProperties().stringPropertyNames()) {
-              if (sprop.startsWith(COLLECTION_PARAM_PREFIX)) {
-                collectionProps.put(sprop.substring(COLLECTION_PARAM_PREFIX.length()), sysProps.getProperty(sprop));
-              }
-            }
-
-            // if the config name wasn't passed in, use the default
-            if (!collectionProps.containsKey(CONFIGNAME_PROP))
-              collectionProps.put(CONFIGNAME_PROP, defaultConfigName);
-
-          } else if (Boolean.getBoolean("bootstrap_conf")) {
-            // the conf name should should be the collection name of this core
-            collectionProps.put(CONFIGNAME_PROP, cd.getCollectionName());
-          } else {
-            getConfName(collection, collectionPath, collectionProps);
-          }
-
-          collectionProps.remove(ZkStateReader.NUM_SHARDS_PROP);  // we don't put numShards in the collections properties
-
-          ZkNodeProps zkProps = new ZkNodeProps(collectionProps);
-          zkClient.makePath(collectionPath, Utils.toJSON(zkProps), CreateMode.PERSISTENT, null, true);
-
-        } catch (KeeperException e) {
-          // it's okay if the node already exists
-          if (e.code() != KeeperException.Code.NODEEXISTS) {
-            throw e;
-          }
-        }
-      } else {
-        log.debug("Collection zkNode exists");
-      }
-
-    } catch (KeeperException e) {
-      // it's okay if another beats us creating the node
-      if (e.code() == KeeperException.Code.NODEEXISTS) {
-        return;
-      }
-      throw new SolrException(ErrorCode.SERVER_ERROR, "Error creating collection node in Zookeeper", e);
-    } catch (InterruptedException e) {
-      Thread.interrupted();
-      throw new SolrException(ErrorCode.SERVER_ERROR, "Error creating collection node in Zookeeper", e);
-    }
-
-  }
-
-
-  private void getConfName(String collection, String collectionPath,
-                           Map<String, Object> collectionProps) throws KeeperException,
-      InterruptedException {
-    // check for configName
-    log.debug("Looking for collection configName");
-    List<String> configNames = null;
-    int retry = 1;
-    int retryLimt = 6;
-    for (; retry < retryLimt; retry++) {
-      if (zkClient.exists(collectionPath, true)) {
-        ZkNodeProps cProps = ZkNodeProps.load(zkClient.getData(collectionPath, null, null, true));
-        if (cProps.containsKey(CONFIGNAME_PROP)) {
-          break;
-        }
-      }
-
-      // if there is only one conf, use that
-      try {
-        configNames = zkClient.getChildren(ZkConfigManager.CONFIGS_ZKNODE, null,
-            true);
-      } catch (NoNodeException e) {
-        // just keep trying
-      }
-      if (configNames != null && configNames.size() == 1) {
-        // no config set named, but there is only 1 - use it
-        log.info("Only one config set found in zk - using it:" + configNames.get(0));
-        collectionProps.put(CONFIGNAME_PROP, configNames.get(0));
-        break;
-      }
-
-      if (configNames != null && configNames.contains(collection)) {
-        log.info("Could not find explicit collection configName, but found config name matching collection name - using that set.");
-        collectionProps.put(CONFIGNAME_PROP, collection);
-        break;
-      }
-
-      log.info("Could not find collection configName - pausing for 3 seconds and trying again - try: " + retry);
-      Thread.sleep(3000);
-    }
-    if (retry == retryLimt) {
-      log.error("Could not find configName for collection " + collection);
-      throw new ZooKeeperException(
-          SolrException.ErrorCode.SERVER_ERROR,
-          "Could not find configName for collection " + collection + " found:" + configNames);
-    }
-  }
-
   public ZkStateReader getZkStateReader() {
     return zkStateReader;
   }
@@ -2175,7 +2050,8 @@ public class ZkController {
     } else {
       String parentZNodePath = getLeaderInitiatedRecoveryZnodePath(collection, shardId);
       try {
-        zkClient.makePath(parentZNodePath, retryOnConnLoss);
+        // skip the first 2 path parts so we don't create /collections/{collection} if it does not already exist
+        zkClient.makePath(parentZNodePath, (byte[]) null, CreateMode.PERSISTENT, (Watcher) null, true, retryOnConnLoss, 2);
       } catch (KeeperException.NodeExistsException nee) {
         // if it exists, that's great!
       }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/10552099/solr/core/src/java/org/apache/solr/cloud/ZkSolrResourceLoader.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkSolrResourceLoader.java b/solr/core/src/java/org/apache/solr/cloud/ZkSolrResourceLoader.java
index 209ca68..b4137b3 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkSolrResourceLoader.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkSolrResourceLoader.java
@@ -109,7 +109,7 @@ public class ZkSolrResourceLoader extends SolrResourceLoader {
       } catch (InterruptedException e) {
         Thread.currentThread().interrupt();
         throw new IOException("Error opening " + file, e);
-      } catch (KeeperException e) {
+      } catch (Exception e) {
         throw new IOException("Error opening " + file, e);
       }
     }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/10552099/solr/core/src/java/org/apache/solr/core/ConfigSetService.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/core/ConfigSetService.java b/solr/core/src/java/org/apache/solr/core/ConfigSetService.java
index 3f47f46..e4a135e 100644
--- a/solr/core/src/java/org/apache/solr/core/ConfigSetService.java
+++ b/solr/core/src/java/org/apache/solr/core/ConfigSetService.java
@@ -78,11 +78,10 @@ public abstract class ConfigSetService {
       IndexSchema schema = createIndexSchema(dcore, solrConfig);
       NamedList properties = createConfigSetProperties(dcore, coreLoader);
       return new ConfigSet(configName(dcore), solrConfig, schema, properties);
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
-                              "Could not load conf for core " + dcore.getName() + 
-                              ": " + e.getMessage(), e);
+          "Could not load conf for core " + dcore.getName() +
+              ": " + e.getMessage(), e);
     }
 
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/10552099/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
index 01095a1..1915176 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
@@ -346,9 +346,11 @@ public class CollectionsHandler extends RequestHandlerBase implements Permission
     try {
       String path = ZkStateReader.CONFIGS_ZKNODE + "/" + SYSTEM_COLL + "/schema.xml";
       byte[] data = IOUtils.toByteArray(Thread.currentThread().getContextClassLoader().getResourceAsStream("SystemCollectionSchema.xml"));
+      assert data != null && data.length > 0;
       cmdExecutor.ensureExists(path, data, CreateMode.PERSISTENT, zk);
       path = ZkStateReader.CONFIGS_ZKNODE + "/" + SYSTEM_COLL + "/solrconfig.xml";
       data = IOUtils.toByteArray(Thread.currentThread().getContextClassLoader().getResourceAsStream("SystemCollectionSolrConfig.xml"));
+      assert data != null && data.length > 0;
       cmdExecutor.ensureExists(path, data, CreateMode.PERSISTENT, zk);
     } catch (IOException e) {
       throw new SolrException(ErrorCode.SERVER_ERROR, e);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/10552099/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
index 8e1be10..2582872 100644
--- a/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/LeaderElectionTest.java
@@ -80,6 +80,8 @@ public class LeaderElectionTest extends SolrTestCaseJ4 {
     zkClient = new SolrZkClient(server.getZkAddress(), TIMEOUT);
     zkStateReader = new ZkStateReader(zkClient);
     seqToThread = Collections.synchronizedMap(new HashMap<Integer,Thread>());
+    zkClient.makePath("/collections/collection1", true);
+    zkClient.makePath("/collections/collection2", true);
   }
   
   class TestLeaderElectionContext extends ShardLeaderElectionContextBase {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/10552099/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java b/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java
index 239afa1..6a7906d 100644
--- a/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/OverseerCollectionConfigSetProcessorTest.java
@@ -44,6 +44,7 @@ import org.apache.solr.handler.component.ShardRequest;
 import org.apache.solr.handler.component.ShardResponse;
 import org.apache.solr.util.TimeOut;
 import org.apache.zookeeper.CreateMode;
+import org.apache.zookeeper.Watcher;
 import org.easymock.Capture;
 import org.easymock.EasyMock;
 import org.junit.After;
@@ -114,7 +115,6 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
     zkStateReaderMock = createMock(ZkStateReader.class);
     clusterStateMock = createMock(ClusterState.class);
     solrZkClientMock = createMock(SolrZkClient.class);
-
   }
   
   @AfterClass
@@ -143,9 +143,7 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
     reset(zkStateReaderMock);
     reset(clusterStateMock);
     reset(solrZkClientMock);
-    underTest = new OverseerCollectionConfigSetProcessorToBeTested(zkStateReaderMock,
-        "1234", shardHandlerFactoryMock, ADMIN_PATH, workQueueMock, runningMapMock,
-        completedMapMock, failureMapMock);
+
     zkMap.clear();
     collectionsSet.clear();
   }
@@ -157,12 +155,12 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
   }
   
   protected Set<String> commonMocks(int liveNodesCount) throws Exception {
-
     shardHandlerFactoryMock.getShardHandler();
     expectLastCall().andAnswer(() -> {
       log.info("SHARDHANDLER");
       return shardHandlerMock;
     }).anyTimes();
+    
     workQueueMock.peekTopN(EasyMock.anyInt(), anyObject(Predicate.class), EasyMock.anyLong());
     expectLastCall().andAnswer(() -> {
       Object result;
@@ -203,12 +201,12 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
     
     workQueueMock.poll();
     expectLastCall().andAnswer(() -> queue.poll()).anyTimes();
-
-    zkStateReaderMock.getClusterState();
-    expectLastCall().andAnswer(() -> clusterStateMock).anyTimes();
     
     zkStateReaderMock.getZkClient();
     expectLastCall().andAnswer(() -> solrZkClientMock).anyTimes();
+    
+    zkStateReaderMock.getClusterState();
+    expectLastCall().andAnswer(() -> clusterStateMock).anyTimes();
 
     zkStateReaderMock.updateClusterState();
 
@@ -262,6 +260,18 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
       String key = (String) getCurrentArguments()[0];
       return key;
     }).anyTimes();
+    
+    solrZkClientMock.makePath(anyObject(String.class), anyObject(byte[].class), anyObject(CreateMode.class), anyObject(Watcher.class), anyBoolean());
+    expectLastCall().andAnswer(() -> {
+      String key = (String) getCurrentArguments()[0];
+      return key;
+    }).anyTimes();
+    
+    solrZkClientMock.makePath(anyObject(String.class), anyObject(byte[].class), anyObject(CreateMode.class), anyObject(Watcher.class), anyBoolean(), anyBoolean(), anyInt());
+    expectLastCall().andAnswer(() -> {
+      String key = (String) getCurrentArguments()[0];
+      return key;
+    }).anyTimes();
 
     solrZkClientMock.exists(anyObject(String.class),anyBoolean());
     expectLastCall().andAnswer(() -> {
@@ -518,12 +528,17 @@ public class OverseerCollectionConfigSetProcessorTest extends SolrTestCaseJ4 {
           replicationFactor);
     }
     
-    replay(workQueueMock);
     replay(solrZkClientMock);
     replay(zkStateReaderMock);
+    replay(workQueueMock);
     replay(clusterStateMock);
     replay(shardHandlerFactoryMock);
     replay(shardHandlerMock);
+    
+    
+    underTest = new OverseerCollectionConfigSetProcessorToBeTested(zkStateReaderMock,
+        "1234", shardHandlerFactoryMock, ADMIN_PATH, workQueueMock, runningMapMock,
+        completedMapMock, failureMapMock);
 
 
     log.info("clusterstate " + clusterStateMock.hashCode());

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/10552099/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java b/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java
index 39ef1b8..faa2ba7 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ZkSolrClientTest.java
@@ -26,6 +26,7 @@ import org.apache.solr.common.cloud.SolrZkClient;
 import org.apache.solr.common.cloud.ZkCmdExecutor;
 import org.apache.solr.common.cloud.ZkOperation;
 import org.apache.solr.util.AbstractSolrTestCase;
+import org.apache.zookeeper.CreateMode;
 import org.apache.zookeeper.KeeperException;
 import org.apache.zookeeper.WatchedEvent;
 import org.apache.zookeeper.Watcher;
@@ -304,6 +305,59 @@ public class ZkSolrClientTest extends AbstractSolrTestCase {
 
     }
   }
+  
+  public void testSkipPathPartsOnMakePath() throws Exception {
+    try (ZkConnection conn = new ZkConnection()) {
+      final SolrZkClient zkClient = conn.getClient();
+
+      zkClient.makePath("/test", true);
+
+      // should work
+      zkClient.makePath("/test/path/here", (byte[]) null, CreateMode.PERSISTENT, (Watcher) null, true, true, 1);
+
+      zkClient.clean("/");
+
+      // should not work
+      try {
+        zkClient.makePath("/test/path/here", (byte[]) null, CreateMode.PERSISTENT, (Watcher) null, true, true, 1);
+        fail("We should not be able to create this path");
+      } catch (Exception e) {
+
+      }
+
+      zkClient.clean("/");
+
+      ZkCmdExecutor zkCmdExecutor = new ZkCmdExecutor(30000);
+      try {
+        zkCmdExecutor.ensureExists("/collection/collection/leader", (byte[]) null, CreateMode.PERSISTENT, zkClient, 2);
+        fail("We should not be able to create this path");
+      } catch (Exception e) {
+
+      }
+
+      zkClient.makePath("/collection", true);
+
+      try {
+        zkCmdExecutor.ensureExists("/collections/collection/leader", (byte[]) null, CreateMode.PERSISTENT, zkClient, 2);
+        fail("We should not be able to create this path");
+      } catch (Exception e) {
+
+      }
+      zkClient.makePath("/collection/collection", true);
+ 
+      byte[] bytes = new byte[10];
+      zkCmdExecutor.ensureExists("/collection/collection", bytes, CreateMode.PERSISTENT, zkClient, 2);
+      
+      byte[] returnedBytes = zkClient.getData("/collection/collection", null, null, true);
+      
+      assertNull("We skipped 2 path parts, so data won't be written", returnedBytes);
+
+      zkClient.makePath("/collection/collection/leader", true);
+
+      zkCmdExecutor.ensureExists("/collection/collection/leader", (byte[]) null, CreateMode.PERSISTENT, zkClient, 2);
+
+    }
+  }
 
   @Override
   public void tearDown() throws Exception {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/10552099/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java b/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
index 422d9e5..3f8deea 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/SolrZkClient.java
@@ -411,13 +411,13 @@ public class SolrZkClient implements Closeable {
 
   public void makePath(String path, boolean failOnExists, boolean retryOnConnLoss) throws KeeperException,
       InterruptedException {
-    makePath(path, null, CreateMode.PERSISTENT, null, failOnExists, retryOnConnLoss);
+    makePath(path, null, CreateMode.PERSISTENT, null, failOnExists, retryOnConnLoss, 0);
   }
 
   public void makePath(String path, File file, boolean failOnExists, boolean retryOnConnLoss)
       throws IOException, KeeperException, InterruptedException {
     makePath(path, FileUtils.readFileToByteArray(file),
-        CreateMode.PERSISTENT, null, failOnExists, retryOnConnLoss);
+        CreateMode.PERSISTENT, null, failOnExists, retryOnConnLoss, 0);
   }
 
   public void makePath(String path, File file, boolean retryOnConnLoss) throws IOException,
@@ -463,21 +463,35 @@ public class SolrZkClient implements Closeable {
    */
   public void makePath(String path, byte[] data, CreateMode createMode,
       Watcher watcher, boolean retryOnConnLoss) throws KeeperException, InterruptedException {
-    makePath(path, data, createMode, watcher, true, retryOnConnLoss);
+    makePath(path, data, createMode, watcher, true, retryOnConnLoss, 0);
+  }
+  
+  /**
+   * Creates the path in ZooKeeper, creating each node as necessary.
+   *
+   * e.g. If <code>path=/solr/group/node</code> and none of the nodes, solr,
+   * group, node exist, each will be created.
+   *
+   * @param data to set on the last zkNode
+   */
+  public void makePath(String path, byte[] data, CreateMode createMode,
+      Watcher watcher, boolean failOnExists, boolean retryOnConnLoss) throws KeeperException, InterruptedException {
+    makePath(path, data, createMode, watcher, failOnExists, retryOnConnLoss, 0);
   }
-
 
   /**
    * Creates the path in ZooKeeper, creating each node as necessary.
    *
    * e.g. If <code>path=/solr/group/node</code> and none of the nodes, solr,
    * group, node exist, each will be created.
+   * 
+   * skipPathParts will force the call to fail if the first skipPathParts path segments do not already exist.
    *
    * Note: retryOnConnLoss is only respected for the final node - nodes
    * before that are always retried on connection loss.
    */
   public void makePath(String path, byte[] data, CreateMode createMode,
-      Watcher watcher, boolean failOnExists, boolean retryOnConnLoss) throws KeeperException, InterruptedException {
+      Watcher watcher, boolean failOnExists, boolean retryOnConnLoss, int skipPathParts) throws KeeperException, InterruptedException {
     log.debug("makePath: {}", path);
     boolean retry = true;
 
@@ -487,9 +501,12 @@ public class SolrZkClient implements Closeable {
     String[] paths = path.split("/");
     StringBuilder sbPath = new StringBuilder();
     for (int i = 0; i < paths.length; i++) {
-      byte[] bytes = null;
       String pathPiece = paths[i];
       sbPath.append("/" + pathPiece);
+      if (i < skipPathParts) {
+        continue;
+      }
+      byte[] bytes = null;
       final String currentPath = sbPath.toString();
       Object exists = exists(currentPath, watcher, retryOnConnLoss);
       if (exists == null || ((i == paths.length -1) && failOnExists)) {

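A hedged usage sketch of the new skipPathParts argument (the ZooKeeper address, timeout, and paths are assumptions): with skipPathParts=2 the first two segments are never created, so the call fails unless /collections and /collections/coll1 already exist.

    import org.apache.solr.common.cloud.SolrZkClient;
    import org.apache.zookeeper.CreateMode;
    import org.apache.zookeeper.Watcher;

    public class SkipPathPartsDemo {
      public static void main(String[] args) throws Exception {
        try (SolrZkClient zkClient = new SolrZkClient("localhost:2181", 30000)) {
          // creates /collections/coll1/leader_elect only when the first two
          // path parts already exist; otherwise a NoNodeException propagates
          zkClient.makePath("/collections/coll1/leader_elect",
              (byte[]) null, CreateMode.PERSISTENT, (Watcher) null,
              true /* failOnExists */, true /* retryOnConnLoss */, 2 /* skipPathParts */);
        }
      }
    }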
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/10552099/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java
index 0f50f0a..c27f767 100644
--- a/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java
+++ b/solr/solrj/src/java/org/apache/solr/common/cloud/ZkCmdExecutor.java
@@ -75,17 +75,26 @@ public class ZkCmdExecutor {
   }
   
   public void ensureExists(String path, final SolrZkClient zkClient) throws KeeperException, InterruptedException {
-    ensureExists(path, null, CreateMode.PERSISTENT, zkClient);
+    ensureExists(path, null, CreateMode.PERSISTENT, zkClient, 0);
+  }
+  
+  
+  public void ensureExists(String path, final byte[] data, final SolrZkClient zkClient) throws KeeperException, InterruptedException {
+    ensureExists(path, data, CreateMode.PERSISTENT, zkClient, 0);
+  }
+  
+  public void ensureExists(String path, final byte[] data, CreateMode createMode, final SolrZkClient zkClient) throws KeeperException, InterruptedException {
+    ensureExists(path, data, createMode, zkClient, 0);
   }
   
   public void ensureExists(final String path, final byte[] data,
-      CreateMode createMode, final SolrZkClient zkClient) throws KeeperException, InterruptedException {
+      CreateMode createMode, final SolrZkClient zkClient, int skipPathParts) throws KeeperException, InterruptedException {
     
     if (zkClient.exists(path, true)) {
       return;
     }
     try {
-      zkClient.makePath(path, data, createMode, true);
+      zkClient.makePath(path, data, createMode, null, true, true, skipPathParts);
     } catch (NodeExistsException e) {
       // it's okay if another beats us creating the node
     }


[48/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7572: Cache the hash code of doc values queries.

Posted by kr...@apache.org.
LUCENE-7572: Cache the hash code of doc values queries.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ea1569e2
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ea1569e2
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ea1569e2

Branch: refs/heads/jira/solr-8593
Commit: ea1569e2914f9ba914b582a0801d6cb83a29529b
Parents: 268d4ac
Author: Adrien Grand <jp...@gmail.com>
Authored: Thu Dec 15 16:30:15 2016 +0100
Committer: Adrien Grand <jp...@gmail.com>
Committed: Thu Dec 15 17:17:54 2016 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   2 +
 .../apache/lucene/index/PrefixCodedTerms.java   |   4 +-
 .../lucene/search/DocValuesNumbersQuery.java    |  26 ++--
 .../lucene/search/DocValuesTermsQuery.java      |  49 ++++--
 .../org/apache/lucene/search/LongHashSet.java   | 156 +++++++++++++++++++
 .../apache/lucene/search/LongHashSetTests.java  | 100 ++++++++++++
 .../lucene/search/TestDocValuesTermsQuery.java  |   1 +
 7 files changed, 310 insertions(+), 28 deletions(-)
----------------------------------------------------------------------

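The optimization is a standard compute-once pattern: hash the heavy term data at construction, return the cached value from hashCode(), and compare hashes before the expensive equals(). A self-contained sketch (the class below is illustrative, not from the patch):

    import java.util.Arrays;

    public final class CachedHashTerms {
      private final long[] terms;        // stands in for the heavy PrefixCodedTerms payload
      private final int cachedHashCode;  // computed once at construction

      public CachedHashTerms(long[] terms) {
        this.terms = terms.clone();
        this.cachedHashCode = Arrays.hashCode(this.terms);
      }

      @Override
      public int hashCode() {
        return cachedHashCode;
      }

      @Override
      public boolean equals(Object obj) {
        if (!(obj instanceof CachedHashTerms)) return false;
        CachedHashTerms other = (CachedHashTerms) obj;
        // the payload may be large, so compare the cached hashes first
        return cachedHashCode == other.cachedHashCode
            && Arrays.equals(terms, other.terms);
      }
    }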

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ea1569e2/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0e327d2..bacc270 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -158,6 +158,8 @@ Optimizations
   writing to disk, giving a small speedup in points-heavy use cases.
   (Mike McCandless)
 
+* LUCENE-7572: Doc values queries now cache their hash code. (Adrien Grand)
+
 Other
 
 * LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ea1569e2/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java b/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java
index 3dca3db..df1653b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java
+++ b/lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java
@@ -28,7 +28,9 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 
 /**
- * Prefix codes term instances (prefixes are shared)
+ * Prefix codes term instances (prefixes are shared). This is expected to be
+ * faster to build than an FST and might also be more compact if there are no
+ * common suffixes.
  * @lucene.internal
  */
 public class PrefixCodedTerms implements Accountable {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ea1569e2/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesNumbersQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesNumbersQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesNumbersQuery.java
index 0fd2244..7725703 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesNumbersQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesNumbersQuery.java
@@ -18,6 +18,7 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.HashSet;
 import java.util.Objects;
 import java.util.Set;
@@ -45,11 +46,16 @@ import org.apache.lucene.index.SortedNumericDocValues;
 public class DocValuesNumbersQuery extends Query {
 
   private final String field;
-  private final Set<Long> numbers;
+  private final LongHashSet numbers;
 
-  public DocValuesNumbersQuery(String field, Set<Long> numbers) {
+  public DocValuesNumbersQuery(String field, long[] numbers) {
     this.field = Objects.requireNonNull(field);
-    this.numbers = Objects.requireNonNull(numbers, "Set of numbers must not be null");
+    this.numbers = new LongHashSet(numbers);
+  }
+
+  public DocValuesNumbersQuery(String field, Collection<Long> numbers) {
+    this.field = Objects.requireNonNull(field);
+    this.numbers = new LongHashSet(numbers.stream().mapToLong(Long::longValue).toArray());
   }
 
   public DocValuesNumbersQuery(String field, Long... numbers) {
@@ -82,15 +88,11 @@ public class DocValuesNumbersQuery extends Query {
 
   @Override
   public String toString(String defaultField) {
-    StringBuilder sb = new StringBuilder();
-    sb.append(field).append(": [");
-    for (Long number : numbers) {
-      sb.append(number).append(", ");
-    }
-    if (numbers.size() > 0) {
-      sb.setLength(sb.length() - 2);
-    }
-    return sb.append(']').toString();
+    return new StringBuilder()
+        .append(field)
+        .append(": ")
+        .append(numbers.toString())
+        .toString();
   }
 
   @Override

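For reference, a hedged sketch of the three constructors after this change; the field name "price" is purely illustrative, and java.util.Arrays is assumed imported. The new long[] variant avoids boxing every value:

    Query q1 = new DocValuesNumbersQuery("price", new long[] {42L, 100L});   // primitive, no boxing
    Query q2 = new DocValuesNumbersQuery("price", Arrays.asList(42L, 100L)); // Collection<Long>
    Query q3 = new DocValuesNumbersQuery("price", 42L, 100L);                // Long... varargs
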
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ea1569e2/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesTermsQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesTermsQuery.java b/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesTermsQuery.java
index 6d852a8..6e30bae 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesTermsQuery.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/DocValuesTermsQuery.java
@@ -25,7 +25,10 @@ import java.util.Objects;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PrefixCodedTerms;
 import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
@@ -91,13 +94,24 @@ import org.apache.lucene.util.LongBitSet;
 public class DocValuesTermsQuery extends Query {
 
   private final String field;
-  private final BytesRef[] terms;
+  private final PrefixCodedTerms termData;
+  private final int termDataHashCode; // cached hashcode of termData
 
   public DocValuesTermsQuery(String field, Collection<BytesRef> terms) {
     this.field = Objects.requireNonNull(field);
     Objects.requireNonNull(terms, "Collection of terms must not be null");
-    this.terms = terms.toArray(new BytesRef[terms.size()]);
-    ArrayUtil.timSort(this.terms);
+    BytesRef[] sortedTerms = terms.toArray(new BytesRef[terms.size()]);
+    ArrayUtil.timSort(sortedTerms);
+    PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
+    BytesRef previous = null;
+    for (BytesRef term : sortedTerms) {
+      if (term.equals(previous) == false) {
+        builder.add(field, term);
+      }
+      previous = term;
+    }
+    termData = builder.finish();
+    termDataHashCode = termData.hashCode();
   }
 
   public DocValuesTermsQuery(String field, BytesRef... terms) {
@@ -124,26 +138,30 @@ public class DocValuesTermsQuery extends Query {
   }
 
   private boolean equalsTo(DocValuesTermsQuery other) {
-    return field.equals(other.field) &&
-           Arrays.equals(terms, other.terms);
+    // termData might be heavy to compare so check the hash code first
+    return termDataHashCode == other.termDataHashCode && 
+           termData.equals(other.termData);
   }
 
   @Override
   public int hashCode() {
-    return 31 * classHash() + Objects.hash(field, Arrays.asList(terms));
+    return 31 * classHash() + termDataHashCode;
   }
 
   @Override
   public String toString(String defaultField) {
-    StringBuilder sb = new StringBuilder();
-    sb.append(field).append(": [");
-    for (BytesRef term : terms) {
-      sb.append(term).append(", ");
-    }
-    if (terms.length > 0) {
-      sb.setLength(sb.length() - 2);
+    StringBuilder builder = new StringBuilder();
+    boolean first = true;
+    TermIterator iterator = termData.iterator();
+    for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
+      if (!first) {
+        builder.append(' ');
+      }
+      first = false;
+      builder.append(new Term(iterator.field(), term).toString());
     }
-    return sb.append(']').toString();
+
+    return builder.toString();
   }
 
   @Override
@@ -155,7 +173,8 @@ public class DocValuesTermsQuery extends Query {
         final SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
         final LongBitSet bits = new LongBitSet(values.getValueCount());
         boolean matchesAtLeastOneTerm = false;
-        for (BytesRef term : terms) {
+        TermIterator iterator = termData.iterator();
+        for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
           final long ord = values.lookupTerm(term);
           if (ord >= 0) {
             matchesAtLeastOneTerm = true;

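The equals/hashCode changes above follow a general pattern: when a field is expensive to compare, compute its hash once at construction and use it as a cheap pre-filter in equals. A minimal standalone sketch of the idea (class and field names are hypothetical, not part of this commit):

    import java.util.Arrays;

    final class HeavyKey {
      private final byte[] payload;
      private final int cachedHash; // computed once, reused by hashCode() and equals()

      HeavyKey(byte[] payload) {
        this.payload = payload;
        this.cachedHash = Arrays.hashCode(payload);
      }

      @Override public int hashCode() { return cachedHash; }

      @Override public boolean equals(Object o) {
        if ((o instanceof HeavyKey) == false) return false;
        HeavyKey that = (HeavyKey) o;
        // cheap int comparison first; only do the heavy array compare on a hash match
        return cachedHash == that.cachedHash && Arrays.equals(payload, that.payload);
      }
    }
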
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ea1569e2/lucene/sandbox/src/java/org/apache/lucene/search/LongHashSet.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/LongHashSet.java b/lucene/sandbox/src/java/org/apache/lucene/search/LongHashSet.java
new file mode 100644
index 0000000..3a6af5f
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/LongHashSet.java
@@ -0,0 +1,156 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.util.AbstractSet;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+import org.apache.lucene.util.packed.PackedInts;
+
+final class LongHashSet extends AbstractSet<Long> {
+
+  private static final long MISSING = Long.MIN_VALUE;
+
+  final long[] table;
+  final int mask;
+  final boolean hasMissingValue;
+  final int size;
+  final int hashCode;
+
+  LongHashSet(long... values) {
+    int tableSize = Math.toIntExact(values.length * 3L / 2);
+    tableSize = 1 << PackedInts.bitsRequired(tableSize); // make it a power of 2
+    assert tableSize >= values.length * 3L / 2;
+    table = new long[tableSize];
+    Arrays.fill(table, MISSING);
+    mask = tableSize - 1;
+    boolean hasMissingValue = false;
+    int size = 0;
+    int hashCode = 0;
+    for (long value : values) {
+      if (value == MISSING ? hasMissingValue == false : add(value)) {
+        if (value == MISSING) {
+          hasMissingValue = true;
+        }
+        ++size;
+        hashCode += Long.hashCode(value);
+      }
+    }
+    this.hasMissingValue = hasMissingValue;
+    this.size = size;
+    this.hashCode = hashCode;
+  }
+
+  private boolean add(long l) {
+    assert l != MISSING;
+    final int slot = Long.hashCode(l) & mask;
+    for (int i = slot; ; i = (i + 1) & mask) {
+      if (table[i] == MISSING) {
+        table[i] = l;
+        return true;
+      } else if (table[i] == l) {
+        // already added
+        return false;
+      }
+    }
+  }
+
+  boolean contains(long l) {
+    if (l == MISSING) {
+      return hasMissingValue;
+    }
+    final int slot = Long.hashCode(l) & mask;
+    for (int i = slot; ; i = (i + 1) & mask) {
+      if (table[i] == MISSING) {
+        return false;
+      } else if (table[i] == l) {
+        return true;
+      }
+    }
+  }
+
+  @Override
+  public int size() {
+    return size;
+  }
+
+  @Override
+  public int hashCode() {
+    return hashCode;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (obj != null && obj.getClass() == LongHashSet.class) {
+      LongHashSet that = (LongHashSet) obj;
+      if (hashCode != that.hashCode
+          || size != that.size
+          || hasMissingValue != that.hasMissingValue) {
+        return false;
+      }
+      for (long v : table) {
+        if (v != MISSING && that.contains(v) == false) {
+          return false;
+        }
+      }
+      return true;
+    }
+    return super.equals(obj);
+  }
+
+  @Override
+  public boolean contains(Object o) {
+    return o instanceof Long && contains(((Long) o).longValue());
+  }
+
+  @Override
+  public Iterator<Long> iterator() {
+    return new Iterator<Long>() {
+
+      private boolean hasNext = hasMissingValue;
+      private int i = -1;
+      private long value = MISSING;
+
+      @Override
+      public boolean hasNext() {
+        if (hasNext) {
+          return true;
+        }
+        while (++i < table.length) {
+          value = table[i];
+          if (value != MISSING) {
+            return hasNext = true;
+          }
+        }
+        return false;
+      }
+
+      @Override
+      public Long next() {
+        if (hasNext() == false) {
+          throw new NoSuchElementException();
+        }
+        hasNext = false;
+        return value;
+      }
+
+    };
+  }
+
+}

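LongHashSet is an open-addressing table that reserves Long.MIN_VALUE as the empty-slot sentinel, which is why that one value is tracked separately through hasMissingValue. A short usage sketch:

    LongHashSet set = new LongHashSet(3L, 7L, Long.MIN_VALUE);
    boolean a = set.contains(7L);             // true: found by linear probing
    boolean b = set.contains(Long.MIN_VALUE); // true: answered by hasMissingValue
    boolean c = set.contains(8L);             // false: probe reaches an empty slot
    int n = set.size();                       // 3
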
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ea1569e2/lucene/sandbox/src/test/org/apache/lucene/search/LongHashSetTests.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/LongHashSetTests.java b/lucene/sandbox/src/test/org/apache/lucene/search/LongHashSetTests.java
new file mode 100644
index 0000000..25d94a6
--- /dev/null
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/LongHashSetTests.java
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.LongStream;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class LongHashSetTests extends LuceneTestCase {
+
+  private void assertEquals(Set<Long> set1, LongHashSet set2) {
+    LuceneTestCase.assertEquals(set1, set2);
+    LuceneTestCase.assertEquals(set2, set1);
+    LuceneTestCase.assertEquals(set2, set2);
+    assertEquals(set1.hashCode(), set2.hashCode());
+
+    if (set1.isEmpty() == false) {
+      Set<Long> set3 = new HashSet<>(set1);
+      long removed = set3.iterator().next();
+      while (true) {
+        long next = random().nextLong();
+        if (next != removed && set3.add(next)) {
+          break;
+        }
+      }
+      assertNotEquals(set3, set2);
+    }
+  }
+
+  private void assertNotEquals(Set<Long> set1, LongHashSet set2) {
+    assertFalse(set1.equals(set2));
+    assertFalse(set2.equals(set1));
+    LongHashSet set3 = new LongHashSet(set1.stream().mapToLong(Long::longValue).toArray());
+    assertFalse(set2.equals(set3));
+  }
+
+  public void testEmpty() {
+    Set<Long> set1 = new HashSet<>();
+    LongHashSet set2 = new LongHashSet();
+    assertEquals(set1, set2);
+  }
+
+  public void testOneValue() {
+    Set<Long> set1 = new HashSet<>(Arrays.asList(42L));
+    LongHashSet set2 = new LongHashSet(42);
+    assertEquals(set1, set2);
+
+    set1 = new HashSet<>(Arrays.asList(Long.MIN_VALUE));
+    set2 = new LongHashSet(Long.MIN_VALUE);
+    assertEquals(set1, set2);
+  }
+
+  public void testTwoValues() {
+    Set<Long> set1 = new HashSet<>(Arrays.asList(42L, Long.MAX_VALUE));
+    LongHashSet set2 = new LongHashSet(42, Long.MAX_VALUE);
+    assertEquals(set1, set2);
+
+    set1 = new HashSet<>(Arrays.asList(Long.MIN_VALUE, 42L));
+    set2 = new LongHashSet(Long.MIN_VALUE, 42L);
+    assertEquals(set1, set2);
+  }
+
+  public void testRandom() {
+    final int iters = atLeast(10);
+    for (int iter = 0; iter < iters; ++iter) {
+      long[] values = new long[random().nextInt(1 << random().nextInt(16))];
+      for (int i = 0; i < values.length; ++i) {
+        if (i == 0 || random().nextInt(10) < 9) {
+          values[i] = random().nextLong();
+        } else {
+          values[i] = values[random().nextInt(i)];
+        }
+      }
+      if (values.length > 0 && random().nextBoolean()) {
+        values[values.length/2] = Long.MIN_VALUE;
+      }
+      Set<Long> set1 = LongStream.of(values).mapToObj(Long::valueOf).collect(Collectors.toCollection(HashSet::new));
+      LongHashSet set2 = new LongHashSet(values);
+      assertEquals(set1, set2);
+    }
+  }
+}
\ No newline at end of file

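The tests above validate LongHashSet against java.util.HashSet as a reference implementation, a useful pattern for any specialized collection. A condensed sketch (assumes imports of java.util.HashSet, java.util.Set, java.util.stream.Collectors, and java.util.stream.LongStream):

    long[] values = {1L, 2L, 2L, Long.MIN_VALUE};
    Set<Long> reference = LongStream.of(values).boxed()
        .collect(Collectors.toCollection(HashSet::new));
    LongHashSet actual = new LongHashSet(values);
    assert actual.equals(reference);
    assert actual.hashCode() == reference.hashCode(); // both sum element hash codes
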
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ea1569e2/lucene/sandbox/src/test/org/apache/lucene/search/TestDocValuesTermsQuery.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/test/org/apache/lucene/search/TestDocValuesTermsQuery.java b/lucene/sandbox/src/test/org/apache/lucene/search/TestDocValuesTermsQuery.java
index 6e99492..187f172 100644
--- a/lucene/sandbox/src/test/org/apache/lucene/search/TestDocValuesTermsQuery.java
+++ b/lucene/sandbox/src/test/org/apache/lucene/search/TestDocValuesTermsQuery.java
@@ -38,6 +38,7 @@ public class TestDocValuesTermsQuery extends LuceneTestCase {
 
   public void testEquals() {
     assertEquals(new DocValuesTermsQuery("foo", "bar"), new DocValuesTermsQuery("foo", "bar"));
+    assertEquals(new DocValuesTermsQuery("foo", "bar"), new DocValuesTermsQuery("foo", "bar", "bar"));
     assertEquals(new DocValuesTermsQuery("foo", "bar", "baz"), new DocValuesTermsQuery("foo", "baz", "bar"));
     assertFalse(new DocValuesTermsQuery("foo", "bar").equals(new DocValuesTermsQuery("foo2", "bar")));
     assertFalse(new DocValuesTermsQuery("foo", "bar").equals(new DocValuesTermsQuery("foo", "baz")));


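The new assertion holds because the DocValuesTermsQuery constructor (earlier in this commit) skips consecutive duplicates while feeding the sorted terms into the PrefixCodedTerms builder, so both queries end up with identical termData. In sketch form:

    Query a = new DocValuesTermsQuery("foo", "bar");
    Query b = new DocValuesTermsQuery("foo", "bar", "bar"); // duplicate is collapsed
    assert a.equals(b) && a.hashCode() == b.hashCode();
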
[28/50] [abbrv] lucene-solr:jira/solr-8593: don't create unnecessary lambda

Posted by kr...@apache.org.
don't create unnecessary lambda


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7cffae3a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7cffae3a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7cffae3a

Branch: refs/heads/jira/solr-8593
Commit: 7cffae3a16f7d0c94a79a273a702c0013af7f5ac
Parents: 4efbde4
Author: Mike McCandless <mi...@apache.org>
Authored: Fri Dec 9 18:35:13 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Fri Dec 9 18:35:13 2016 -0500

----------------------------------------------------------------------
 .../core/src/java/org/apache/lucene/index/IndexWriterConfig.java   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7cffae3a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
index ce4f0a8..1e1e795 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
@@ -476,7 +476,7 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig {
       }
     }
     this.indexSort = sort;
-    this.indexSortFields = Arrays.stream(sort.getSort()).map((s) -> s.getField()).collect(Collectors.toSet());
+    this.indexSortFields = Arrays.stream(sort.getSort()).map(SortField::getField).collect(Collectors.toSet());
     return this;
   }
 

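Both forms compute the same set of field names; the method reference simply avoids emitting a synthetic lambda method for a trivial getter call. A sketch of the equivalence (the Sort built here is purely for illustration; assumes imports of java.util.Arrays, java.util.Set, java.util.stream.Collectors, and org.apache.lucene.search.Sort/SortField):

    Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG));
    Set<String> viaLambda    = Arrays.stream(sort.getSort())
                                     .map((s) -> s.getField()).collect(Collectors.toSet());
    Set<String> viaMethodRef = Arrays.stream(sort.getSort())
                                     .map(SortField::getField).collect(Collectors.toSet());
    assert viaLambda.equals(viaMethodRef);
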

[42/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-9846: OverseerAutoReplicaFailoverThread can take too long to stop and leak out of unit tests.

Posted by kr...@apache.org.
SOLR-9846: OverseerAutoReplicaFailoverThread can take too long to stop and leak out of unit tests.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7dec783b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7dec783b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7dec783b

Branch: refs/heads/jira/solr-8593
Commit: 7dec783b287ab554cc781622b4d6127e553fd2ae
Parents: e82399d
Author: markrmiller <ma...@apache.org>
Authored: Sun Dec 11 22:02:48 2016 -0500
Committer: markrmiller <ma...@apache.org>
Committed: Wed Dec 14 12:57:21 2016 -0500

----------------------------------------------------------------------
 solr/CHANGES.txt                                            | 2 ++
 .../solr/cloud/OverseerAutoReplicaFailoverThread.java       | 9 +++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7dec783b/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 946a04e..5f0357b 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -288,6 +288,8 @@ Other Changes
   response (instead of a SolrException) and includes the remote error message as part of the exception message
  (Tomás Fernández Löbbe)
 
+* SOLR-9846: OverseerAutoReplicaFailoverThread can take too long to stop and leak out of unit tests. (Mark Miller)
+
 ==================  6.3.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7dec783b/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java b/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java
index 83679a5..10b4bf3 100644
--- a/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java
+++ b/solr/core/src/java/org/apache/solr/cloud/OverseerAutoReplicaFailoverThread.java
@@ -89,6 +89,8 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
 
   private final int workLoopDelay;
   private final int waitAfterExpiration;
+
+  private volatile Thread thread;
   
   public OverseerAutoReplicaFailoverThread(CloudConfig config, ZkStateReader zkStateReader,
       UpdateShardHandler updateShardHandler) {
@@ -118,7 +120,7 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
   
   @Override
   public void run() {
-    
+    this.thread = Thread.currentThread();
     while (!this.isClosed) {
       // work loop
       log.debug("do " + this.getClass().getSimpleName() + " work loop");
@@ -136,7 +138,6 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
         try {
           Thread.sleep(workLoopDelay);
         } catch (InterruptedException e) {
-          Thread.currentThread().interrupt();
           return;
         }
       }
@@ -480,6 +481,10 @@ public class OverseerAutoReplicaFailoverThread implements Runnable, Closeable {
   @Override
   public void close() {
     isClosed = true;
+    Thread lThread = thread;
+    if (lThread != null) {
+      lThread.interrupt();
+    }
   }
   
   public boolean isClosed() {


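The pattern here, capturing the worker thread in a volatile field so close() can interrupt a long sleep instead of waiting out the full workLoopDelay, condenses to roughly this (names simplified; not the actual class):

    class Worker implements Runnable, java.io.Closeable {
      private volatile boolean isClosed;
      private volatile Thread thread;

      @Override public void run() {
        this.thread = Thread.currentThread(); // publish so close() can interrupt us
        while (!isClosed) {
          doWork();
          try {
            Thread.sleep(5000); // stands in for workLoopDelay
          } catch (InterruptedException e) {
            return; // close() interrupted the sleep: exit promptly
          }
        }
      }

      @Override public void close() {
        isClosed = true;
        Thread t = thread; // read the volatile once; run() may not have started yet
        if (t != null) {
          t.interrupt();
        }
      }

      private void doWork() {}
    }
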
[10/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7563: remove redundant array copy in PackedIndexTree.clone

Posted by kr...@apache.org.
LUCENE-7563: remove redundant array copy in PackedIndexTree.clone


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/bd8b1915
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/bd8b1915
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/bd8b1915

Branch: refs/heads/jira/solr-8593
Commit: bd8b191505d92c89a483a6189497374238476a00
Parents: 5e8db2e
Author: Mike McCandless <mi...@apache.org>
Authored: Mon Dec 5 06:45:16 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Mon Dec 5 06:45:16 2016 -0500

----------------------------------------------------------------------
 lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java | 1 -
 1 file changed, 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bd8b1915/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
index 6cccf4c..44744c1 100644
--- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
+++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java
@@ -347,7 +347,6 @@ public final class BKDReader extends PointValues implements Accountable {
       index.nodeID = nodeID;
       index.level = level;
       index.splitDim = splitDim;
-      System.arraycopy(negativeDeltas, level*numDims, index.negativeDeltas, level*numDims, numDims);
       index.leafBlockFPStack[level] = leafBlockFPStack[level];
       index.leftNodePositions[level] = leftNodePositions[level];
       index.rightNodePositions[level] = rightNodePositions[level];


[50/50] [abbrv] lucene-solr:jira/solr-8593: Merge branch 'apache-https-master' into jira/solr-8593

Posted by kr...@apache.org.
Merge branch 'apache-https-master' into jira/solr-8593


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/6c0cafed
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/6c0cafed
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/6c0cafed

Branch: refs/heads/jira/solr-8593
Commit: 6c0cafedac053cce81811f4e38f0eb8d756069c8
Parents: 37fdc37 3b182aa
Author: Kevin Risden <kr...@apache.org>
Authored: Thu Dec 15 15:34:57 2016 -0600
Committer: Kevin Risden <kr...@apache.org>
Committed: Thu Dec 15 15:34:57 2016 -0600

----------------------------------------------------------------------
 .gitignore                                      |    1 +
 dev-tools/scripts/smokeTestRelease.py           |    1 -
 lucene/CHANGES.txt                              |   42 +
 .../classification/utils/DatasetSplitter.java   |    3 +-
 .../codecs/memory/DirectPostingsFormat.java     |    3 +
 .../lucene/codecs/memory/FSTOrdTermsReader.java |    3 +
 .../lucene/codecs/memory/FSTTermsReader.java    |    3 +
 .../codecs/simpletext/SimpleTextBKDReader.java  |  281 ++-
 .../codecs/simpletext/SimpleTextBKDWriter.java  | 1661 ++++++++++++++++++
 .../simpletext/SimpleTextPointsReader.java      |    5 +-
 .../simpletext/SimpleTextPointsWriter.java      |  190 +-
 .../lucene/codecs/blocktree/FieldReader.java    |    3 +
 .../CompressingStoredFieldsWriter.java          |   19 +-
 .../CompressingTermVectorsWriter.java           |   11 +-
 .../GrowableByteArrayDataOutput.java            |   83 -
 .../codecs/lucene60/Lucene60PointsFormat.java   |   10 +-
 .../lucene/codecs/lucene60/package-info.java    |    4 +-
 .../lucene/codecs/lucene62/package-info.java    |    4 +-
 .../lucene70/Lucene70DocValuesConsumer.java     |  163 +-
 .../lucene70/Lucene70DocValuesFormat.java       |    5 +-
 .../lucene70/Lucene70DocValuesProducer.java     |  220 ++-
 .../lucene/codecs/lucene70/package-info.java    |   15 +-
 .../org/apache/lucene/index/CheckIndex.java     |  312 ++--
 .../org/apache/lucene/index/IndexWriter.java    |   63 +-
 .../apache/lucene/index/IndexWriterConfig.java  |    3 +
 .../org/apache/lucene/index/LeafReader.java     |    2 +-
 .../lucene/index/LiveIndexWriterConfig.java     |   13 +
 .../apache/lucene/index/PrefixCodedTerms.java   |    4 +-
 .../apache/lucene/index/SegmentCoreReaders.java |    8 +
 .../org/apache/lucene/index/SegmentInfos.java   |    7 +-
 .../src/java/org/apache/lucene/index/Terms.java |    8 +-
 .../store/GrowableByteArrayDataOutput.java      |  103 ++
 .../org/apache/lucene/store/MMapDirectory.java  |    4 +-
 .../java/org/apache/lucene/util/LongValues.java |    9 +
 .../org/apache/lucene/util/bkd/BKDReader.java   |  654 ++++---
 .../org/apache/lucene/util/bkd/BKDWriter.java   |  370 +++-
 .../apache/lucene/util/bkd/DocIdsWriter.java    |    4 +-
 .../apache/lucene/util/bkd/HeapPointReader.java |    7 +-
 .../apache/lucene/util/bkd/HeapPointWriter.java |   22 +-
 .../util/bkd/MutablePointsReaderUtils.java      |   21 +-
 .../lucene/util/bkd/OfflinePointReader.java     |    8 +-
 .../lucene/util/bkd/OfflinePointWriter.java     |   10 +-
 .../org/apache/lucene/util/bkd/PointReader.java |   14 +-
 .../org/apache/lucene/util/bkd/PointWriter.java |    6 +-
 .../apache/lucene/util/packed/DirectWriter.java |    8 +-
 .../TestGrowableByteArrayDataOutput.java        |   80 -
 .../lucene70/TestLucene70DocValuesFormat.java   |  152 ++
 .../apache/lucene/index/TestIndexSorting.java   |   30 +-
 .../org/apache/lucene/index/TestTermsEnum.java  |   18 +
 .../index/TestTragicIndexWriterDeadlock.java    |   69 +-
 .../apache/lucene/search/TestPointQueries.java  |    3 +
 .../store/TestGrowableByteArrayDataOutput.java  |   80 +
 .../apache/lucene/util/bkd/Test2BBKDPoints.java |   11 +-
 .../org/apache/lucene/util/bkd/TestBKD.java     |   54 +
 .../org/apache/lucene/util/fst/TestFSTs.java    |    2 +-
 .../uhighlight/MemoryIndexOffsetStrategy.java   |   10 +-
 .../uhighlight/MultiTermHighlighting.java       |   37 +-
 .../lucene/search/uhighlight/PhraseHelper.java  |  158 +-
 .../search/uhighlight/UnifiedHighlighter.java   |   64 +-
 .../uhighlight/TestUnifiedHighlighter.java      |  275 +++
 .../TestUnifiedHighlighterExtensibility.java    |    3 +-
 lucene/ivy-versions.properties                  |    2 +-
 .../apache/lucene/search/DocValuesStats.java    |  202 +++
 .../lucene/search/DocValuesStatsCollector.java  |   64 +
 .../search/TestDocValuesStatsCollector.java     |  212 +++
 .../apache/lucene/document/NearestNeighbor.java |   44 +-
 .../apache/lucene/document/RangeFieldQuery.java |    1 +
 .../lucene/search/DocValuesNumbersQuery.java    |   26 +-
 .../lucene/search/DocValuesTermsQuery.java      |   49 +-
 .../org/apache/lucene/search/LongHashSet.java   |  156 ++
 .../search/BaseRangeFieldQueryTestCase.java     |    2 +-
 .../apache/lucene/search/LongHashSetTests.java  |  100 ++
 .../lucene/search/TestDocValuesTermsQuery.java  |    1 +
 .../org/apache/lucene/search/QueryUtils.java    |    4 -
 solr/CHANGES.txt                                |   48 +
 .../TestPlainTextEntityProcessor.java           |  106 ++
 solr/contrib/ltr/README.md                      |    6 +-
 .../java/org/apache/solr/ltr/FeatureLogger.java |   10 +-
 .../org/apache/solr/ltr/LTRScoringQuery.java    |   22 +-
 .../org/apache/solr/ltr/LTRThreadModule.java    |   29 +
 .../org/apache/solr/ltr/feature/Feature.java    |    3 +-
 .../solr/ltr/feature/FieldValueFeature.java     |   18 +-
 .../solr/ltr/feature/OriginalScoreFeature.java  |   12 +-
 .../apache/solr/ltr/feature/SolrFeature.java    |   17 +-
 .../ltr/store/rest/ManagedFeatureStore.java     |    1 -
 .../solr/ltr/store/rest/ManagedModelStore.java  |   32 +-
 .../test-files/solr/collection1/conf/schema.xml |    2 +
 .../solr/ltr/feature/TestFieldValueFeature.java |   48 +-
 .../ltr/feature/TestOriginalScoreScorer.java    |   47 +
 .../model/TestMultipleAdditiveTreesModel.java   |   44 +-
 .../solr/cloud/CloudConfigSetService.java       |   24 +-
 .../org/apache/solr/cloud/CloudDescriptor.java  |    2 +-
 .../apache/solr/cloud/CreateCollectionCmd.java  |  146 +-
 .../org/apache/solr/cloud/ElectionContext.java  |   20 +-
 .../org/apache/solr/cloud/LeaderElector.java    |    9 +-
 .../OverseerAutoReplicaFailoverThread.java      |    9 +-
 .../org/apache/solr/cloud/ZkController.java     |  128 +-
 .../apache/solr/cloud/ZkSolrResourceLoader.java |    2 +-
 .../org/apache/solr/core/ConfigSetService.java  |    7 +-
 .../org/apache/solr/core/CoreContainer.java     |    2 +-
 .../src/java/org/apache/solr/core/SolrCore.java |    8 -
 .../solr/handler/admin/CollectionsHandler.java  |    2 +
 .../solr/handler/admin/SystemInfoHandler.java   |   51 +-
 .../solr/handler/component/ExpandComponent.java |    6 +
 .../solr/handler/component/QueryComponent.java  |   34 +-
 .../component/QueryElevationComponent.java      |   28 +-
 .../java/org/apache/solr/schema/BoolField.java  |    4 +-
 .../solr/schema/ManagedIndexSchemaFactory.java  |   12 +
 .../org/apache/solr/schema/SchemaManager.java   |    2 +-
 .../apache/solr/search/SolrFieldCacheMBean.java |    4 +-
 .../apache/solr/search/SolrIndexSearcher.java   |   26 +
 .../java/org/apache/solr/search/SortSpec.java   |   10 +
 .../facet/FacetFieldProcessorByArrayDV.java     |   49 +-
 .../org/apache/solr/search/facet/FieldUtil.java |  147 ++
 .../search/grouping/GroupingSpecification.java  |   70 +-
 .../distributed/command/QueryCommand.java       |    2 +-
 .../SearchGroupShardResponseProcessor.java      |    4 +-
 .../TopGroupsShardResponseProcessor.java        |    8 +-
 .../GroupedEndResultTransformer.java            |    4 +-
 .../apache/solr/search/mlt/CloudMLTQParser.java |   19 +-
 .../apache/solr/servlet/SolrDispatchFilter.java |   15 +
 .../apache/solr/servlet/SolrRequestParsers.java |   31 +-
 .../org/apache/solr/uninverting/FieldCache.java |   16 +-
 .../apache/solr/uninverting/FieldCacheImpl.java |  182 +-
 .../uninverting/FieldCacheSanityChecker.java    |    3 +-
 .../solr/uninverting/UninvertingReader.java     |   23 +-
 .../processor/DistributedUpdateProcessor.java   |    8 +-
 .../apache/solr/util/RecordingJSONParser.java   |   17 +-
 .../solr/util/SolrFileCleaningTracker.java      |  147 ++
 .../solr/cloud/DocValuesNotIndexedTest.java     |   64 +-
 .../apache/solr/cloud/LeaderElectionTest.java   |    2 +
 ...verseerCollectionConfigSetProcessorTest.java |   33 +-
 .../cloud/SegmentTerminateEarlyTestState.java   |   12 +-
 .../solr/cloud/TestMiniSolrCloudCluster.java    |   50 -
 .../apache/solr/cloud/TestSegmentSorting.java   |  133 ++
 .../org/apache/solr/cloud/ZkSolrClientTest.java |   54 +
 .../handler/component/TestExpandComponent.java  |   15 +
 .../ManagedSchemaRoundRobinCloudTest.java       |   98 ++
 .../solr/search/TestSolrFieldCacheMBean.java    |    2 +
 solr/licenses/commons-fileupload-1.3.1.jar.sha1 |    1 -
 solr/licenses/commons-fileupload-1.3.2.jar.sha1 |    1 +
 .../solr/client/solrj/impl/CloudSolrClient.java |    4 +
 .../solrj/impl/ConcurrentUpdateSolrClient.java  |   16 +-
 .../java/org/apache/solr/common/PushWriter.java |    2 +-
 .../apache/solr/common/cloud/SolrZkClient.java  |   29 +-
 .../apache/solr/common/cloud/ZkCmdExecutor.java |   15 +-
 .../solr/client/solrj/SolrExampleTests.java     |    7 +-
 .../apache/solr/client/solrj/SolrQueryTest.java |    2 +-
 .../solr/common/util/TestJsonRecordReader.java  |   35 +-
 149 files changed, 6939 insertions(+), 1661 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c0cafed/lucene/ivy-versions.properties
----------------------------------------------------------------------


[20/50] [abbrv] lucene-solr:jira/solr-8593: fix java warning in SolrQueryTest

Posted by kr...@apache.org.
fix java warning in SolrQueryTest


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/cacabc9a
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/cacabc9a
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/cacabc9a

Branch: refs/heads/jira/solr-8593
Commit: cacabc9a4edf299f1dd2e5d08cc66845bc52fe98
Parents: bfc3690
Author: Christine Poerschke <cp...@apache.org>
Authored: Wed Dec 7 21:16:42 2016 +0000
Committer: Christine Poerschke <cp...@apache.org>
Committed: Wed Dec 7 21:16:42 2016 +0000

----------------------------------------------------------------------
 .../solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/cacabc9a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java
index d27847f..1c86c93 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java
@@ -441,7 +441,7 @@ public class SolrQueryTest extends LuceneTestCase {
     solrQuery.addMoreLikeThisField("mlt3");
     solrQuery.addMoreLikeThisField("mlt4");
     assertEquals(4, solrQuery.getMoreLikeThisFields().length);
-    solrQuery.setMoreLikeThisFields(null);
+    solrQuery.setMoreLikeThisFields((String[])null);
     assertTrue(null == solrQuery.getMoreLikeThisFields());
     assertFalse(solrQuery.getMoreLikeThis());
 

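The cast is what silences the warning: passing a bare null to a varargs method is ambiguous, since it could mean the whole array is null or a one-element array containing null. A small sketch of the distinction (method name hypothetical):

    static void fields(String... f) {}

    fields(null);                  // warns: inexact argument; compiled as f == null
    fields((String[]) null);       // explicit: the array itself is null
    fields(new String[] {null});   // explicit: one element, which is null
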

[09/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7563: use a compressed format for the in-heap BKD index

Posted by kr...@apache.org.
LUCENE-7563: use a compressed format for the in-heap BKD index


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/5e8db2e0
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/5e8db2e0
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/5e8db2e0

Branch: refs/heads/jira/solr-8593
Commit: 5e8db2e068f2549b9619d5ac48a50c8032fc292b
Parents: 39c2f3d
Author: Mike McCandless <mi...@apache.org>
Authored: Sun Dec 4 05:18:04 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Sun Dec 4 05:18:04 2016 -0500

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |    4 +
 .../codecs/simpletext/SimpleTextBKDReader.java  |  281 ++-
 .../codecs/simpletext/SimpleTextBKDWriter.java  | 1661 ++++++++++++++++++
 .../simpletext/SimpleTextPointsReader.java      |    5 +-
 .../simpletext/SimpleTextPointsWriter.java      |  188 +-
 .../codecs/lucene60/Lucene60PointsFormat.java   |   10 +-
 .../lucene/codecs/lucene60/package-info.java    |    4 +-
 .../lucene/codecs/lucene62/package-info.java    |    4 +-
 .../lucene/codecs/lucene70/package-info.java    |   15 +-
 .../org/apache/lucene/index/CheckIndex.java     |  312 ++--
 .../org/apache/lucene/util/bkd/BKDReader.java   |  655 ++++---
 .../org/apache/lucene/util/bkd/BKDWriter.java   |  293 ++-
 .../apache/lucene/util/bkd/HeapPointReader.java |    7 +-
 .../apache/lucene/util/bkd/HeapPointWriter.java |   22 +-
 .../util/bkd/MutablePointsReaderUtils.java      |   21 +-
 .../lucene/util/bkd/OfflinePointReader.java     |    8 +-
 .../lucene/util/bkd/OfflinePointWriter.java     |   10 +-
 .../org/apache/lucene/util/bkd/PointReader.java |   14 +-
 .../org/apache/lucene/util/bkd/PointWriter.java |    6 +-
 .../apache/lucene/search/TestPointQueries.java  |    3 +
 .../apache/lucene/util/bkd/Test2BBKDPoints.java |   11 +-
 .../org/apache/lucene/util/bkd/TestBKD.java     |   54 +
 .../org/apache/lucene/util/fst/TestFSTs.java    |    2 +-
 .../apache/lucene/document/NearestNeighbor.java |   44 +-
 24 files changed, 2998 insertions(+), 636 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 4afc507..79e44e1 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -126,6 +126,10 @@ Optimizations
 * LUCENE-7568: Optimize merging when index sorting is used but the
   index is already sorted (Jim Ferenczi via Mike McCandless)
 
+* LUCENE-7563: The BKD in-memory index for dimensional points now uses
+  a compressed format, using substantially less RAM in some cases
+  (Adrien Grand, Mike McCandless)
+
 Other
 
 * LUCENE-7546: Fixed references to benchmark wikipedia data and the Jenkins line-docs file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java
index a2b784a..488547b 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java
@@ -16,13 +16,17 @@
  */
 package org.apache.lucene.codecs.simpletext;
 
-
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 
+import org.apache.lucene.codecs.simpletext.SimpleTextUtil;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.PointValues;
 import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.bkd.BKDReader;
 
@@ -30,15 +34,105 @@ import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_C
 import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_DOC_ID;
 import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_VALUE;
 
-class SimpleTextBKDReader extends BKDReader {
+/** Forked from {@link BKDReader} and simplified/specialized for SimpleText's usage */
+
+final class SimpleTextBKDReader extends PointValues implements Accountable {
+  // Packed array of byte[] holding all split values in the full binary tree:
+  final private byte[] splitPackedValues; 
+  final long[] leafBlockFPs;
+  final private int leafNodeOffset;
+  final int numDims;
+  final int bytesPerDim;
+  final int bytesPerIndexEntry;
+  final IndexInput in;
+  final int maxPointsInLeafNode;
+  final byte[] minPackedValue;
+  final byte[] maxPackedValue;
+  final long pointCount;
+  final int docCount;
+  final int version;
+  protected final int packedBytesLength;
 
-  public SimpleTextBKDReader(IndexInput datIn, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues,
+  public SimpleTextBKDReader(IndexInput in, int numDims, int maxPointsInLeafNode, int bytesPerDim, long[] leafBlockFPs, byte[] splitPackedValues,
                              byte[] minPackedValue, byte[] maxPackedValue, long pointCount, int docCount) throws IOException {
-    super(datIn, numDims, maxPointsInLeafNode, bytesPerDim, leafBlockFPs, splitPackedValues, minPackedValue, maxPackedValue, pointCount, docCount);
+    this.in = in;
+    this.numDims = numDims;
+    this.maxPointsInLeafNode = maxPointsInLeafNode;
+    this.bytesPerDim = bytesPerDim;
+    // no version check here because callers of this API (SimpleText) have no back compat:
+    bytesPerIndexEntry = numDims == 1 ? bytesPerDim : bytesPerDim + 1;
+    packedBytesLength = numDims * bytesPerDim;
+    this.leafNodeOffset = leafBlockFPs.length;
+    this.leafBlockFPs = leafBlockFPs;
+    this.splitPackedValues = splitPackedValues;
+    this.minPackedValue = minPackedValue;
+    this.maxPackedValue = maxPackedValue;
+    this.pointCount = pointCount;
+    this.docCount = docCount;
+    this.version = SimpleTextBKDWriter.VERSION_CURRENT;
+    assert minPackedValue.length == packedBytesLength;
+    assert maxPackedValue.length == packedBytesLength;
   }
 
-  @Override
-  protected void visitDocIDs(IndexInput in, long blockFP, IntersectVisitor visitor) throws IOException {
+  /** Used to track all state for a single call to {@link #intersect}. */
+  public static final class IntersectState {
+    final IndexInput in;
+    final int[] scratchDocIDs;
+    final byte[] scratchPackedValue;
+    final int[] commonPrefixLengths;
+
+    final IntersectVisitor visitor;
+
+    public IntersectState(IndexInput in, int numDims,
+                          int packedBytesLength,
+                          int maxPointsInLeafNode,
+                          IntersectVisitor visitor) {
+      this.in = in;
+      this.visitor = visitor;
+      this.commonPrefixLengths = new int[numDims];
+      this.scratchDocIDs = new int[maxPointsInLeafNode];
+      this.scratchPackedValue = new byte[packedBytesLength];
+    }
+  }
+
+  public void intersect(IntersectVisitor visitor) throws IOException {
+    intersect(getIntersectState(visitor), 1, minPackedValue, maxPackedValue);
+  }
+
+  /** Fast path: this is called when the query box fully encompasses all cells under this node. */
+  private void addAll(IntersectState state, int nodeID) throws IOException {
+    //System.out.println("R: addAll nodeID=" + nodeID);
+
+    if (nodeID >= leafNodeOffset) {
+      //System.out.println("ADDALL");
+      visitDocIDs(state.in, leafBlockFPs[nodeID-leafNodeOffset], state.visitor);
+      // TODO: we can assert that the first value here in fact matches what the index claimed?
+    } else {
+      addAll(state, 2*nodeID);
+      addAll(state, 2*nodeID+1);
+    }
+  }
+
+  /** Create a new {@link IntersectState} */
+  public IntersectState getIntersectState(IntersectVisitor visitor) {
+    return new IntersectState(in.clone(), numDims,
+                              packedBytesLength,
+                              maxPointsInLeafNode,
+                              visitor);
+  }
+
+  /** Visits all docIDs and packed values in a single leaf block */
+  public void visitLeafBlockValues(int nodeID, IntersectState state) throws IOException {
+    int leafID = nodeID - leafNodeOffset;
+
+    // Leaf node; scan and filter all points in this block:
+    int count = readDocIDs(state.in, leafBlockFPs[leafID], state.scratchDocIDs);
+
+    // Again, this time reading values and checking with the visitor
+    visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, count, state.visitor);
+  }
+
+  void visitDocIDs(IndexInput in, long blockFP, IntersectVisitor visitor) throws IOException {
     BytesRefBuilder scratch = new BytesRefBuilder();
     in.seek(blockFP);
     readLine(in, scratch);
@@ -50,8 +144,7 @@ class SimpleTextBKDReader extends BKDReader {
     }
   }
 
-  @Override
-  protected int readDocIDs(IndexInput in, long blockFP, int[] docIDs) throws IOException {
+  int readDocIDs(IndexInput in, long blockFP, int[] docIDs) throws IOException {
     BytesRefBuilder scratch = new BytesRefBuilder();
     in.seek(blockFP);
     readLine(in, scratch);
@@ -63,8 +156,7 @@ class SimpleTextBKDReader extends BKDReader {
     return count;
   }
 
-  @Override
-  protected void visitDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException {
+  void visitDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException {
     visitor.grow(count);
     // NOTE: we don't do prefix coding, so we ignore commonPrefixLengths
     assert scratchPackedValue.length == packedBytesLength;
@@ -79,6 +171,175 @@ class SimpleTextBKDReader extends BKDReader {
     }
   }
 
+  private void visitCompressedDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor, int compressedDim) throws IOException {
+    // the byte at `compressedByteOffset` is compressed using run-length compression,
+    // other suffix bytes are stored verbatim
+    final int compressedByteOffset = compressedDim * bytesPerDim + commonPrefixLengths[compressedDim];
+    commonPrefixLengths[compressedDim]++;
+    int i;
+    for (i = 0; i < count; ) {
+      scratchPackedValue[compressedByteOffset] = in.readByte();
+      final int runLen = Byte.toUnsignedInt(in.readByte());
+      for (int j = 0; j < runLen; ++j) {
+        for(int dim=0;dim<numDims;dim++) {
+          int prefix = commonPrefixLengths[dim];
+          in.readBytes(scratchPackedValue, dim*bytesPerDim + prefix, bytesPerDim - prefix);
+        }
+        visitor.visit(docIDs[i+j], scratchPackedValue);
+      }
+      i += runLen;
+    }
+    if (i != count) {
+      throw new CorruptIndexException("Sub blocks do not add up to the expected count: " + count + " != " + i, in);
+    }
+  }
+
+  private int readCompressedDim(IndexInput in) throws IOException {
+    int compressedDim = in.readByte();
+    if (compressedDim < -1 || compressedDim >= numDims) {
+      throw new CorruptIndexException("Got compressedDim="+compressedDim, in);
+    }
+    return compressedDim;
+  }
+
+  private void readCommonPrefixes(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in) throws IOException {
+    for(int dim=0;dim<numDims;dim++) {
+      int prefix = in.readVInt();
+      commonPrefixLengths[dim] = prefix;
+      if (prefix > 0) {
+        in.readBytes(scratchPackedValue, dim*bytesPerDim, prefix);
+      }
+      //System.out.println("R: " + dim + " of " + numDims + " prefix=" + prefix);
+    }
+  }
+
+  private void intersect(IntersectState state,
+                         int nodeID,
+                         byte[] cellMinPacked, byte[] cellMaxPacked)
+    throws IOException {
+
+    /*
+    System.out.println("\nR: intersect nodeID=" + nodeID);
+    for(int dim=0;dim<numDims;dim++) {
+      System.out.println("  dim=" + dim + "\n    cellMin=" + new BytesRef(cellMinPacked, dim*bytesPerDim, bytesPerDim) + "\n    cellMax=" + new BytesRef(cellMaxPacked, dim*bytesPerDim, bytesPerDim));
+    }
+    */
+
+    Relation r = state.visitor.compare(cellMinPacked, cellMaxPacked);
+
+    if (r == Relation.CELL_OUTSIDE_QUERY) {
+      // This cell is fully outside of the query shape: stop recursing
+      return;
+    } else if (r == Relation.CELL_INSIDE_QUERY) {
+      // This cell is fully inside of the query shape: recursively add all points in this cell without filtering
+      addAll(state, nodeID);
+      return;
+    } else {
+      // The cell crosses the shape boundary, or the cell fully contains the query, so we fall through and do full filtering
+    }
+
+    if (nodeID >= leafNodeOffset) {
+      // TODO: we can assert that the first value here in fact matches what the index claimed?
+
+      int leafID = nodeID - leafNodeOffset;
+      
+      // In the unbalanced case it's possible the left most node only has one child:
+      if (leafID < leafBlockFPs.length) {
+        // Leaf node; scan and filter all points in this block:
+        int count = readDocIDs(state.in, leafBlockFPs[leafID], state.scratchDocIDs);
+
+        // Again, this time reading values and checking with the visitor
+        visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, count, state.visitor);
+      }
+
+    } else {
+      
+      // Non-leaf node: recurse on the split left and right nodes
+
+      int address = nodeID * bytesPerIndexEntry;
+      int splitDim;
+      if (numDims == 1) {
+        splitDim = 0;
+      } else {
+        splitDim = splitPackedValues[address++] & 0xff;
+      }
+      
+      assert splitDim < numDims;
+
+      // TODO: can we alloc & reuse this up front?
+
+      byte[] splitPackedValue = new byte[packedBytesLength];
+
+      // Recurse on left sub-tree:
+      System.arraycopy(cellMaxPacked, 0, splitPackedValue, 0, packedBytesLength);
+      System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+      intersect(state,
+                2*nodeID,
+                cellMinPacked, splitPackedValue);
+
+      // Recurse on right sub-tree:
+      System.arraycopy(cellMinPacked, 0, splitPackedValue, 0, packedBytesLength);
+      System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+      intersect(state,
+                2*nodeID+1,
+                splitPackedValue, cellMaxPacked);
+    }
+  }
+
+  /** Copies the split value for this node into the provided byte array */
+  public void copySplitValue(int nodeID, byte[] splitPackedValue) {
+    int address = nodeID * bytesPerIndexEntry;
+    int splitDim;
+    if (numDims == 1) {
+      splitDim = 0;
+    } else {
+      splitDim = splitPackedValues[address++] & 0xff;
+    }
+    
+    assert splitDim < numDims;
+    System.arraycopy(splitPackedValues, address, splitPackedValue, splitDim*bytesPerDim, bytesPerDim);
+  }
+
+  @Override
+  public long ramBytesUsed() {
+    return RamUsageEstimator.sizeOf(splitPackedValues) +
+        RamUsageEstimator.sizeOf(leafBlockFPs);
+  }
+
+  @Override
+  public byte[] getMinPackedValue() {
+    return minPackedValue.clone();
+  }
+
+  @Override
+  public byte[] getMaxPackedValue() {
+    return maxPackedValue.clone();
+  }
+
+  @Override
+  public int getNumDimensions() {
+    return numDims;
+  }
+
+  @Override
+  public int getBytesPerDimension() {
+    return bytesPerDim;
+  }
+
+  @Override
+  public long size() {
+    return pointCount;
+  }
+
+  @Override
+  public int getDocCount() {
+    return docCount;
+  }
+
+  public boolean isLeafNode(int nodeID) {
+    return nodeID >= leafNodeOffset;
+  }
+
   private int parseInt(BytesRefBuilder scratch, BytesRef prefix) {
     assert startsWith(scratch, prefix);
     return Integer.parseInt(stripPrefix(scratch, prefix));

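The compressed leaf encoding that visitCompressedDocValues decodes stores, for the chosen dimension, one literal byte plus a run length, followed by the verbatim suffix bytes of each value in the run. Reduced to a single dimension (names simplified from the method above), the decode loop is roughly:

    // stream layout per run: [literal byte][runLen][runLen x suffix bytes]
    for (int i = 0; i < count; ) {
      scratch[prefixLen] = in.readByte();                // run-length coded byte
      int runLen = Byte.toUnsignedInt(in.readByte());
      for (int j = 0; j < runLen; ++j) {
        in.readBytes(scratch, prefixLen + 1, suffixLen); // remaining bytes, verbatim
        visitor.visit(docIDs[i + j], scratch);           // scratch now holds a full value
      }
      i += runLen;
    }
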
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java
new file mode 100644
index 0000000..d7674ed
--- /dev/null
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDWriter.java
@@ -0,0 +1,1661 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.codecs.simpletext;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+import java.util.function.IntFunction;
+
+import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.MutablePointValues;
+import org.apache.lucene.index.MergeState;
+import org.apache.lucene.index.PointValues.IntersectVisitor;
+import org.apache.lucene.index.PointValues.Relation;
+import org.apache.lucene.store.ChecksumIndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IOContext;
+import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.store.TrackingDirectoryWrapper;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.BytesRefComparator;
+import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LongBitSet;
+import org.apache.lucene.util.MSBRadixSorter;
+import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.OfflineSorter;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.bkd.BKDWriter;
+import org.apache.lucene.util.bkd.HeapPointWriter;
+import org.apache.lucene.util.bkd.MutablePointsReaderUtils;
+import org.apache.lucene.util.bkd.OfflinePointReader;
+import org.apache.lucene.util.bkd.OfflinePointWriter;
+import org.apache.lucene.util.bkd.PointReader;
+import org.apache.lucene.util.bkd.PointWriter;
+
+import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_COUNT;
+import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_DOC_ID;
+import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_FP;
+import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_VALUE;
+import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BYTES_PER_DIM;
+import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.DOC_COUNT;
+import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.INDEX_COUNT;
+import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.MAX_LEAF_POINTS;
+import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.MAX_VALUE;
+import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.MIN_VALUE;
+import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.NUM_DIMS;
+import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.POINT_COUNT;
+import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.SPLIT_COUNT;
+import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.SPLIT_DIM;
+import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.SPLIT_VALUE;
+
+
+// TODO
+//   - allow variable length byte[] (across docs and dims), but this is quite a bit more hairy
+//   - we could also index "auto-prefix terms" here, and use better compression, and maybe only use for the "fully contained" case so we'd
+//     only index docIDs
+//   - the index could be efficiently encoded as an FST, so we don't have wasteful
+//     (monotonic) long[] leafBlockFPs; or we could use MonotonicLongValues ... but then
+//     the index is already plenty small: 60M OSM points --> 1.1 MB with 128 points
+//     per leaf, and you can reduce that by putting more points per leaf
+//   - we could use threads while building; the higher nodes are very parallelizable
+
+/** Forked from {@link BKDWriter} and simplified/specialized for SimpleText's usage */
+
+final class SimpleTextBKDWriter implements Closeable {
+
+  public static final String CODEC_NAME = "BKD";
+  public static final int VERSION_START = 0;
+  public static final int VERSION_COMPRESSED_DOC_IDS = 1;
+  public static final int VERSION_COMPRESSED_VALUES = 2;
+  public static final int VERSION_IMPLICIT_SPLIT_DIM_1D = 3;
+  public static final int VERSION_CURRENT = VERSION_IMPLICIT_SPLIT_DIM_1D;
+
+  /** How many bytes each doc takes in the fixed-width offline format */
+  private final int bytesPerDoc;
+
+  /** Default maximum number of point in each leaf block */
+  public static final int DEFAULT_MAX_POINTS_IN_LEAF_NODE = 1024;
+
+  /** Default maximum heap to use, before spilling to (slower) disk */
+  public static final float DEFAULT_MAX_MB_SORT_IN_HEAP = 16.0f;
+
+  /** Maximum number of dimensions */
+  public static final int MAX_DIMS = 8;
+
+  /** How many dimensions we are indexing */
+  protected final int numDims;
+
+  /** How many bytes each value in each dimension takes. */
+  protected final int bytesPerDim;
+
+  /** numDims * bytesPerDim */
+  protected final int packedBytesLength;
+
+  final BytesRefBuilder scratch = new BytesRefBuilder();
+
+  final TrackingDirectoryWrapper tempDir;
+  final String tempFileNamePrefix;
+  final double maxMBSortInHeap;
+
+  final byte[] scratchDiff;
+  final byte[] scratch1;
+  final byte[] scratch2;
+  final BytesRef scratchBytesRef1 = new BytesRef();
+  final BytesRef scratchBytesRef2 = new BytesRef();
+  final int[] commonPrefixLengths;
+
+  protected final FixedBitSet docsSeen;
+
+  private OfflinePointWriter offlinePointWriter;
+  private HeapPointWriter heapPointWriter;
+
+  private IndexOutput tempInput;
+  protected final int maxPointsInLeafNode;
+  private final int maxPointsSortInHeap;
+
+  /** Minimum per-dim values, packed */
+  protected final byte[] minPackedValue;
+
+  /** Maximum per-dim values, packed */
+  protected final byte[] maxPackedValue;
+
+  protected long pointCount;
+
+  /** true if we have so many values that we must write ords using long (8 bytes) instead of int (4 bytes) */
+  protected final boolean longOrds;
+
+  /** An upper bound on how many points the caller will add (includes deletions) */
+  private final long totalPointCount;
+
+  /** True if every document has at most one value.  We specialize this case by not bothering to store the ord since it's redundant with docID.  */
+  protected final boolean singleValuePerDoc;
+
+  /** How much heap OfflineSorter is allowed to use */
+  protected final OfflineSorter.BufferSize offlineSorterBufferMB;
+
+  /** Maximum number of temp files OfflineSorter is allowed to use */
+  protected final int offlineSorterMaxTempFiles;
+
+  private final int maxDoc;
+
+  public SimpleTextBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim,
+                             int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount, boolean singleValuePerDoc) throws IOException {
+    this(maxDoc, tempDir, tempFileNamePrefix, numDims, bytesPerDim, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount, singleValuePerDoc,
+         totalPointCount > Integer.MAX_VALUE, Math.max(1, (long) maxMBSortInHeap), OfflineSorter.MAX_TEMPFILES);
+  }
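+
+  // For example (illustrative values only, not a caller in this patch): a writer
+  // for 2D points with 4-byte dimensions and the defaults above might be built as:
+  //
+  //   SimpleTextBKDWriter w = new SimpleTextBKDWriter(maxDoc, dir, "tmp", 2, 4,
+  //       DEFAULT_MAX_POINTS_IN_LEAF_NODE, DEFAULT_MAX_MB_SORT_IN_HEAP,
+  //       expectedPointCount, true);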
+
+  private SimpleTextBKDWriter(int maxDoc, Directory tempDir, String tempFileNamePrefix, int numDims, int bytesPerDim,
+                              int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount,
+                              boolean singleValuePerDoc, boolean longOrds, long offlineSorterBufferMB, int offlineSorterMaxTempFiles) throws IOException {
+    verifyParams(numDims, maxPointsInLeafNode, maxMBSortInHeap, totalPointCount);
+    // We use tracking dir to deal with removing files on exception, so each place that
+    // creates temp files doesn't need crazy try/finally/success logic:
+    this.tempDir = new TrackingDirectoryWrapper(tempDir);
+    this.tempFileNamePrefix = tempFileNamePrefix;
+    this.maxPointsInLeafNode = maxPointsInLeafNode;
+    this.numDims = numDims;
+    this.bytesPerDim = bytesPerDim;
+    this.totalPointCount = totalPointCount;
+    this.maxDoc = maxDoc;
+    this.offlineSorterBufferMB = OfflineSorter.BufferSize.megabytes(offlineSorterBufferMB);
+    this.offlineSorterMaxTempFiles = offlineSorterMaxTempFiles;
+    docsSeen = new FixedBitSet(maxDoc);
+    packedBytesLength = numDims * bytesPerDim;
+
+    scratchDiff = new byte[bytesPerDim];
+    scratch1 = new byte[packedBytesLength];
+    scratch2 = new byte[packedBytesLength];
+    commonPrefixLengths = new int[numDims];
+
+    minPackedValue = new byte[packedBytesLength];
+    maxPackedValue = new byte[packedBytesLength];
+
+    // If we may have more than 1+Integer.MAX_VALUE values, then we must encode ords with long (8 bytes), else we can use int (4 bytes).
+    this.longOrds = longOrds;
+
+    this.singleValuePerDoc = singleValuePerDoc;
+
+    // dimensional values (numDims * bytesPerDim) + ord (int or long) + docID (int)
+    if (singleValuePerDoc) {
+      // Lucene only supports up to ~2.1B docs, so we better not need longOrds in this case:
+      assert longOrds == false;
+      bytesPerDoc = packedBytesLength + Integer.BYTES;
+    } else if (longOrds) {
+      bytesPerDoc = packedBytesLength + Long.BYTES + Integer.BYTES;
+    } else {
+      bytesPerDoc = packedBytesLength + Integer.BYTES + Integer.BYTES;
+    }
+
+    // As we recurse, we compute temporary partitions of the data, halving the
+    // number of points at each recursion.  Once there are few enough points,
+    // we can switch to sorting in heap instead of offline (on disk).  At any
+    // time in the recursion, we hold the number of points at that level, plus
+    // all recursive halves (i.e. 16 + 8 + 4 + 2), so the memory usage is 2X
+    // what that level alone would consume; we multiply by 0.5 to account for
+    // that, and divide by bytesPerDoc to convert from bytes to points.  Each
+    // dimension has its own sorted partition, so we must divide by numDims as
+    // well.
+
+    maxPointsSortInHeap = (int) (0.5 * (maxMBSortInHeap * 1024 * 1024) / (bytesPerDoc * numDims));
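+    // For example, with maxMBSortInHeap=16, numDims=2, bytesPerDim=8 and int
+    // ords, bytesPerDoc = 16 + 4 + 4 = 24, so maxPointsSortInHeap =
+    // 0.5 * 16*1024*1024 / (24 * 2) = ~174,762 points.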
+
+    // Finally, we must be able to hold at least the leaf node in heap during build:
+    if (maxPointsSortInHeap < maxPointsInLeafNode) {
+      throw new IllegalArgumentException("maxMBSortInHeap=" + maxMBSortInHeap + " only allows for maxPointsSortInHeap=" + maxPointsSortInHeap + ", but this is less than maxPointsInLeafNode=" + maxPointsInLeafNode + "; either increase maxMBSortInHeap or decrease maxPointsInLeafNode");
+    }
+
+    // We write the first maxPointsSortInHeap points in heap, then cutover to offline for additional points:
+    heapPointWriter = new HeapPointWriter(16, maxPointsSortInHeap, packedBytesLength, longOrds, singleValuePerDoc);
+
+    this.maxMBSortInHeap = maxMBSortInHeap;
+  }
+
+  public static void verifyParams(int numDims, int maxPointsInLeafNode, double maxMBSortInHeap, long totalPointCount) {
+    // We encode dim in a single byte in the splitPackedValues, but we only expose 4 bits for it now, in case we want to use
+    // the remaining 4 bits for another purpose later
+    if (numDims < 1 || numDims > MAX_DIMS) {
+      throw new IllegalArgumentException("numDims must be 1 .. " + MAX_DIMS + " (got: " + numDims + ")");
+    }
+    if (maxPointsInLeafNode <= 0) {
+      throw new IllegalArgumentException("maxPointsInLeafNode must be > 0; got " + maxPointsInLeafNode);
+    }
+    if (maxPointsInLeafNode > ArrayUtil.MAX_ARRAY_LENGTH) {
+      throw new IllegalArgumentException("maxPointsInLeafNode must be <= ArrayUtil.MAX_ARRAY_LENGTH (= " + ArrayUtil.MAX_ARRAY_LENGTH + "); got " + maxPointsInLeafNode);
+    }
+    if (maxMBSortInHeap < 0.0) {
+      throw new IllegalArgumentException("maxMBSortInHeap must be >= 0.0 (got: " + maxMBSortInHeap + ")");
+    }
+    if (totalPointCount < 0) {
+      throw new IllegalArgumentException("totalPointCount must be >=0 (got: " + totalPointCount + ")");
+    }
+  }
+
+  /** If the current segment has too many points then we spill over to temp files / offline sort. */
+  private void spillToOffline() throws IOException {
+
+    // For each .add we just append to this input file, then in .finish we sort this input and recursively build the tree:
+    offlinePointWriter = new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, "spill", 0, singleValuePerDoc);
+    tempInput = offlinePointWriter.out;
+    PointReader reader = heapPointWriter.getReader(0, pointCount);
+    for(int i=0;i<pointCount;i++) {
+      boolean hasNext = reader.next();
+      assert hasNext;
+      offlinePointWriter.append(reader.packedValue(), i, heapPointWriter.docIDs[i]);
+    }
+
+    heapPointWriter = null;
+  }
+
+  public void add(byte[] packedValue, int docID) throws IOException {
+    if (packedValue.length != packedBytesLength) {
+      throw new IllegalArgumentException("packedValue should be length=" + packedBytesLength + " (got: " + packedValue.length + ")");
+    }
+
+    if (pointCount >= maxPointsSortInHeap) {
+      if (offlinePointWriter == null) {
+        spillToOffline();
+      }
+      offlinePointWriter.append(packedValue, pointCount, docID);
+    } else {
+      // Not too many points added yet, continue using heap:
+      heapPointWriter.append(packedValue, pointCount, docID);
+    }
+
+    // TODO: we could specialize for the 1D case:
+    if (pointCount == 0) {
+      System.arraycopy(packedValue, 0, minPackedValue, 0, packedBytesLength);
+      System.arraycopy(packedValue, 0, maxPackedValue, 0, packedBytesLength);
+    } else {
+      for(int dim=0;dim<numDims;dim++) {
+        int offset = dim*bytesPerDim;
+        if (StringHelper.compare(bytesPerDim, packedValue, offset, minPackedValue, offset) < 0) {
+          System.arraycopy(packedValue, offset, minPackedValue, offset, bytesPerDim);
+        }
+        if (StringHelper.compare(bytesPerDim, packedValue, offset, maxPackedValue, offset) > 0) {
+          System.arraycopy(packedValue, offset, maxPackedValue, offset, bytesPerDim);
+        }
+      }
+    }
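+    // For example, with numDims=2 and bytesPerDim=1, adding {1,5} then {3,2}
+    // leaves minPackedValue={1,2} and maxPackedValue={3,5}: the bounds are
+    // tracked per dimension, so they need not equal any single indexed point.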
+
+    pointCount++;
+    if (pointCount > totalPointCount) {
+      throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + pointCount + " values");
+    }
+    docsSeen.set(docID);
+  }
+
+  /** How many points have been added so far */
+  public long getPointCount() {
+    return pointCount;
+  }
+
+  private static class MergeReader {
+    final SimpleTextBKDReader bkd;
+    final SimpleTextBKDReader.IntersectState state;
+    final MergeState.DocMap docMap;
+
+    /** Current doc ID */
+    public int docID;
+
+    /** Which doc in this block we are up to */
+    private int docBlockUpto;
+
+    /** How many docs in the current block */
+    private int docsInBlock;
+
+    /** Which leaf block we are up to */
+    private int blockID;
+
+    private final byte[] packedValues;
+
+    public MergeReader(SimpleTextBKDReader bkd, MergeState.DocMap docMap) throws IOException {
+      this.bkd = bkd;
+      state = new SimpleTextBKDReader.IntersectState(bkd.in.clone(),
+                                                     bkd.numDims,
+                                                     bkd.packedBytesLength,
+                                                     bkd.maxPointsInLeafNode,
+                                                     null);
+      this.docMap = docMap;
+      long minFP = Long.MAX_VALUE;
+      //System.out.println("MR.init " + this + " bkdreader=" + bkd + " leafBlockFPs.length=" + bkd.leafBlockFPs.length);
+      for(long fp : bkd.leafBlockFPs) {
+        minFP = Math.min(minFP, fp);
+        //System.out.println("  leaf fp=" + fp);
+      }
+      state.in.seek(minFP);
+      this.packedValues = new byte[bkd.maxPointsInLeafNode * bkd.packedBytesLength];
+    }
+
+    public boolean next() throws IOException {
+      //System.out.println("MR.next this=" + this);
+      while (true) {
+        if (docBlockUpto == docsInBlock) {
+          if (blockID == bkd.leafBlockFPs.length) {
+            //System.out.println("  done!");
+            return false;
+          }
+          //System.out.println("  new block @ fp=" + state.in.getFilePointer());
+          docsInBlock = bkd.readDocIDs(state.in, state.in.getFilePointer(), state.scratchDocIDs);
+          assert docsInBlock > 0;
+          docBlockUpto = 0;
+          bkd.visitDocValues(state.commonPrefixLengths, state.scratchPackedValue, state.in, state.scratchDocIDs, docsInBlock, new IntersectVisitor() {
+            int i = 0;
+
+            @Override
+            public void visit(int docID) throws IOException {
+              throw new UnsupportedOperationException();
+            }
+
+            @Override
+            public void visit(int docID, byte[] packedValue) throws IOException {
+              assert docID == state.scratchDocIDs[i];
+              System.arraycopy(packedValue, 0, packedValues, i * bkd.packedBytesLength, bkd.packedBytesLength);
+              i++;
+            }
+
+            @Override
+            public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+              throw new UnsupportedOperationException();
+            }
+
+          });
+
+          blockID++;
+        }
+
+        final int index = docBlockUpto++;
+        int oldDocID = state.scratchDocIDs[index];
+
+        int mappedDocID;
+        if (docMap == null) {
+          mappedDocID = oldDocID;
+        } else {
+          mappedDocID = docMap.get(oldDocID);
+        }
+
+        if (mappedDocID != -1) {
+          // Not deleted!
+          docID = mappedDocID;
+          System.arraycopy(packedValues, index * bkd.packedBytesLength, state.scratchPackedValue, 0, bkd.packedBytesLength);
+          return true;
+        }
+      }
+    }
+  }
+
+  private static class BKDMergeQueue extends PriorityQueue<MergeReader> {
+    private final int bytesPerDim;
+
+    public BKDMergeQueue(int bytesPerDim, int maxSize) {
+      super(maxSize);
+      this.bytesPerDim = bytesPerDim;
+    }
+
+    @Override
+    public boolean lessThan(MergeReader a, MergeReader b) {
+      assert a != b;
+
+      int cmp = StringHelper.compare(bytesPerDim, a.state.scratchPackedValue, 0, b.state.scratchPackedValue, 0);
+      if (cmp < 0) {
+        return true;
+      } else if (cmp > 0) {
+        return false;
+      }
+
+      // Tie break by sorting smaller docIDs earlier:
+      return a.docID < b.docID;
+    }
+  }
+
+  /** Write a field from a {@link MutablePointValues}. This way of writing
+   *  points is faster than regular writes with {@link BKDWriter#add} since
+   *  there is opportunity for reordering points before writing them to
+   *  disk. This method does not use transient disk in order to reorder points.
+   */
+  public long writeField(IndexOutput out, String fieldName, MutablePointValues reader) throws IOException {
+    if (numDims == 1) {
+      return writeField1Dim(out, fieldName, reader);
+    } else {
+      return writeFieldNDims(out, fieldName, reader);
+    }
+  }
+
+
+  /* In the 2+D case, we recursively pick the split dimension, compute the
+   * median value and partition other values around it. */
+  private long writeFieldNDims(IndexOutput out, String fieldName, MutablePointValues values) throws IOException {
+    if (pointCount != 0) {
+      throw new IllegalStateException("cannot mix add and writeField");
+    }
+
+    // Catch user silliness:
+    if (heapPointWriter == null && tempInput == null) {
+      throw new IllegalStateException("already finished");
+    }
+
+    // Mark that we already finished:
+    heapPointWriter = null;
+
+    long countPerLeaf = pointCount = values.size();
+    long innerNodeCount = 1;
+
+    while (countPerLeaf > maxPointsInLeafNode) {
+      countPerLeaf = (countPerLeaf+1)/2;
+      innerNodeCount *= 2;
+    }
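+    // For example, pointCount=100000 with maxPointsInLeafNode=1024 halves
+    // 100000 -> 50000 -> 25000 -> 12500 -> 6250 -> 3125 -> 1563 -> 782,
+    // giving innerNodeCount=128 leaves of ~782 points each.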
+
+    int numLeaves = Math.toIntExact(innerNodeCount);
+
+    checkMaxLeafNodeCount(numLeaves);
+
+    final byte[] splitPackedValues = new byte[numLeaves * (bytesPerDim + 1)];
+    final long[] leafBlockFPs = new long[numLeaves];
+
+    // compute the min/max for this slice
+    Arrays.fill(minPackedValue, (byte) 0xff);
+    Arrays.fill(maxPackedValue, (byte) 0);
+    for (int i = 0; i < Math.toIntExact(pointCount); ++i) {
+      values.getValue(i, scratchBytesRef1);
+      for(int dim=0;dim<numDims;dim++) {
+        int offset = dim*bytesPerDim;
+        if (StringHelper.compare(bytesPerDim, scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, minPackedValue, offset) < 0) {
+          System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, minPackedValue, offset, bytesPerDim);
+        }
+        if (StringHelper.compare(bytesPerDim, scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset) > 0) {
+          System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + offset, maxPackedValue, offset, bytesPerDim);
+        }
+      }
+
+      docsSeen.set(values.getDocID(i));
+    }
+
+    build(1, numLeaves, values, 0, Math.toIntExact(pointCount), out,
+        minPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs,
+        new int[maxPointsInLeafNode]);
+
+    long indexFP = out.getFilePointer();
+    writeIndex(out, leafBlockFPs, splitPackedValues);
+    return indexFP;
+  }
+
+
+  /* In the 1D case, we can simply sort points in ascending order and use the
+   * same writing logic as we use at merge time. */
+  private long writeField1Dim(IndexOutput out, String fieldName, MutablePointValues reader) throws IOException {
+    MutablePointsReaderUtils.sort(maxDoc, packedBytesLength, reader, 0, Math.toIntExact(reader.size()));
+
+    final OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out);
+
+    reader.intersect(new IntersectVisitor() {
+
+      @Override
+      public void visit(int docID, byte[] packedValue) throws IOException {
+        oneDimWriter.add(packedValue, docID);
+      }
+
+      @Override
+      public void visit(int docID) throws IOException {
+        throw new IllegalStateException();
+      }
+
+      @Override
+      public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
+        return Relation.CELL_CROSSES_QUERY;
+      }
+    });
+
+    return oneDimWriter.finish();
+  }
+
+  // TODO: remove this opto: SimpleText is supposed to be simple!
+  
+  /** More efficient bulk-add for incoming {@link SimpleTextBKDReader}s.  This does a merge sort of the already
+   *  sorted values and currently only works when numDims==1.  This returns -1 if all documents containing
+   *  dimensional values were deleted. */
+  public long merge(IndexOutput out, List<MergeState.DocMap> docMaps, List<SimpleTextBKDReader> readers) throws IOException {
+    assert docMaps == null || readers.size() == docMaps.size();
+
+    BKDMergeQueue queue = new BKDMergeQueue(bytesPerDim, readers.size());
+
+    for(int i=0;i<readers.size();i++) {
+      SimpleTextBKDReader bkd = readers.get(i);
+      MergeState.DocMap docMap;
+      if (docMaps == null) {
+        docMap = null;
+      } else {
+        docMap = docMaps.get(i);
+      }
+      MergeReader reader = new MergeReader(bkd, docMap);
+      if (reader.next()) {
+        queue.add(reader);
+      }
+    }
+
+    OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(out);
+
+    while (queue.size() != 0) {
+      MergeReader reader = queue.top();
+      // System.out.println("iter reader=" + reader);
+
+      // NOTE: doesn't work with subclasses (e.g. SimpleText!)
+      oneDimWriter.add(reader.state.scratchPackedValue, reader.docID);
+
+      if (reader.next()) {
+        queue.updateTop();
+      } else {
+        // This segment was exhausted
+        queue.pop();
+      }
+    }
+
+    return oneDimWriter.finish();
+  }
+
+  private class OneDimensionBKDWriter {
+
+    final IndexOutput out;
+    final List<Long> leafBlockFPs = new ArrayList<>();
+    final List<byte[]> leafBlockStartValues = new ArrayList<>();
+    final byte[] leafValues = new byte[maxPointsInLeafNode * packedBytesLength];
+    final int[] leafDocs = new int[maxPointsInLeafNode];
+    long valueCount;
+    int leafCount;
+
+    OneDimensionBKDWriter(IndexOutput out) {
+      if (numDims != 1) {
+        throw new UnsupportedOperationException("numDims must be 1 but got " + numDims);
+      }
+      if (pointCount != 0) {
+        throw new IllegalStateException("cannot mix add and merge");
+      }
+
+      // Catch user silliness:
+      if (heapPointWriter == null && tempInput == null) {
+        throw new IllegalStateException("already finished");
+      }
+
+      // Mark that we already finished:
+      heapPointWriter = null;
+
+      this.out = out;
+
+      lastPackedValue = new byte[packedBytesLength];
+    }
+
+    // for asserts
+    final byte[] lastPackedValue;
+    int lastDocID;
+
+    void add(byte[] packedValue, int docID) throws IOException {
+      assert valueInOrder(valueCount + leafCount,
+          0, lastPackedValue, packedValue, 0, docID, lastDocID);
+
+      System.arraycopy(packedValue, 0, leafValues, leafCount * packedBytesLength, packedBytesLength);
+      leafDocs[leafCount] = docID;
+      docsSeen.set(docID);
+      leafCount++;
+
+      if (valueCount + leafCount > totalPointCount) {
+        throw new IllegalStateException("totalPointCount=" + totalPointCount + " was passed when we were created, but we just hit " + (valueCount + leafCount) + " values");
+      }
+
+      if (leafCount == maxPointsInLeafNode) {
+        // We write a block once we hit exactly the max count ... this is different from
+        // when we flush a new segment, where we write between max/2 and max per leaf block,
+        // so merged segments will behave differently from newly flushed segments:
+        writeLeafBlock();
+        leafCount = 0;
+      }
+
+      assert (lastDocID = docID) >= 0; // only assign when asserts are enabled
+    }
+
+    public long finish() throws IOException {
+      if (leafCount > 0) {
+        writeLeafBlock();
+        leafCount = 0;
+      }
+
+      if (valueCount == 0) {
+        return -1;
+      }
+
+      pointCount = valueCount;
+
+      long indexFP = out.getFilePointer();
+
+      int numInnerNodes = leafBlockStartValues.size();
+
+      //System.out.println("BKDW: now rotate numInnerNodes=" + numInnerNodes + " leafBlockStarts=" + leafBlockStartValues.size());
+
+      byte[] index = new byte[(1+numInnerNodes) * (1+bytesPerDim)];
+      rotateToTree(1, 0, numInnerNodes, index, leafBlockStartValues);
+      long[] arr = new long[leafBlockFPs.size()];
+      for(int i=0;i<leafBlockFPs.size();i++) {
+        arr[i] = leafBlockFPs.get(i);
+      }
+      writeIndex(out, arr, index);
+      return indexFP;
+    }
+
+    private void writeLeafBlock() throws IOException {
+      assert leafCount != 0;
+      if (valueCount == 0) {
+        System.arraycopy(leafValues, 0, minPackedValue, 0, packedBytesLength);
+      }
+      System.arraycopy(leafValues, (leafCount - 1) * packedBytesLength, maxPackedValue, 0, packedBytesLength);
+
+      valueCount += leafCount;
+
+      if (leafBlockFPs.size() > 0) {
+        // Save the first (minimum) value in each leaf block except the first, to build the split value index at the end:
+        leafBlockStartValues.add(Arrays.copyOf(leafValues, packedBytesLength));
+      }
+      leafBlockFPs.add(out.getFilePointer());
+      checkMaxLeafNodeCount(leafBlockFPs.size());
+
+      Arrays.fill(commonPrefixLengths, bytesPerDim);
+      // Find per-dim common prefix:
+      for(int dim=0;dim<numDims;dim++) {
+        int offset1 = dim * bytesPerDim;
+        int offset2 = (leafCount - 1) * packedBytesLength + offset1;
+        for(int j=0;j<commonPrefixLengths[dim];j++) {
+          if (leafValues[offset1+j] != leafValues[offset2+j]) {
+            commonPrefixLengths[dim] = j;
+            break;
+          }
+        }
+      }
+
+      writeLeafBlockDocs(out, leafDocs, 0, leafCount);
+
+      final IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
+        final BytesRef scratch = new BytesRef();
+
+        {
+          scratch.length = packedBytesLength;
+          scratch.bytes = leafValues;
+        }
+
+        @Override
+        public BytesRef apply(int i) {
+          scratch.offset = packedBytesLength * i;
+          return scratch;
+        }
+      };
+      assert valuesInOrderAndBounds(leafCount, 0, Arrays.copyOf(leafValues, packedBytesLength),
+          Arrays.copyOfRange(leafValues, (leafCount - 1) * packedBytesLength, leafCount * packedBytesLength),
+          packedValues, leafDocs, 0);
+      writeLeafBlockPackedValues(out, commonPrefixLengths, leafCount, 0, packedValues);
+    }
+
+  }
+
+  // TODO: there must be a simpler way?
+  private void rotateToTree(int nodeID, int offset, int count, byte[] index, List<byte[]> leafBlockStartValues) {
+    //System.out.println("ROTATE: nodeID=" + nodeID + " offset=" + offset + " count=" + count + " bpd=" + bytesPerDim + " index.length=" + index.length);
+    if (count == 1) {
+      // Leaf index node
+      //System.out.println("  leaf index node");
+      //System.out.println("  index[" + nodeID + "] = blockStartValues[" + offset + "]");
+      System.arraycopy(leafBlockStartValues.get(offset), 0, index, nodeID*(1+bytesPerDim)+1, bytesPerDim);
+    } else if (count > 1) {
+      // Internal index node: binary partition of count
+      int countAtLevel = 1;
+      int totalCount = 0;
+      while (true) {
+        int countLeft = count - totalCount;
+        //System.out.println("    cycle countLeft=" + countLeft + " coutAtLevel=" + countAtLevel);
+        if (countLeft <= countAtLevel) {
+          // This is the last level, possibly partially filled:
+          int lastLeftCount = Math.min(countAtLevel/2, countLeft);
+          assert lastLeftCount >= 0;
+          int leftHalf = (totalCount-1)/2 + lastLeftCount;
+
+          int rootOffset = offset + leftHalf;
+          /*
+          System.out.println("  last left count " + lastLeftCount);
+          System.out.println("  leftHalf " + leftHalf + " rightHalf=" + (count-leftHalf-1));
+          System.out.println("  rootOffset=" + rootOffset);
+          */
+
+          System.arraycopy(leafBlockStartValues.get(rootOffset), 0, index, nodeID*(1+bytesPerDim)+1, bytesPerDim);
+          //System.out.println("  index[" + nodeID + "] = blockStartValues[" + rootOffset + "]");
+
+          // TODO: we could optimize/specialize, when we know it's simply fully balanced binary tree
+          // under here, to save this while loop on each recursion
+
+          // Recurse left
+          rotateToTree(2*nodeID, offset, leftHalf, index, leafBlockStartValues);
+
+          // Recurse right
+          rotateToTree(2*nodeID+1, rootOffset+1, count-leftHalf-1, index, leafBlockStartValues);
+          return;
+        }
+        totalCount += countAtLevel;
+        countAtLevel *= 2;
+      }
+    } else {
+      assert count == 0;
+    }
+  }
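+
+  // A small worked example of the above: with count=3 inner values, the loop
+  // finds lastLeftCount=1 and leftHalf=1, so node 1 (the root) gets
+  // blockStartValues[1] (the median); the left recursion stores
+  // blockStartValues[0] at node 2 and the right recursion stores
+  // blockStartValues[2] at node 3, i.e. the usual implicit binary-heap layout.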
+
+  // TODO: if we fixed each partition step to just record the file offset at the "split point", we could probably handle variable length
+  // encoding and not have our own ByteSequencesReader/Writer
+
+  /** Sort the heap writer by the specified dim */
+  private void sortHeapPointWriter(final HeapPointWriter writer, int dim) {
+    final int pointCount = Math.toIntExact(this.pointCount);
+    // Tie-break by docID:
+
+    // No need to tie break on ord, for the case where the same doc has the same value in a given dimension indexed more than once: it
+    // can't matter at search time since we don't write ords into the index:
+    new MSBRadixSorter(bytesPerDim + Integer.BYTES) {
+
+      @Override
+      protected int byteAt(int i, int k) {
+        assert k >= 0;
+        if (k < bytesPerDim) {
+          // dim bytes
+          int block = i / writer.valuesPerBlock;
+          int index = i % writer.valuesPerBlock;
+          return writer.blocks.get(block)[index * packedBytesLength + dim * bytesPerDim + k] & 0xff;
+        } else {
+          // doc id
+          int s = 3 - (k - bytesPerDim);
+          return (writer.docIDs[i] >>> (s * 8)) & 0xff;
+        }
+      }
+
+      @Override
+      protected void swap(int i, int j) {
+        int docID = writer.docIDs[i];
+        writer.docIDs[i] = writer.docIDs[j];
+        writer.docIDs[j] = docID;
+
+        if (singleValuePerDoc == false) {
+          if (longOrds) {
+            long ord = writer.ordsLong[i];
+            writer.ordsLong[i] = writer.ordsLong[j];
+            writer.ordsLong[j] = ord;
+          } else {
+            int ord = writer.ords[i];
+            writer.ords[i] = writer.ords[j];
+            writer.ords[j] = ord;
+          }
+        }
+
+        byte[] blockI = writer.blocks.get(i / writer.valuesPerBlock);
+        int indexI = (i % writer.valuesPerBlock) * packedBytesLength;
+        byte[] blockJ = writer.blocks.get(j / writer.valuesPerBlock);
+        int indexJ = (j % writer.valuesPerBlock) * packedBytesLength;
+
+        // scratch1 = values[i]
+        System.arraycopy(blockI, indexI, scratch1, 0, packedBytesLength);
+        // values[i] = values[j]
+        System.arraycopy(blockJ, indexJ, blockI, indexI, packedBytesLength);
+        // values[j] = scratch1
+        System.arraycopy(scratch1, 0, blockJ, indexJ, packedBytesLength);
+      }
+
+    }.sort(0, pointCount);
+  }
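+
+  // The radix-sort key above is the bytesPerDim dim bytes followed by the
+  // 4-byte big-endian docID: e.g. with bytesPerDim=4, byteAt(i, 4) returns
+  // (docIDs[i] >>> 24) & 0xff, so equal dim values tie-break by ascending docID.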
+
+  private PointWriter sort(int dim) throws IOException {
+    assert dim >= 0 && dim < numDims;
+
+    if (heapPointWriter != null) {
+
+      assert tempInput == null;
+
+      // We never spilled the incoming points to disk, so now we sort in heap:
+      HeapPointWriter sorted;
+
+      if (dim == 0) {
+        // First dim can re-use the current heap writer
+        sorted = heapPointWriter;
+      } else {
+        // Subsequent dims need a private copy
+        sorted = new HeapPointWriter((int) pointCount, (int) pointCount, packedBytesLength, longOrds, singleValuePerDoc);
+        sorted.copyFrom(heapPointWriter);
+      }
+
+      //long t0 = System.nanoTime();
+      sortHeapPointWriter(sorted, dim);
+      //long t1 = System.nanoTime();
+      //System.out.println("BKD: sort took " + ((t1-t0)/1000000.0) + " msec");
+
+      sorted.close();
+      return sorted;
+    } else {
+
+      // Offline sort:
+      assert tempInput != null;
+
+      final int offset = bytesPerDim * dim;
+
+      Comparator<BytesRef> cmp;
+      if (dim == numDims - 1) {
+        // in that case the bytes for the dimension and for the doc id are contiguous,
+        // so we don't need a branch
+        cmp = new BytesRefComparator(bytesPerDim + Integer.BYTES) {
+          @Override
+          protected int byteAt(BytesRef ref, int i) {
+            return ref.bytes[ref.offset + offset + i] & 0xff;
+          }
+        };
+      } else {
+        cmp = new BytesRefComparator(bytesPerDim + Integer.BYTES) {
+          @Override
+          protected int byteAt(BytesRef ref, int i) {
+            if (i < bytesPerDim) {
+              return ref.bytes[ref.offset + offset + i] & 0xff;
+            } else {
+              return ref.bytes[ref.offset + packedBytesLength + i - bytesPerDim] & 0xff;
+            }
+          }
+        };
+      }
+
+      OfflineSorter sorter = new OfflineSorter(tempDir, tempFileNamePrefix + "_bkd" + dim, cmp, offlineSorterBufferMB, offlineSorterMaxTempFiles, bytesPerDoc) {
+
+          /** We write/read fixed-byte-width file that {@link OfflinePointReader} can read. */
+          @Override
+          protected ByteSequencesWriter getWriter(IndexOutput out) {
+            return new ByteSequencesWriter(out) {
+              @Override
+              public void write(byte[] bytes, int off, int len) throws IOException {
+                assert len == bytesPerDoc: "len=" + len + " bytesPerDoc=" + bytesPerDoc;
+                out.writeBytes(bytes, off, len);
+              }
+            };
+          }
+
+          /** We write/read fixed-byte-width file that {@link OfflinePointReader} can read. */
+          @Override
+          protected ByteSequencesReader getReader(ChecksumIndexInput in, String name) throws IOException {
+            return new ByteSequencesReader(in, name) {
+              final BytesRef scratch = new BytesRef(new byte[bytesPerDoc]);
+              @Override
+              public BytesRef next() throws IOException {
+                if (in.getFilePointer() >= end) {
+                  return null;
+                }
+                in.readBytes(scratch.bytes, 0, bytesPerDoc);
+                return scratch;
+              }
+            };
+          }
+        };
+
+      String name = sorter.sort(tempInput.getName());
+
+      return new OfflinePointWriter(tempDir, name, packedBytesLength, pointCount, longOrds, singleValuePerDoc);
+    }
+  }
+
+  private void checkMaxLeafNodeCount(int numLeaves) {
+    if ((1+bytesPerDim) * (long) numLeaves > ArrayUtil.MAX_ARRAY_LENGTH) {
+      throw new IllegalStateException("too many nodes; increase maxPointsInLeafNode (currently " + maxPointsInLeafNode + ") and reindex");
+    }
+  }
+
+  /** Writes the BKD tree to the provided {@link IndexOutput} and returns the file offset where the index was written. */
+  public long finish(IndexOutput out) throws IOException {
+    // System.out.println("\nBKDTreeWriter.finish pointCount=" + pointCount + " out=" + out + " heapWriter=" + heapPointWriter);
+
+    // TODO: specialize the 1D case?  it's much faster at indexing time (no partitioning on recurse...)
+
+    // Catch user silliness:
+    if (heapPointWriter == null && tempInput == null) {
+      throw new IllegalStateException("already finished");
+    }
+
+    if (offlinePointWriter != null) {
+      offlinePointWriter.close();
+    }
+
+    if (pointCount == 0) {
+      throw new IllegalStateException("must index at least one point");
+    }
+
+    LongBitSet ordBitSet;
+    if (numDims > 1) {
+      if (singleValuePerDoc) {
+        ordBitSet = new LongBitSet(maxDoc);
+      } else {
+        ordBitSet = new LongBitSet(pointCount);
+      }
+    } else {
+      ordBitSet = null;
+    }
+
+    long countPerLeaf = pointCount;
+    long innerNodeCount = 1;
+
+    while (countPerLeaf > maxPointsInLeafNode) {
+      countPerLeaf = (countPerLeaf+1)/2;
+      innerNodeCount *= 2;
+    }
+
+    int numLeaves = (int) innerNodeCount;
+
+    checkMaxLeafNodeCount(numLeaves);
+
+    // NOTE: we could save the 1+ here, to use a bit less heap at search time, but then we'd need a somewhat costly check at each
+    // step of the recursion to recompute the split dim:
+
+    // Indexed by nodeID, but first (root) nodeID is 1.  We do 1+ because the lead byte at each recursion says which dim we split on.
+    byte[] splitPackedValues = new byte[Math.toIntExact(numLeaves*(1+bytesPerDim))];
+
+    // One file pointer per leaf block; numLeaves is always a power of 2 here:
+    long[] leafBlockFPs = new long[numLeaves];
+
+    // Make sure the math above "worked":
+    assert pointCount / numLeaves <= maxPointsInLeafNode: "pointCount=" + pointCount + " numLeaves=" + numLeaves + " maxPointsInLeafNode=" + maxPointsInLeafNode;
+
+    // Sort all docs once by each dimension:
+    PathSlice[] sortedPointWriters = new PathSlice[numDims];
+
+    // This is only used on exception; on normal code paths we close all files we opened:
+    List<Closeable> toCloseHeroically = new ArrayList<>();
+
+    boolean success = false;
+    try {
+      //long t0 = System.nanoTime();
+      for(int dim=0;dim<numDims;dim++) {
+        sortedPointWriters[dim] = new PathSlice(sort(dim), 0, pointCount);
+      }
+      //long t1 = System.nanoTime();
+      //System.out.println("sort time: " + ((t1-t0)/1000000.0) + " msec");
+
+      if (tempInput != null) {
+        tempDir.deleteFile(tempInput.getName());
+        tempInput = null;
+      } else {
+        assert heapPointWriter != null;
+        heapPointWriter = null;
+      }
+
+      build(1, numLeaves, sortedPointWriters,
+            ordBitSet, out,
+            minPackedValue, maxPackedValue,
+            splitPackedValues,
+            leafBlockFPs,
+            toCloseHeroically);
+
+      for(PathSlice slice : sortedPointWriters) {
+        slice.writer.destroy();
+      }
+
+      // If no exception, we should have cleaned everything up:
+      assert tempDir.getCreatedFiles().isEmpty();
+      //long t2 = System.nanoTime();
+      //System.out.println("write time: " + ((t2-t1)/1000000.0) + " msec");
+
+      success = true;
+    } finally {
+      if (success == false) {
+        IOUtils.deleteFilesIgnoringExceptions(tempDir, tempDir.getCreatedFiles());
+        IOUtils.closeWhileHandlingException(toCloseHeroically);
+      }
+    }
+
+    //System.out.println("Total nodes: " + innerNodeCount);
+
+    // Write index:
+    long indexFP = out.getFilePointer();
+    writeIndex(out, leafBlockFPs, splitPackedValues);
+    return indexFP;
+  }
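+
+  // Typical lifecycle (an illustrative sketch, not taken from a caller in this
+  // patch): add() is called once per point, then finish() exactly once:
+  //
+  //   for each point: writer.add(packedValue, docID);
+  //   long indexFP = writer.finish(dataOut);
+  //   // record indexFP so a reader can later seek to the index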
+
+  /** Writes the index: per-leaf block file pointers plus the split dims and values. */
+  private void writeIndex(IndexOutput out, long[] leafBlockFPs, byte[] splitPackedValues) throws IOException {
+    write(out, NUM_DIMS);
+    writeInt(out, numDims);
+    newline(out);
+
+    write(out, BYTES_PER_DIM);
+    writeInt(out, bytesPerDim);
+    newline(out);
+
+    write(out, MAX_LEAF_POINTS);
+    writeInt(out, maxPointsInLeafNode);
+    newline(out);
+
+    write(out, INDEX_COUNT);
+    writeInt(out, leafBlockFPs.length);
+    newline(out);
+
+    write(out, MIN_VALUE);
+    BytesRef br = new BytesRef(minPackedValue, 0, minPackedValue.length);
+    write(out, br.toString());
+    newline(out);
+
+    write(out, MAX_VALUE);
+    br = new BytesRef(maxPackedValue, 0, maxPackedValue.length);
+    write(out, br.toString());
+    newline(out);
+
+    write(out, POINT_COUNT);
+    writeLong(out, pointCount);
+    newline(out);
+
+    write(out, DOC_COUNT);
+    writeInt(out, docsSeen.cardinality());
+    newline(out);
+
+    for(int i=0;i<leafBlockFPs.length;i++) {
+      write(out, BLOCK_FP);
+      writeLong(out, leafBlockFPs[i]);
+      newline(out);
+    }
+
+    assert (splitPackedValues.length % (1 + bytesPerDim)) == 0;
+    int count = splitPackedValues.length / (1 + bytesPerDim);
+    assert count == leafBlockFPs.length;
+
+    write(out, SPLIT_COUNT);
+    writeInt(out, count);
+    newline(out);
+
+    for(int i=0;i<count;i++) {
+      write(out, SPLIT_DIM);
+      writeInt(out, splitPackedValues[i * (1 + bytesPerDim)] & 0xff);
+      newline(out);
+      write(out, SPLIT_VALUE);
+      br = new BytesRef(splitPackedValues, 1+(i * (1+bytesPerDim)), bytesPerDim);
+      write(out, br.toString());
+      newline(out);
+    }
+  }
+
+  protected void writeLeafBlockDocs(IndexOutput out, int[] docIDs, int start, int count) throws IOException {
+    write(out, BLOCK_COUNT);
+    writeInt(out, count);
+    newline(out);
+    for(int i=0;i<count;i++) {
+      write(out, BLOCK_DOC_ID);
+      writeInt(out, docIDs[start+i]);
+      newline(out);
+    }
+  }
+
+  protected void writeLeafBlockPackedValues(IndexOutput out, int[] commonPrefixLengths, int count, int sortedDim, IntFunction<BytesRef> packedValues) throws IOException {
+    for (int i = 0; i < count; ++i) {
+      BytesRef packedValue = packedValues.apply(i);
+      // NOTE: we don't do prefix coding, so we ignore commonPrefixLengths
+      write(out, BLOCK_VALUE);
+      write(out, packedValue.toString());
+      newline(out);
+    }
+  }
+
+  private void writeLeafBlockPackedValuesRange(IndexOutput out, int[] commonPrefixLengths, int start, int end, IntFunction<BytesRef> packedValues) throws IOException {
+    for (int i = start; i < end; ++i) {
+      BytesRef ref = packedValues.apply(i);
+      assert ref.length == packedBytesLength;
+
+      for(int dim=0;dim<numDims;dim++) {
+        int prefix = commonPrefixLengths[dim];
+        out.writeBytes(ref.bytes, ref.offset + dim*bytesPerDim + prefix, bytesPerDim-prefix);
+      }
+    }
+  }
+
+  private static int runLen(IntFunction<BytesRef> packedValues, int start, int end, int byteOffset) {
+    BytesRef first = packedValues.apply(start);
+    byte b = first.bytes[first.offset + byteOffset];
+    for (int i = start + 1; i < end; ++i) {
+      BytesRef ref = packedValues.apply(i);
+      byte b2 = ref.bytes[ref.offset + byteOffset];
+      assert Byte.toUnsignedInt(b2) >= Byte.toUnsignedInt(b);
+      if (b != b2) {
+        return i - start;
+      }
+    }
+    return end - start;
+  }
+
+  @Override
+  public void close() throws IOException {
+    if (tempInput != null) {
+      // NOTE: this should only happen on exception, e.g. caller calls close w/o calling finish:
+      try {
+        tempInput.close();
+      } finally {
+        tempDir.deleteFile(tempInput.getName());
+        tempInput = null;
+      }
+    }
+  }
+
+  /** Sliced reference to points in an OfflineSorter.ByteSequencesWriter file. */
+  private static final class PathSlice {
+    final PointWriter writer;
+    final long start;
+    final long count;
+
+    public PathSlice(PointWriter writer, long start, long count) {
+      this.writer = writer;
+      this.start = start;
+      this.count = count;
+    }
+
+    @Override
+    public String toString() {
+      return "PathSlice(start=" + start + " count=" + count + " writer=" + writer + ")";
+    }
+  }
+
+  /** Called on exception, to check whether the checksum is also corrupt in this source, and add that
+   *  information (checksum matched or didn't) as a suppressed exception. */
+  private void verifyChecksum(Throwable priorException, PointWriter writer) throws IOException {
+    // TODO: we could improve this, to always validate checksum as we recurse, if we shared left and
+    // right reader after recursing to children, and possibly within recursed children,
+    // since all together they make a single pass through the file.  But this is a sizable re-org,
+    // and would mean leaving readers (IndexInputs) open for longer:
+    if (writer instanceof OfflinePointWriter) {
+      // We are reading from a temp file; go verify the checksum:
+      String tempFileName = ((OfflinePointWriter) writer).name;
+      try (ChecksumIndexInput in = tempDir.openChecksumInput(tempFileName, IOContext.READONCE)) {
+        CodecUtil.checkFooter(in, priorException);
+      }
+    } else {
+      // We are reading from heap; nothing to add:
+      IOUtils.reThrow(priorException);
+    }
+  }
+
+  /** Marks bits for the ords (points) that belong in the right sub tree (those docs that have values >= the splitValue). */
+  private byte[] markRightTree(long rightCount, int splitDim, PathSlice source, LongBitSet ordBitSet) throws IOException {
+
+    // Now we mark ords that fall into the right half, so we can partition on all other dims that are not the split dim:
+
+    // Read the split value, then mark all ords in the right tree (larger than the split value):
+
+    // TODO: find a way to also checksum this reader?  If we changed to markLeftTree, and scanned the final chunk, it could work?
+    try (PointReader reader = source.writer.getReader(source.start + source.count - rightCount, rightCount)) {
+      boolean result = reader.next();
+      assert result;
+      System.arraycopy(reader.packedValue(), splitDim*bytesPerDim, scratch1, 0, bytesPerDim);
+      if (numDims > 1) {
+        assert ordBitSet.get(reader.ord()) == false;
+        ordBitSet.set(reader.ord());
+        // Subtract 1 from rightCount because we already did the first value above (so we could record the split value):
+        reader.markOrds(rightCount-1, ordBitSet);
+      }
+    } catch (Throwable t) {
+      verifyChecksum(t, source.writer);
+    }
+
+    return scratch1;
+  }
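+
+  // For example, with source.count=10 and rightCount=5, this reads entries
+  // 5..9 of the slice (already sorted by splitDim), records the first of them
+  // as the split value, and (when numDims > 1) marks all five ords in ordBitSet.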
+
+  /** Called only in assert */
+  private boolean valueInBounds(BytesRef packedValue, byte[] minPackedValue, byte[] maxPackedValue) {
+    for(int dim=0;dim<numDims;dim++) {
+      int offset = bytesPerDim*dim;
+      if (StringHelper.compare(bytesPerDim, packedValue.bytes, packedValue.offset + offset, minPackedValue, offset) < 0) {
+        return false;
+      }
+      if (StringHelper.compare(bytesPerDim, packedValue.bytes, packedValue.offset + offset, maxPackedValue, offset) > 0) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  protected int split(byte[] minPackedValue, byte[] maxPackedValue) {
+    // Find which dim has the largest span so we can split on it:
+    int splitDim = -1;
+    for(int dim=0;dim<numDims;dim++) {
+      NumericUtils.subtract(bytesPerDim, dim, maxPackedValue, minPackedValue, scratchDiff);
+      if (splitDim == -1 || StringHelper.compare(bytesPerDim, scratchDiff, 0, scratch1, 0) > 0) {
+        System.arraycopy(scratchDiff, 0, scratch1, 0, bytesPerDim);
+        splitDim = dim;
+      }
+    }
+
+    //System.out.println("SPLIT: " + splitDim);
+    return splitDim;
+  }
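+
+  // For example, with numDims=2 and bytesPerDim=1, min={2,7} and max={9,8}
+  // give per-dim spans of 7 and 1, so split returns dim 0 (the widest span).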
+
+  /** Pull a partition back into heap once the point count is low enough while recursing. */
+  private PathSlice switchToHeap(PathSlice source, List<Closeable> toCloseHeroically) throws IOException {
+    int count = Math.toIntExact(source.count);
+    // Not inside the try because we don't want to close it here:
+    PointReader reader = source.writer.getSharedReader(source.start, source.count, toCloseHeroically);
+    try (PointWriter writer = new HeapPointWriter(count, count, packedBytesLength, longOrds, singleValuePerDoc)) {
+      for(int i=0;i<count;i++) {
+        boolean hasNext = reader.next();
+        assert hasNext;
+        writer.append(reader.packedValue(), reader.ord(), reader.docID());
+      }
+      return new PathSlice(writer, 0, count);
+    } catch (Throwable t) {
+      verifyChecksum(t, source.writer);
+
+      // Dead code but javac disagrees:
+      return null;
+    }
+  }
+
+  /* Recursively reorders the provided reader and writes the bkd-tree on the fly. */
+  private void build(int nodeID, int leafNodeOffset,
+      MutablePointValues reader, int from, int to,
+      IndexOutput out,
+      byte[] minPackedValue, byte[] maxPackedValue,
+      byte[] splitPackedValues,
+      long[] leafBlockFPs,
+      int[] spareDocIds) throws IOException {
+
+    if (nodeID >= leafNodeOffset) {
+      // leaf node
+      final int count = to - from;
+      assert count <= maxPointsInLeafNode;
+
+      // Compute common prefixes
+      Arrays.fill(commonPrefixLengths, bytesPerDim);
+      reader.getValue(from, scratchBytesRef1);
+      for (int i = from + 1; i < to; ++i) {
+        reader.getValue(i, scratchBytesRef2);
+        for (int dim=0;dim<numDims;dim++) {
+          final int offset = dim * bytesPerDim;
+          for(int j=0;j<commonPrefixLengths[dim];j++) {
+            if (scratchBytesRef1.bytes[scratchBytesRef1.offset+offset+j] != scratchBytesRef2.bytes[scratchBytesRef2.offset+offset+j]) {
+              commonPrefixLengths[dim] = j;
+              break;
+            }
+          }
+        }
+      }
+
+      // Find the dimension that has the least number of unique bytes at commonPrefixLengths[dim]
+      FixedBitSet[] usedBytes = new FixedBitSet[numDims];
+      for (int dim = 0; dim < numDims; ++dim) {
+        if (commonPrefixLengths[dim] < bytesPerDim) {
+          usedBytes[dim] = new FixedBitSet(256);
+        }
+      }
+      for (int i = from + 1; i < to; ++i) {
+        for (int dim=0;dim<numDims;dim++) {
+          if (usedBytes[dim] != null) {
+            byte b = reader.getByteAt(i, dim * bytesPerDim + commonPrefixLengths[dim]);
+            usedBytes[dim].set(Byte.toUnsignedInt(b));
+          }
+        }
+      }
+      int sortedDim = 0;
+      int sortedDimCardinality = Integer.MAX_VALUE;
+      for (int dim = 0; dim < numDims; ++dim) {
+        if (usedBytes[dim] != null) {
+          final int cardinality = usedBytes[dim].cardinality();
+          if (cardinality < sortedDimCardinality) {
+            sortedDim = dim;
+            sortedDimCardinality = cardinality;
+          }
+        }
+      }
+
+      // sort by sortedDim
+      MutablePointsReaderUtils.sortByDim(sortedDim, bytesPerDim, commonPrefixLengths,
+                                         reader, from, to, scratchBytesRef1, scratchBytesRef2);
+
+      // Save the block file pointer:
+      leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
+
+      // Write doc IDs
+      int[] docIDs = spareDocIds;
+      for (int i = from; i < to; ++i) {
+        docIDs[i - from] = reader.getDocID(i);
+      }
+      writeLeafBlockDocs(out, docIDs, 0, count);
+
+      // Write the common prefixes:
+      reader.getValue(from, scratchBytesRef1);
+      System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset, scratch1, 0, packedBytesLength);
+
+      // Write the full values:
+      IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
+        @Override
+        public BytesRef apply(int i) {
+          reader.getValue(from + i, scratchBytesRef1);
+          return scratchBytesRef1;
+        }
+      };
+      assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
+          docIDs, 0);
+      writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
+
+    } else {
+      // inner node
+
+      // compute the split dimension and partition around it
+      final int splitDim = split(minPackedValue, maxPackedValue);
+      final int mid = (from + to + 1) >>> 1;
+
+      int commonPrefixLen = bytesPerDim;
+      for (int i = 0; i < bytesPerDim; ++i) {
+        if (minPackedValue[splitDim * bytesPerDim + i] != maxPackedValue[splitDim * bytesPerDim + i]) {
+          commonPrefixLen = i;
+          break;
+        }
+      }
+      MutablePointsReaderUtils.partition(maxDoc, splitDim, bytesPerDim, commonPrefixLen,
+          reader, from, to, mid, scratchBytesRef1, scratchBytesRef2);
+
+      // set the split value
+      final int address = nodeID * (1+bytesPerDim);
+      splitPackedValues[address] = (byte) splitDim;
+      reader.getValue(mid, scratchBytesRef1);
+      System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim, splitPackedValues, address + 1, bytesPerDim);
+
+      byte[] minSplitPackedValue = Arrays.copyOf(minPackedValue, packedBytesLength);
+      byte[] maxSplitPackedValue = Arrays.copyOf(maxPackedValue, packedBytesLength);
+      System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim,
+          minSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
+      System.arraycopy(scratchBytesRef1.bytes, scratchBytesRef1.offset + splitDim * bytesPerDim,
+          maxSplitPackedValue, splitDim * bytesPerDim, bytesPerDim);
+
+      // recurse
+      build(nodeID * 2, leafNodeOffset, reader, from, mid, out,
+          minPackedValue, maxSplitPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
+      build(nodeID * 2 + 1, leafNodeOffset, reader, mid, to, out,
+          minSplitPackedValue, maxPackedValue, splitPackedValues, leafBlockFPs, spareDocIds);
+    }
+  }
+
+  /** The array (sized numDims) of PathSlice describes the cell we have currently recursed to. */
+  private void build(int nodeID, int leafNodeOffset,
+                     PathSlice[] slices,
+                     LongBitSet ordBitSet,
+                     IndexOutput out,
+                     byte[] minPackedValue, byte[] maxPackedValue,
+                     byte[] splitPackedValues,
+                     long[] leafBlockFPs,
+                     List<Closeable> toCloseHeroically) throws IOException {
+
+    for(PathSlice slice : slices) {
+      assert slice.count == slices[0].count;
+    }
+
+    if (numDims == 1 && slices[0].writer instanceof OfflinePointWriter && slices[0].count <= maxPointsSortInHeap) {
+      // Special case for 1D, to cutover to heap once we recurse deeply enough:
+      slices[0] = switchToHeap(slices[0], toCloseHeroically);
+    }
+
+    if (nodeID >= leafNodeOffset) {
+
+      // Leaf node: write block
+      // We can write the block in any order, so by default we write it sorted by the dimension that has the
+      // least number of unique bytes at commonPrefixLengths[dim], which makes compression more efficient
+      int sortedDim = 0;
+      int sortedDimCardinality = Integer.MAX_VALUE;
+
+      for (int dim=0;dim<numDims;dim++) {
+        if (slices[dim].writer instanceof HeapPointWriter == false) {
+          // Adversarial cases can cause this, e.g. very lopsided data, all equal points, such that we started
+          // offline, but then kept splitting only in one dimension, and so never had to rewrite into heap writer
+          slices[dim] = switchToHeap(slices[dim], toCloseHeroically);
+        }
+
+        PathSlice source = slices[dim];
+
+        HeapPointWriter heapSource = (HeapPointWriter) source.writer;
+
+        // Find common prefix by comparing first and last values, already sorted in this dimension:
+        heapSource.readPackedValue(Math.toIntExact(source.start), scratch1);
+        heapSource.readPackedValue(Math.toIntExact(source.start + source.count - 1), scratch2);
+
+        int offset = dim * bytesPerDim;
+        commonPrefixLengths[dim] = bytesPerDim;
+        for(int j=0;j<bytesPerDim;j++) {
+          if (scratch1[offset+j] != scratch2[offset+j]) {
+            commonPrefixLengths[dim] = j;
+            break;
+          }
+        }
+
+        int prefix = commonPrefixLengths[dim];
+        if (prefix < bytesPerDim) {
+          int cardinality = 1;
+          byte previous = scratch1[offset + prefix];
+          for (long i = 1; i < source.count; ++i) {
+            heapSource.readPackedValue(Math.toIntExact(source.start + i), scratch2);
+            byte b = scratch2[offset + prefix];
+            assert Byte.toUnsignedInt(previous) <= Byte.toUnsignedInt(b);
+            if (b != previous) {
+              cardinality++;
+              previous = b;
+            }
+          }
+          assert cardinality <= 256;
+          if (cardinality < sortedDimCardinality) {
+            sortedDim = dim;
+            sortedDimCardinality = cardinality;
+          }
+        }
+      }
+
+      PathSlice source = slices[sortedDim];
+
+      // We ensured that maxPointsSortInHeap was >= maxPointsInLeafNode, so we better be in heap at this point:
+      HeapPointWriter heapSource = (HeapPointWriter) source.writer;
+
+      // Save the block file pointer:
+      leafBlockFPs[nodeID - leafNodeOffset] = out.getFilePointer();
+      //System.out.println("  write leaf block @ fp=" + out.getFilePointer());
+
+      // Write docIDs first, as their own chunk, so that at intersect time we can add all docIDs w/o
+      // loading the values:
+      int count = Math.toIntExact(source.count);
+      assert count > 0: "nodeID=" + nodeID + " leafNodeOffset=" + leafNodeOffset;
+      writeLeafBlockDocs(out, heapSource.docIDs, Math.toIntExact(source.start), count);
+
+      // TODO: minor opto: we don't really have to write the actual common prefixes, because BKDReader on recursing can regenerate it for us
+      // from the index, much like how terms dict does so from the FST:
+
+      // Write the full values:
+      IntFunction<BytesRef> packedValues = new IntFunction<BytesRef>() {
+        final BytesRef scratch = new BytesRef();
+
+        {
+          scratch.length = packedBytesLength;
+        }
+
+        @Override
+        public BytesRef apply(int i) {
+          heapSource.getPackedValueSlice(Math.toIntExact(source.start + i), scratch);
+          return scratch;
+        }
+      };
+      assert valuesInOrderAndBounds(count, sortedDim, minPackedValue, maxPackedValue, packedValues,
+          heapSource.docIDs, Math.toIntExact(source.start));
+      writeLeafBlockPackedValues(out, commonPrefixLengths, count, sortedDim, packedValues);
+
+    } else {
+      // Inner node: partition/recurse
+
+      int splitDim;
+      if (numDims > 1) {
+        splitDim = split(minPackedValue, maxPackedValue);
+      } else {
+        splitDim = 0;
+      }
+
+      PathSlice source = slices[splitDim];
+
+      assert nodeID < splitPackedValues.length: "nodeID=" + nodeID + " splitValues.length=" + splitPackedValues.length;
+
+      // How many points will be in the left tree:
+      long rightCount = source.count / 2;
+      long leftCount = source.count - rightCount;
+
+      byte[] splitValue = markRightTree(rightCount, splitDim, source, ordBitSet);
+      int address = nodeID * (1+bytesPerDim);
+      splitPackedValues[address] = (byte) splitDim;
+      System.arraycopy(splitValue, 0, splitPackedValues, address + 1, bytesPerDim);
+
+      // Partition all PathSlice that are not the split dim into sorted left and right sets, so we can recurse:
+
+      PathSlice[] leftSlices = new PathSlice[numDims];
+      PathSlice[] rightSlices = new PathSlice[numDims];
+
+      byte[] minSplitPackedValue = new byte[packedBytesLength];
+      System.arraycopy(minPackedValue, 0, minSplitPackedValue, 0, packedBytesLength);
+
+      byte[] maxSplitPackedValue = new byte[packedBytesLength];
+      System.arraycopy(maxPackedValue, 0, maxSplitPackedValue, 0, packedBytesLength);
+
+      // When we are on this dim, below, we clear the ordBitSet:
+      int dimToClear;
+      if (numDims - 1 == splitDim) {
+        dimToClear = numDims - 2;
+      } else {
+        dimToClear = numDims - 1;
+      }
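+      // For example with numDims=3: splitDim=2 gives dimToClear=1, otherwise
+      // dimToClear=2, i.e. the last non-split dim the loop below processes,
+      // so the ordBitSet is cleared on its final use.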
+
+      for(int dim=0;dim<numDims;dim++) {
+
+        if (dim == splitDim) {
+          // No need to partition on this dim since it's a simple slice of the incoming already sorted slice, and we
+          // will re-use its shared reader when visiting it as we recurse:
+          leftSlices[dim] = new PathSlice(source.writer, source.start, leftCount);
+          rightSlices[dim] = new PathSlice(source.writer, source.start + leftCount, rightCount);
+          System.arraycopy(splitValue, 0, minSplitPackedValue, dim*bytesPerDim, bytesPerDim);
+          System.arraycopy(splitValue, 0, maxSplitPackedValue, dim*bytesPerDim, bytesPerDim);
+          continue;
+        }
+
+        // Not inside the try because we don't want to close this one now, so that after recursion is done,
+      // we will have done a single full sweep of the file:
+        PointReader reader = slices[dim].writer.getSharedReader(slices[dim].start, slices[dim].count, toCloseHeroically);
+
+        try (PointWriter leftPointWriter = getPointWriter(leftCount, "left" + dim);
+             PointWriter rightPointWriter = getPointWriter(source.count - leftCount, "right" + dim)) {
+
+          long nextRightCount = reader.split(source.count, ordBitSet, leftPointWriter, rightPointWriter, dim == dimToClear);
+          if (rightCount != nextRightCount) {
+            throw new IllegalStateException("wrong number of points in split: expected=" + rightCount + " but actual=" + nextRightCount);
+          }
+
+          leftSlices[dim] = new PathSlice(leftPointWriter, 0, leftCount);
+          rightSlices[dim] = new PathSlice(rightPointWriter, 0, rightCount);
+        } catch (Throwable t) {
+          verifyChecksum(t, slices[dim].writer);
+        }
+      }
+
+      // Recurse on left tree:
+      build(2*nodeID, leafNodeOffset, leftSlices,
+            ordBitSet, out,
+            minPackedValue, maxSplitPackedValue,
+            splitPackedValues, leafBlockFPs, toCloseHeroically);
+      for(int dim=0;dim<numDims;dim++) {
+        // Don't destroy the dim we split on because we just re-used what our caller above gave us for that dim:
+        if (dim != splitDim) {
+          leftSlices[dim].writer.destroy();
+        }
+      }
+
+      // TODO: we could "tail recurse" here?  have our parent discard its refs as we recurse right?
+      // Recurse on right tree:
+      build(2*nodeID+1, leafNodeOffset, rightSlices,
+            ordBitSet, out,
+            minSplitPackedValue, maxPackedValue,
+            splitPackedValues, leafBlockFPs, toCloseHeroically);
+      for(int dim=0;dim<numDims;dim++) {
+        // Don't destroy the dim we split on because we just re-used what our caller above gave us for that dim:
+        if (dim != splitDim) {
+          rightSlices[dim].writer.destroy();
+        }
+      }
+    }
+  }
+
+  // only called from assert
+  private boolean valuesInOrderAndBounds(int count, int sortedDim, byte[] minPackedValue, byte[] maxPackedValue,
+      IntFunction<BytesRef> values, int[] docs, int docsOffset) throws IOException {
+    byte[] lastPackedValue = new byte[packedBytesLength];
+    int lastDoc = -1;
+    for (int i=0;i<count;i++) {
+      BytesRef packedValue = values.apply(i);
+      assert packedValue.length == packedBytesLength;
+      assert valueInOrder(i, sortedDim, lastPackedValue, packedValue.bytes, packedValue.offset,
+          docs[docsOffset + i], lastDoc);
+      lastDoc = docs[docsOffset + i];
+
+      // Make sure this value does in fact fall within this leaf cell:
+      assert valueInBounds(packedValue, minPackedValue, maxPackedValue);
+    }
+    return true;
+  }
+
+  // only called from assert
+  private boolean valueInOrder(long ord, int sortedDim, byte[] lastPackedValue, byte[] packedValue, int packedValueOffset,
+      int doc, int lastDoc) {
+    int dimOffset = sortedDim * bytesPerDim;
+    if (ord > 0) {
+      int cmp = StringHelper.compare(bytesPerDim, lastPackedValue, dimOffset, packedValue, packedValueOffset + dimOffset);
+      if (cmp > 0) {
+        throw new AssertionError("values out of order: last value=" + new BytesRef(lastPackedValue) + " current value=" + new BytesRef(packedValue, packedValueOffset, packedBytesLength) + " ord=" + ord);
+      }
+      if (cmp == 0 && doc < lastDoc) {
+        throw new AssertionError("docs out of order: last doc=" + lastDoc + " current doc=" + doc + " ord=" + ord);
+      }
+    }
+    System.arraycopy(packedValue, packedValueOffset, lastPackedValue, 0, packedBytesLength);
+    return true;
+  }
+
+  PointWriter getPointWriter(long count, String desc) throws IOException {
+    if (count <= maxPointsSortInHeap) {
+      int size = Math.toIntExact(count);
+      return new HeapPointWriter(size, size, packedBytesLength, longOrds, singleValuePerDoc);
+    } else {
+      return new OfflinePointWriter(tempDir, tempFileNamePrefix, packedBytesLength, longOrds, desc, count, singleValuePerDoc);
+    }
+  }
+
+  private void write(IndexOutput out, String s) throws IOException {
+    SimpleTextUtil.write(out, s, scratch);
+  }
+
+  private void writeInt(IndexOutput out, int x) throws IOException {
+    SimpleTextUtil.write(out, Integer.toString(x), scratch);
+  }
+
+  private void writeLong(IndexOutput out, long x) throws IOException {
+    SimpleTextUtil.write(out, Long.toString(x), scratch);
+  }
+
+  private void write(IndexOutput out, BytesRef b) throws IOException {
+    SimpleTextUtil.write(out, b);
+  }
+
+  private void newline(IndexOutput out) throws IOException {
+    SimpleTextUtil.writeNewline(out);
+  }
+}
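
Stepping back from the diff: the build() method above picks the dimension with
the widest value range, splits the points at the median, and recurses until a
slice fits in a leaf. Below is a minimal in-heap sketch of that shape for
int-valued points; the class and constant names are illustrative, and this is
not the Lucene implementation (which also tracks ords, spills large partitions
to offline files, and verifies checksums):

    import java.util.Arrays;
    import java.util.Comparator;
    import java.util.Random;

    public class BkdSplitSketch {
      static final int MAX_POINTS_IN_LEAF = 4; // illustrative leaf size

      /** Recursively partition points[from,to): pick the widest dimension,
       *  split at the median, recurse left and right. */
      static void build(int[][] points, int from, int to, int depth) {
        if (to - from <= MAX_POINTS_IN_LEAF) {
          // A real writer would record the block file pointer here, then
          // write the docIDs chunk followed by the packed values.
          System.out.println("leaf at depth " + depth + ": " + (to - from) + " points");
          return;
        }
        final int splitDim = chooseSplitDim(points, from, to);
        Arrays.sort(points, from, to, Comparator.comparingInt((int[] p) -> p[splitDim]));
        int count = to - from;
        int mid = from + (count - count / 2); // leftCount = count - count/2, as above
        build(points, from, mid, depth + 1);
        build(points, mid, to, depth + 1);
      }

      /** Widest-range heuristic, mirroring split(minPackedValue, maxPackedValue). */
      static int chooseSplitDim(int[][] points, int from, int to) {
        int numDims = points[from].length;
        int bestDim = 0;
        long bestRange = -1;
        for (int dim = 0; dim < numDims; dim++) {
          int min = Integer.MAX_VALUE, max = Integer.MIN_VALUE;
          for (int i = from; i < to; i++) {
            min = Math.min(min, points[i][dim]);
            max = Math.max(max, points[i][dim]);
          }
          long range = (long) max - min;
          if (range > bestRange) {
            bestRange = range;
            bestDim = dim;
          }
        }
        return bestDim;
      }

      public static void main(String[] args) {
        Random r = new Random(42);
        int[][] points = new int[32][2];
        for (int[] p : points) { p[0] = r.nextInt(1000); p[1] = r.nextInt(10); }
        build(points, 0, points.length, 0);
      }
    }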

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/5e8db2e0/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java
----------------------------------------------------------------------
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java
index f7ff16e..453bd23 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPointsReader.java
@@ -36,7 +36,6 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.bkd.BKDReader;
 
 import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BLOCK_FP;
 import static org.apache.lucene.codecs.simpletext.SimpleTextPointsWriter.BYTES_PER_DIM;
@@ -58,7 +57,7 @@ class SimpleTextPointsReader extends PointsReader {
 
   private final IndexInput dataIn;
   final SegmentReadState readState;
-  final Map<String,BKDReader> readers = new HashMap<>();
+  final Map<String,SimpleTextBKDReader> readers = new HashMap<>();
   final BytesRefBuilder scratch = new BytesRefBuilder();
 
   public SimpleTextPointsReader(SegmentReadState readState) throws IOException {
@@ -98,7 +97,7 @@ class SimpleTextPointsReader extends PointsReader {
     this.readState = readState;
   }
 
-  private BKDReader initReader(long fp) throws IOException {
+  private SimpleTextBKDReader initReader(long fp) throws IOException {
     // NOTE: matches what writeIndex does in SimpleTextPointsWriter
     dataIn.seek(fp);
     readLine(dataIn);
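
This is the reader half of a pattern visible throughout the commit: the writer
records a file pointer for each block (leafBlockFPs above), and the reader
later seeks straight to it before parsing. A tiny self-contained sketch of that
record/seek handshake, with an illustrative layout rather than the SimpleText
format:

    import java.io.File;
    import java.io.IOException;
    import java.io.RandomAccessFile;

    class FilePointerSketch {
      public static void main(String[] args) throws IOException {
        File f = File.createTempFile("blocks", ".bin");
        long blockFP;
        try (RandomAccessFile out = new RandomAccessFile(f, "rw")) {
          out.writeUTF("header");
          blockFP = out.getFilePointer(); // save the block file pointer at write time
          out.writeUTF("block-0 payload");
        }
        try (RandomAccessFile in = new RandomAccessFile(f, "r")) {
          in.seek(blockFP);                 // initReader(fp) seeks the same way
          System.out.println(in.readUTF()); // -> block-0 payload
        }
        f.delete();
      }
    }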


[12/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-9832: Schema modifications are not immediately visible on the coordinating node

Posted by kr...@apache.org.
SOLR-9832: Schema modifications are not immediately visible on the coordinating node


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/bf3a3137
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/bf3a3137
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/bf3a3137

Branch: refs/heads/jira/solr-8593
Commit: bf3a3137be8a70ceed884e87c3ada276e82b187b
Parents: 2e948fe
Author: Steve Rowe <sa...@apache.org>
Authored: Tue Dec 6 13:11:36 2016 -0500
Committer: Steve Rowe <sa...@apache.org>
Committed: Tue Dec 6 13:11:36 2016 -0500

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  2 +
 .../src/java/org/apache/solr/core/SolrCore.java |  8 --
 .../solr/schema/ManagedIndexSchemaFactory.java  | 12 +++
 .../org/apache/solr/schema/SchemaManager.java   |  2 +-
 .../ManagedSchemaRoundRobinCloudTest.java       | 98 ++++++++++++++++++++
 5 files changed, 113 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bf3a3137/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index e766169..bac24e5 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -220,6 +220,8 @@ Bug Fixes
 
 * SOLR-9616: Solr throws exception when expand=true on empty index (Timo Hund via Ishan Chattopadhyaya)
 
+* SOLR-9832: Schema modifications are not immediately visible on the coordinating node. (Steve Rowe)
+
 Other Changes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bf3a3137/solr/core/src/java/org/apache/solr/core/SolrCore.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/core/SolrCore.java b/solr/core/src/java/org/apache/solr/core/SolrCore.java
index e5bc53d..a459bf2 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrCore.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrCore.java
@@ -119,7 +119,6 @@ import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.IndexSchemaFactory;
 import org.apache.solr.schema.ManagedIndexSchema;
-import org.apache.solr.schema.SchemaManager;
 import org.apache.solr.schema.SimilarityFactory;
 import org.apache.solr.search.QParserPlugin;
 import org.apache.solr.search.SolrFieldCacheMBean;
@@ -2720,13 +2719,6 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
       if (checkStale(zkClient, overlayPath, solrConfigversion) ||
           checkStale(zkClient, solrConfigPath, overlayVersion) ||
           checkStale(zkClient, managedSchmaResourcePath, managedSchemaVersion)) {
-
-        try (SolrCore solrCore = cc.solrCores.getCoreFromAnyList(coreName, true)) {
-          solrCore.setLatestSchema(SchemaManager.getFreshManagedSchema(solrCore));
-        } catch (Exception e) {
-          log.warn("", SolrZkClient.checkInterrupted(e));
-        }
-
         log.info("core reload {}", coreName);
         try {
           cc.reload(coreName);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bf3a3137/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchemaFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchemaFactory.java b/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchemaFactory.java
index 66d947e..d4a10bd 100644
--- a/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchemaFactory.java
+++ b/solr/core/src/java/org/apache/solr/schema/ManagedIndexSchemaFactory.java
@@ -377,6 +377,18 @@ public class ManagedIndexSchemaFactory extends IndexSchemaFactory implements Sol
       this.zkIndexSchemaReader = new ZkIndexSchemaReader(this, core);
       ZkSolrResourceLoader zkLoader = (ZkSolrResourceLoader)loader;
       zkLoader.setZkIndexSchemaReader(this.zkIndexSchemaReader);
+      try {
+        zkIndexSchemaReader.refreshSchemaFromZk(-1); // update immediately if newer is available
+        core.setLatestSchema(getSchema());
+      } catch (KeeperException e) {
+        String msg = "Error attempting to access " + zkLoader.getConfigSetZkPath() + "/" + managedSchemaResourceName;
+        log.error(msg, e);
+        throw new SolrException(ErrorCode.SERVER_ERROR, msg, e);
+      } catch (InterruptedException e) {
+        // Restore the interrupted status
+        Thread.currentThread().interrupt();
+        log.warn("", e);
+      }
     } else {
       this.zkIndexSchemaReader = null;
     }
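
One detail worth noting in the hunk above is the InterruptedException handling:
rather than swallowing the exception, the catch block restores the thread's
interrupt flag so callers can still observe it. A minimal sketch of that
standard idiom, where Thread.sleep stands in for a blocking call such as a
ZooKeeper read and everything except Thread.currentThread().interrupt() is
illustrative:

    class InterruptIdiomSketch {
      static void pollUntilInterrupted() {
        while (!Thread.currentThread().isInterrupted()) {
          try {
            Thread.sleep(1000); // blocking work
          } catch (InterruptedException e) {
            // Restore the interrupted status so the loop condition sees it:
            Thread.currentThread().interrupt();
          }
        }
      }

      public static void main(String[] args) throws Exception {
        Thread t = new Thread(InterruptIdiomSketch::pollUntilInterrupted);
        t.start();
        t.interrupt(); // the worker notices the flag and exits cleanly
        t.join();
      }
    }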

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bf3a3137/solr/core/src/java/org/apache/solr/schema/SchemaManager.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/schema/SchemaManager.java b/solr/core/src/java/org/apache/solr/schema/SchemaManager.java
index 4b0ea54..3340631 100644
--- a/solr/core/src/java/org/apache/solr/schema/SchemaManager.java
+++ b/solr/core/src/java/org/apache/solr/schema/SchemaManager.java
@@ -133,8 +133,8 @@ public class SchemaManager {
         try {
           int latestVersion = ZkController.persistConfigResourceToZooKeeper(zkLoader, managedIndexSchema.getSchemaZkVersion(),
               managedIndexSchema.getResourceName(), sw.toString().getBytes(StandardCharsets.UTF_8), true);
+          req.getCore().getCoreDescriptor().getCoreContainer().reload(req.getCore().getName());
           waitForOtherReplicasToUpdate(timeOut, latestVersion);
-          core.setLatestSchema(managedIndexSchema);
           return Collections.emptyList();
         } catch (ZkController.ResourceModifiedInZkException e) {
           log.info("Schema was modified by another node. Retrying..");

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bf3a3137/solr/core/src/test/org/apache/solr/schema/ManagedSchemaRoundRobinCloudTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/schema/ManagedSchemaRoundRobinCloudTest.java b/solr/core/src/test/org/apache/solr/schema/ManagedSchemaRoundRobinCloudTest.java
new file mode 100644
index 0000000..883ebfd
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/schema/ManagedSchemaRoundRobinCloudTest.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.schema;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.solr.client.solrj.SolrClient;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
+import org.apache.solr.client.solrj.request.CollectionAdminRequest;
+import org.apache.solr.client.solrj.request.schema.SchemaRequest;
+import org.apache.solr.client.solrj.response.schema.SchemaResponse;
+import org.apache.solr.cloud.SolrCloudTestCase;
+import org.apache.solr.common.cloud.DocCollection;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class ManagedSchemaRoundRobinCloudTest extends SolrCloudTestCase {
+  private static final String COLLECTION = "managed_coll";
+  private static final String CONFIG = "cloud-managed";
+  private static final String FIELD_PREFIX = "NumberedField_";
+  private static final int NUM_SHARDS = 2;
+  private static final int NUM_FIELDS_TO_ADD = 10;
+
+  @BeforeClass
+  public static void setupCluster() throws Exception {
+    System.setProperty("managed.schema.mutable", "true");
+    configureCluster(NUM_SHARDS).addConfig(CONFIG, configset(CONFIG)).configure();
+    CollectionAdminRequest.createCollection(COLLECTION, CONFIG, NUM_SHARDS, 1)
+        .setMaxShardsPerNode(1)
+        .process(cluster.getSolrClient());
+    cluster.getSolrClient().waitForState(COLLECTION, DEFAULT_TIMEOUT, TimeUnit.SECONDS,
+        (n, c) -> DocCollection.isFullyActive(n, c, NUM_SHARDS, 1));
+  }
+
+  @AfterClass
+  public static void clearSysProps() throws Exception {
+    System.clearProperty("managed.schema.mutable");
+  }
+
+  @Test
+  public void testAddFieldsRoundRobin() throws Exception {
+    List<HttpSolrClient> clients = new ArrayList<>(NUM_SHARDS);
+    try {
+      for (int shardNum = 0 ; shardNum < NUM_SHARDS ; ++shardNum) {
+        clients.add(getHttpSolrClient(cluster.getJettySolrRunners().get(shardNum).getBaseUrl().toString()));
+      }
+      int shardNum = 0;
+      for (int fieldNum = 0 ; fieldNum < NUM_FIELDS_TO_ADD ; ++fieldNum) {
+        addField(clients.get(shardNum), keyValueArrayToMap("name", FIELD_PREFIX + fieldNum, "type", "string"));
+        if (++shardNum == NUM_SHARDS) { 
+          shardNum = 0;
+        }
+      }
+    } finally {
+      for (int shardNum = 0 ; shardNum < NUM_SHARDS ; ++shardNum) {
+        clients.get(shardNum).close();
+      }
+    }
+  }
+
+  private void addField(SolrClient client, Map<String,Object> field) throws Exception {
+    SchemaResponse.UpdateResponse addFieldResponse = new SchemaRequest.AddField(field).process(client, COLLECTION);
+    assertNotNull(addFieldResponse);
+    assertEquals(0, addFieldResponse.getStatus());
+    assertNull(addFieldResponse.getResponse().get("errors"));
+    String fieldName = field.get("name").toString();
+    SchemaResponse.FieldResponse fieldResponse = new SchemaRequest.Field(fieldName).process(client, COLLECTION);
+    assertNotNull(fieldResponse);
+    assertEquals(0, fieldResponse.getStatus());
+  }
+
+  private Map<String,Object> keyValueArrayToMap(String... alternatingKeysAndValues) {
+    Map<String,Object> map = new HashMap<>();
+    for (int i = 0 ; i < alternatingKeysAndValues.length ; i += 2)
+      map.put(alternatingKeysAndValues[i], alternatingKeysAndValues[i + 1]);
+    return map;
+  }
+}


[25/50] [abbrv] lucene-solr:jira/solr-8593: LUCENE-7583: move this class to the right package

Posted by kr...@apache.org.
LUCENE-7583: move this class to the right package


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/c1856175
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/c1856175
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/c1856175

Branch: refs/heads/jira/solr-8593
Commit: c185617582b4bf3ce2899c9ae67e9eeaf2c21741
Parents: 1d2e440
Author: Mike McCandless <mi...@apache.org>
Authored: Thu Dec 8 18:34:51 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Thu Dec 8 18:34:51 2016 -0500

----------------------------------------------------------------------
 .../store/GrowableByteArrayDataOutput.java      | 103 +++++++++++++++++++
 .../util/GrowableByteArrayDataOutput.java       | 103 -------------------
 2 files changed, 103 insertions(+), 103 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c1856175/lucene/core/src/java/org/apache/lucene/store/GrowableByteArrayDataOutput.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/store/GrowableByteArrayDataOutput.java b/lucene/core/src/java/org/apache/lucene/store/GrowableByteArrayDataOutput.java
new file mode 100644
index 0000000..5f00d4a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/store/GrowableByteArrayDataOutput.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.store;
+
+import java.io.IOException;
+
+import org.apache.lucene.store.DataOutput;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.UnicodeUtil;
+
+/**
+ * A {@link DataOutput} that can be used to build a byte[].
+ *
+ * @lucene.internal
+ */
+public final class GrowableByteArrayDataOutput extends DataOutput {
+
+  /** Minimum UTF-8 byte size of a string above which a double pass over the string is used to save memory during encoding */
+  static final int MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING = 65536;
+
+  /** The bytes */
+  private byte[] bytes;
+
+  /** The length */
+  private int length;
+
+  // scratch for utf8 encoding of small strings
+  private byte[] scratchBytes;
+
+  /** Create a {@link GrowableByteArrayDataOutput} with the given initial capacity. */
+  public GrowableByteArrayDataOutput(int cp) {
+    this.bytes = new byte[ArrayUtil.oversize(cp, 1)];
+    this.length = 0;
+  }
+
+  @Override
+  public void writeByte(byte b) {
+    if (length >= bytes.length) {
+      bytes = ArrayUtil.grow(bytes);
+    }
+    bytes[length++] = b;
+  }
+
+  @Override
+  public void writeBytes(byte[] b, int off, int len) {
+    final int newLength = length + len;
+    if (newLength > bytes.length) {
+      bytes = ArrayUtil.grow(bytes, newLength);
+    }
+    System.arraycopy(b, off, bytes, length, len);
+    length = newLength;
+  }
+
+  @Override
+  public void writeString(String string) throws IOException {
+    int maxLen = UnicodeUtil.maxUTF8Length(string.length());
+    if (maxLen <= MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING)  {
+      // string is small enough that we don't need to save memory by falling back to double-pass approach
+      // this is just an optimized writeString() that re-uses scratchBytes.
+      if (scratchBytes == null) {
+        scratchBytes = new byte[ArrayUtil.oversize(maxLen, Character.BYTES)];
+      } else {
+        scratchBytes = ArrayUtil.grow(scratchBytes, maxLen);
+      }
+      int len = UnicodeUtil.UTF16toUTF8(string, 0, string.length(), scratchBytes);
+      writeVInt(len);
+      writeBytes(scratchBytes, len);
+    } else  {
+      // use a double pass approach to avoid allocating a large intermediate buffer for string encoding
+      int numBytes = UnicodeUtil.calcUTF16toUTF8Length(string, 0, string.length());
+      writeVInt(numBytes);
+      bytes = ArrayUtil.grow(bytes, length + numBytes);
+      length = UnicodeUtil.UTF16toUTF8(string, 0, string.length(), bytes, length);
+    }
+  }
+
+  public byte[] getBytes() {
+    return bytes;
+  }
+
+  public int getPosition() {
+    return length;
+  }
+
+  public void reset() {
+    length = 0;
+  }
+}
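
For orientation, a small usage sketch of the class as moved above; the demo
class and the values written are illustrative, while the calls themselves
(writeVInt, writeString, getPosition, getBytes) are the ones visible in the
source:

    import java.io.IOException;
    import java.util.Arrays;

    import org.apache.lucene.store.GrowableByteArrayDataOutput;

    class GrowableOutputDemo {
      static byte[] encode() throws IOException {
        GrowableByteArrayDataOutput out = new GrowableByteArrayDataOutput(16); // grows as needed
        out.writeVInt(42);
        out.writeString("hello");    // small string: single pass via the scratch buffer
        int len = out.getPosition(); // valid byte count; getBytes() may be oversized
        return Arrays.copyOf(out.getBytes(), len);
      }
    }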

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c1856175/lucene/core/src/java/org/apache/lucene/util/GrowableByteArrayDataOutput.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/GrowableByteArrayDataOutput.java b/lucene/core/src/java/org/apache/lucene/util/GrowableByteArrayDataOutput.java
deleted file mode 100644
index 5f00d4a..0000000
--- a/lucene/core/src/java/org/apache/lucene/util/GrowableByteArrayDataOutput.java
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.store;
-
-import java.io.IOException;
-
-import org.apache.lucene.store.DataOutput;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.UnicodeUtil;
-
-/**
- * A {@link DataOutput} that can be used to build a byte[].
- *
- * @lucene.internal
- */
-public final class GrowableByteArrayDataOutput extends DataOutput {
-
-  /** Minimum UTF-8 byte size of a string above which a double pass over the string is used to save memory during encoding */
-  static final int MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING = 65536;
-
-  /** The bytes */
-  private byte[] bytes;
-
-  /** The length */
-  private int length;
-
-  // scratch for utf8 encoding of small strings
-  private byte[] scratchBytes;
-
-  /** Create a {@link GrowableByteArrayDataOutput} with the given initial capacity. */
-  public GrowableByteArrayDataOutput(int cp) {
-    this.bytes = new byte[ArrayUtil.oversize(cp, 1)];
-    this.length = 0;
-  }
-
-  @Override
-  public void writeByte(byte b) {
-    if (length >= bytes.length) {
-      bytes = ArrayUtil.grow(bytes);
-    }
-    bytes[length++] = b;
-  }
-
-  @Override
-  public void writeBytes(byte[] b, int off, int len) {
-    final int newLength = length + len;
-    if (newLength > bytes.length) {
-      bytes = ArrayUtil.grow(bytes, newLength);
-    }
-    System.arraycopy(b, off, bytes, length, len);
-    length = newLength;
-  }
-
-  @Override
-  public void writeString(String string) throws IOException {
-    int maxLen = UnicodeUtil.maxUTF8Length(string.length());
-    if (maxLen <= MIN_UTF8_SIZE_TO_ENABLE_DOUBLE_PASS_ENCODING)  {
-      // string is small enough that we don't need to save memory by falling back to double-pass approach
-      // this is just an optimized writeString() that re-uses scratchBytes.
-      if (scratchBytes == null) {
-        scratchBytes = new byte[ArrayUtil.oversize(maxLen, Character.BYTES)];
-      } else {
-        scratchBytes = ArrayUtil.grow(scratchBytes, maxLen);
-      }
-      int len = UnicodeUtil.UTF16toUTF8(string, 0, string.length(), scratchBytes);
-      writeVInt(len);
-      writeBytes(scratchBytes, len);
-    } else  {
-      // use a double pass approach to avoid allocating a large intermediate buffer for string encoding
-      int numBytes = UnicodeUtil.calcUTF16toUTF8Length(string, 0, string.length());
-      writeVInt(numBytes);
-      bytes = ArrayUtil.grow(bytes, length + numBytes);
-      length = UnicodeUtil.UTF16toUTF8(string, 0, string.length(), bytes, length);
-    }
-  }
-
-  public byte[] getBytes() {
-    return bytes;
-  }
-
-  public int getPosition() {
-    return length;
-  }
-
-  public void reset() {
-    length = 0;
-  }
-}


[35/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-9823: CoreContainer incorrectly setting MDCLoggingContext for core

Posted by kr...@apache.org.
SOLR-9823: CoreContainer incorrectly setting MDCLoggingContext for core


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/8c79ab26
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/8c79ab26
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/8c79ab26

Branch: refs/heads/jira/solr-8593
Commit: 8c79ab2649437c8c7ca275f6481c058c67626660
Parents: fecbbe0
Author: Erick <er...@apache.org>
Authored: Mon Dec 12 18:43:30 2016 -0800
Committer: Erick <er...@apache.org>
Committed: Mon Dec 12 18:43:30 2016 -0800

----------------------------------------------------------------------
 solr/CHANGES.txt                                           | 2 ++
 solr/core/src/java/org/apache/solr/core/CoreContainer.java | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8c79ab26/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index a8a3f97..41af0ff 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -242,6 +242,8 @@ Bug Fixes
 
 * SOLR-9707: Don't forward DeleteByQuery requests to down replicas. (Jessica Cheng Mallet via Varun Thacker)
 
+* SOLR-9823: CoreContainer incorrectly setting MDCLoggingContext for core (Jessica Cheng Mallet via Erick Erickson)
+
 Other Changes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/8c79ab26/solr/core/src/java/org/apache/solr/core/CoreContainer.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index ad4560e..7c38b81 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -857,7 +857,7 @@ public class CoreContainer {
 
     SolrCore core = null;
     try {
-      MDCLoggingContext.setCore(core);
+      MDCLoggingContext.setCoreDescriptor(dcore);
       SolrIdentifierValidator.validateCoreName(dcore.getName());
       if (zkSys.getZkController() != null) {
         zkSys.getZkController().preRegister(dcore);
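
The one-line fix above addresses an ordering bug: MDCLoggingContext.setCore(core)
ran while the local core variable was still null, so the logging context was
never populated; the CoreDescriptor, which already carries the core's name, is
what is actually available at that point. A stand-in sketch of the ordering
problem, with all names illustrative rather than Solr APIs:

    class MdcOrderingSketch {
      static String contextCoreName; // stand-in for the MDC entry

      static void setContext(String name) { contextCoreName = name; }

      public static void main(String[] args) {
        String coreName = null;
        setContext(coreName);            // captures null: the core does not exist yet
        coreName = "collection1_shard1"; // too late, the context never sees it
        System.out.println("MDC core = " + contextCoreName); // prints: MDC core = null
      }
    }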


[31/50] [abbrv] lucene-solr:jira/solr-8593: Add .pydevproject to .gitignore

Posted by kr...@apache.org.
Add .pydevproject to .gitignore


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/25c7855b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/25c7855b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/25c7855b

Branch: refs/heads/jira/solr-8593
Commit: 25c7855bbae4eaa8700e72d094442811f0e8e1d9
Parents: d75abe1
Author: Shai Erera <sh...@apache.org>
Authored: Sun Dec 11 13:08:33 2016 +0200
Committer: Shai Erera <sh...@apache.org>
Committed: Sun Dec 11 13:08:33 2016 +0200

----------------------------------------------------------------------
 .gitignore | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/25c7855b/.gitignore
----------------------------------------------------------------------
diff --git a/.gitignore b/.gitignore
index 8091ecd..625cfa9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -25,6 +25,7 @@ parent.iml
 **/pom.xml
 /nbproject
 /nb-build
+.pydevproject
 
 /solr/package
 


[47/50] [abbrv] lucene-solr:jira/solr-8593: remove bad assertion

Posted by kr...@apache.org.
remove bad assertion


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/268d4ace
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/268d4ace
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/268d4ace

Branch: refs/heads/jira/solr-8593
Commit: 268d4ace3695ad3738402d623400fa4775b113ef
Parents: 295cab7
Author: Mike McCandless <mi...@apache.org>
Authored: Thu Dec 15 09:23:48 2016 -0500
Committer: Mike McCandless <mi...@apache.org>
Committed: Thu Dec 15 09:30:25 2016 -0500

----------------------------------------------------------------------
 .../src/java/org/apache/lucene/search/QueryUtils.java            | 4 ----
 1 file changed, 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/268d4ace/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
index a3eaa80..ae4c890 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/QueryUtils.java
@@ -93,10 +93,6 @@ public class QueryUtils {
   public static void checkUnequal(Query q1, Query q2) {
     assertFalse(q1 + " equal to " + q2, q1.equals(q2));
     assertFalse(q2 + " equal to " + q1, q2.equals(q1));
-
-    // possible this test can fail on a hash collision... if that
-    // happens, please change test to use a different example.
-    assertTrue(q1.hashCode() != q2.hashCode());
   }
 
   /** deep check that explanations of a query 'score' correctly */
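
The deleted assertion assumed that unequal queries always produce unequal hash
codes, which the pigeonhole principle cannot guarantee. A self-contained
reminder of why, using a well-known JDK string collision:

    class HashCollisionSketch {
      public static void main(String[] args) {
        String a = "Aa", b = "BB";                        // distinct values...
        System.out.println(a.equals(b));                  // false
        System.out.println(a.hashCode() == b.hashCode()); // true: both hash to 2112
      }
    }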