Posted to commits@lucene.apache.org by yo...@apache.org on 2019/08/03 16:51:18 UTC

[lucene-solr] branch master updated: SOLR-13399: fix splitByPrefix test

This is an automated email from the ASF dual-hosted git repository.

yonik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/master by this push:
     new b6c26f6  SOLR-13399: fix splitByPrefix test
b6c26f6 is described below

commit b6c26f6c16130fd7ec9216b4f8798dc22aacb534
Author: yonik <yo...@apache.org>
AuthorDate: Sat Aug 3 12:52:35 2019 -0400

    SOLR-13399: fix splitByPrefix test
---
 .../org/apache/solr/handler/admin/SplitOp.java     | 22 +++++++++++++++++++++-
 .../cloud/api/collections/SplitByPrefixTest.java   |  3 +--
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/handler/admin/SplitOp.java b/solr/core/src/java/org/apache/solr/handler/admin/SplitOp.java
index 512bdff..a37708f 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/SplitOp.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/SplitOp.java
@@ -47,6 +47,7 @@ import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.update.SolrIndexSplitter;
 import org.apache.solr.update.SplitIndexCommand;
+import org.apache.solr.util.RTimer;
 import org.apache.solr.util.RefCounted;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -263,6 +264,11 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
     public int compareTo(RangeCount o) {
       return this.range.compareTo(o.range);
     }
+
+    @Override
+    public String toString() {
+      return range.toString() + "=" + count;
+    }
   }
 
 
@@ -286,6 +292,7 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
 
   // Returns a list of range counts sorted by the range lower bound
   static Collection<RangeCount> getHashHistogram(SolrIndexSearcher searcher, String prefixField, DocRouter router, DocCollection collection) throws IOException {
+    RTimer timer = new RTimer();
     TreeMap<DocRouter.Range,RangeCount> counts = new TreeMap<>();
 
     Terms terms = MultiTerms.getTerms(searcher.getIndexReader(), prefixField);
@@ -293,19 +300,30 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
       return counts.values();
     }
 
+    int numPrefixes = 0;
+    int numTriLevel = 0;
+    int numCollisions = 0;
+    long sumBuckets = 0;
+
     TermsEnum termsEnum = terms.iterator();
     for (;;) {
       BytesRef term = termsEnum.next();
       if (term == null) break;
+      numPrefixes++;
 
       String termStr = term.utf8ToString();
       int firstSep = termStr.indexOf(CompositeIdRouter.SEPARATOR);
       // truncate to first separator since we don't support multiple levels currently
+      // NOTE: this does not currently work for tri-level composite ids since the number of bits allocated to the first ID is 16 for a 2 part id
+      // and 8 for a 3 part id!
       if (firstSep != termStr.length()-1 && firstSep > 0) {
+        numTriLevel++;
         termStr = termStr.substring(0, firstSep+1);
       }
+
       DocRouter.Range range = router.getSearchRangeSingle(termStr, null, collection);
       int numDocs = termsEnum.docFreq();
+      sumBuckets += numDocs;
 
       RangeCount rangeCount = new RangeCount(range, numDocs);
 
@@ -313,16 +331,18 @@ class SplitOp implements CoreAdminHandler.CoreAdminOp {
       if (prev != null) {
         // we hit a hash collision or truncated a prefix to first level, so add the buckets together.
         rangeCount.count += prev.count;
+        numCollisions++;
       }
     }
 
+    log.info("Split histogram: ms={}, numBuckets={} sumBuckets={} numPrefixes={} numTriLevel={} numCollisions={}", timer.getTime(), counts.size(), sumBuckets, numPrefixes, numTriLevel, numCollisions);
+
     return counts.values();
   }
 
 
   // returns the list of recommended splits, or null if there is not enough information
   static Collection<DocRouter.Range> getSplits(Collection<RangeCount> rawCounts, DocRouter.Range currentRange) throws Exception {
-
     int totalCount = 0;
     RangeCount biggest = null; // keep track of the largest in case we need to split it out into its own shard
     RangeCount last = null;  // keep track of what the last range is
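
For context (not part of the commit), here is a minimal standalone sketch of the prefix truncation that getHashHistogram() above performs before each indexed prefix term is mapped to a hash range. The class name is made up for illustration, and the router/collection lookup is left as a comment because it needs a live DocCollection; only CompositeIdRouter.SEPARATOR ('!') and plain string handling are used, mirroring the patched code.

import org.apache.solr.common.cloud.CompositeIdRouter;

public class PrefixTruncationSketch {
  public static void main(String[] args) {
    // A multi-level prefix such as "a!b!" is cut back to "a!" because multiple
    // levels are not supported yet: per the NOTE added in this patch, a 2-part
    // id allocates 16 bits to the first ID while a 3-part id allocates only 8.
    String termStr = "a!b!";
    int firstSep = termStr.indexOf(CompositeIdRouter.SEPARATOR);   // '!'
    if (firstSep != termStr.length() - 1 && firstSep > 0) {
      termStr = termStr.substring(0, firstSep + 1);                // -> "a!"
    }
    System.out.println(termStr);
    // In SplitOp the truncated prefix is then mapped to its hash range:
    //   DocRouter.Range range = router.getSearchRangeSingle(termStr, null, collection);
  }
}
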
diff --git a/solr/core/src/test/org/apache/solr/cloud/api/collections/SplitByPrefixTest.java b/solr/core/src/test/org/apache/solr/cloud/api/collections/SplitByPrefixTest.java
index cc7686e..f3ef230 100644
--- a/solr/core/src/test/org/apache/solr/cloud/api/collections/SplitByPrefixTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/api/collections/SplitByPrefixTest.java
@@ -24,7 +24,6 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 
-import org.apache.lucene.util.LuceneTestCase.BadApple;
 import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.CloudSolrClient;
 import org.apache.solr.client.solrj.request.CollectionAdminRequest;
@@ -45,7 +44,6 @@ import org.slf4j.LoggerFactory;
 // This class tests higher level SPLITSHARD functionality when splitByPrefix is specified.
 // See SplitHandlerTest for random tests of lower-level split selection logic.
 //
-@BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-13399")
 public class SplitByPrefixTest extends SolrCloudTestCase {
   private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
@@ -133,6 +131,7 @@ public class SplitByPrefixTest extends SolrCloudTestCase {
   SolrInputDocument getDoc(String prefix, String unique) {
     String secondLevel = "";
     if (random().nextBoolean()) {
+      prefix = prefix.substring(0, prefix.length()-1) + "/16!";  // change "foo!" into "foo/16!" to match 2 level compositeId
       secondLevel="" + random().nextInt(2) + "!";
     }
     return sdoc("id", prefix + secondLevel + unique);