You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by an...@apache.org on 2015/04/26 02:06:08 UTC

svn commit: r1676075 - in /lucene/dev/trunk/solr: CHANGES.txt core/src/java/org/apache/solr/update/SolrIndexSplitter.java

Author: andyetitmoves
Date: Sun Apr 26 00:06:08 2015
New Revision: 1676075

URL: http://svn.apache.org/r1676075
Log:
SOLR-5213: Log when shard splitting unexpectedly leads to documents going to zero or multiple sub-shards

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1676075&r1=1676074&r2=1676075&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Sun Apr 26 00:06:08 2015
@@ -228,6 +228,9 @@ Other Changes
 * SOLR-7391: Use a time based expiration cache for one off HDFS FileSystem instances.
   (Mark Miller)
 
+* SOLR-5213: Log when shard splitting unexpectedly leads to documents going to
+  zero or multiple sub-shards (Christine Poerschke, Ramkumar Aiyengar)
+
 ==================  5.1.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java?rev=1676075&r1=1676074&r2=1676075&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java Sun Apr 26 00:06:08 2015
@@ -168,6 +168,12 @@ public class SolrIndexSplitter {
     BytesRef term = null;
     PostingsEnum postingsEnum = null;
 
+    int[] docsMatchingRanges = null;
+    if (ranges != null) {
+      // +1: docsMatchingRanges[k] counts documents matching exactly k ranges, for k = 0..rangesArr.length
+      docsMatchingRanges = new int[rangesArr.length+1];
+    }
+
     CharsRefBuilder idRef = new CharsRefBuilder();
     for (;;) {
       term = termsEnum.next();
@@ -203,11 +209,37 @@ public class SolrIndexSplitter {
           docSets[currPartition].set(doc);
           currPartition = (currPartition + 1) % numPieces;
         } else  {
+          int matchingRangesCount = 0;
           for (int i=0; i<rangesArr.length; i++) {      // inner-loop: use array here for extra speed.
             if (rangesArr[i].includes(hash)) {
               docSets[i].set(doc);
+              ++matchingRangesCount;
             }
           }
+          docsMatchingRanges[matchingRangesCount]++;
+        }
+      }
+    }
+
+    if (docsMatchingRanges != null) {
+      for (int ii = 0; ii < docsMatchingRanges.length; ii++) {
+        if (0 == docsMatchingRanges[ii]) continue;
+        switch (ii) {
+          case 0:
+            // document loss
+            log.error("Splitting {}: {} documents belong to no shards and will be dropped",
+                reader, docsMatchingRanges[ii]);
+            break;
+          case 1:
+            // normal case, each document moves to one of the sub-shards
+            log.info("Splitting {}: {} documents will move into a sub-shard",
+                reader, docsMatchingRanges[ii]);
+            break;
+          default:
+            // document duplication
+            log.error("Splitting {}: {} documents will be moved to multiple ({}) sub-shards",
+                reader, docsMatchingRanges[ii], ii);
+            break;
         }
       }
     }