You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2013/05/10 09:01:30 UTC

svn commit: r1480897 - in /lucene/dev/branches/lucene_solr_4_3: ./ dev-tools/ lucene/ lucene/analysis/ lucene/analysis/icu/src/java/org/apache/lucene/collation/ lucene/backwards/ lucene/benchmark/ lucene/classification/ lucene/classification/src/ lucen...

Author: shalin
Date: Fri May 10 07:01:28 2013
New Revision: 1480897

URL: http://svn.apache.org/r1480897
Log:
SOLR-4798: use correct router during index splitting

Modified:
    lucene/dev/branches/lucene_solr_4_3/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/dev-tools/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/BUILD.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/CHANGES.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/JRE_VERSION_MIGRATION.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/LICENSE.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/MIGRATE.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/NOTICE.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/README.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/SYSTEM_REQUIREMENTS.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/analysis/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilterFactory.java   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/backwards/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/benchmark/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/build.xml   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/classification/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/classification/build.xml   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/classification/ivy.xml   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/classification/src/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/codecs/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/common-build.xml   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/core/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/core/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/core/src/test/org/apache/lucene/index/index.40.cfs.zip   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/core/src/test/org/apache/lucene/index/index.40.nocfs.zip   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.cfs.zip   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/core/src/test/org/apache/lucene/index/index.40.optimized.nocfs.zip   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/core/src/test/org/apache/lucene/search/TestSort.java   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/core/src/test/org/apache/lucene/search/TestSortDocValues.java   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/core/src/test/org/apache/lucene/search/TestSortRandom.java   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/core/src/test/org/apache/lucene/search/TestTopFieldCollector.java   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/core/src/test/org/apache/lucene/search/TestTotalHitCountCollector.java   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/demo/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/facet/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/grouping/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/highlighter/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/ivy-settings.xml   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/join/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/licenses/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/memory/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/misc/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/module-build.xml   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/queries/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/queries/src/test/org/apache/lucene/queries/function/TestFunctionQuerySort.java   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/queryparser/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/sandbox/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/site/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/spatial/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/suggest/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/test-framework/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/lucene/tools/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/LICENSE.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/NOTICE.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/README.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/SYSTEM_REQUIREMENTS.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/build.xml   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/cloud-dev/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/common-build.xml   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/contrib/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/core/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
    lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java
    lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/update/SplitIndexCommand.java
    lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/core/TestConfig.java   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/update/SolrIndexSplitterTest.java
    lucene/dev/branches/lucene_solr_4_3/solr/example/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/licenses/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/licenses/httpclient-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/licenses/httpclient-NOTICE.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/licenses/httpcore-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/licenses/httpcore-NOTICE.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/licenses/httpmime-LICENSE-ASL.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/licenses/httpmime-NOTICE.txt   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/scripts/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/site/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/solrj/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/solrj/src/java/org/apache/solr/common/cloud/CompositeIdRouter.java
    lucene/dev/branches/lucene_solr_4_3/solr/solrj/src/java/org/apache/solr/common/cloud/HashBasedRouter.java
    lucene/dev/branches/lucene_solr_4_3/solr/test-framework/   (props changed)
    lucene/dev/branches/lucene_solr_4_3/solr/webapp/   (props changed)

Modified: lucene/dev/branches/lucene_solr_4_3/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/solr/CHANGES.txt?rev=1480897&r1=1480896&r2=1480897&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/solr/CHANGES.txt (original)
+++ lucene/dev/branches/lucene_solr_4_3/solr/CHANGES.txt Fri May 10 07:01:28 2013
@@ -40,6 +40,13 @@ Bug Fixes
 * SOLR-4795: Sub shard leader should not accept any updates from parent after
   it goes active (shalin)
 
+* SOLR-4798: Shard splitting does not respect the router for the collection
+  when executing the index split. One effect of this is that documents
+  may be placed in the wrong shard when the default compositeId router
+  is used in conjunction with IDs containing "!". (yonik)
+
+
+
 Other Changes
 ----------------------
 

Modified: lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java?rev=1480897&r1=1480896&r2=1480897&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java (original)
+++ lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java Fri May 10 07:01:28 2013
@@ -252,6 +252,7 @@ public class CoreAdminHandler extends Re
       List<String> paths = null;
       int partitions = pathsArr != null ? pathsArr.length : newCoreNames.length;
 
+      DocRouter router = null;
       if (coreContainer.isZooKeeperAware()) {
         ClusterState clusterState = coreContainer.getZkController().getClusterState();
         String collectionName = req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName();
@@ -259,8 +260,8 @@ public class CoreAdminHandler extends Re
         String sliceName = req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId();
         Slice slice = clusterState.getSlice(collectionName, sliceName);
         DocRouter.Range currentRange = slice.getRange();
-        DocRouter hp = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
-        ranges = currentRange != null ? hp.partitionRange(partitions, currentRange) : null;
+        router = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
+        ranges = currentRange != null ? router.partitionRange(partitions, currentRange) : null;
       }
 
       if (pathsArr == null) {
@@ -278,7 +279,7 @@ public class CoreAdminHandler extends Re
       }
 
 
-      SplitIndexCommand cmd = new SplitIndexCommand(req, paths, newCores, ranges);
+      SplitIndexCommand cmd = new SplitIndexCommand(req, paths, newCores, ranges, router);
       core.getUpdateHandler().split(cmd);
 
       // After the split has completed, someone (here?) should start the process of replaying the buffered updates.

Modified: lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java?rev=1480897&r1=1480896&r2=1480897&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java (original)
+++ lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java Fri May 10 07:01:28 2013
@@ -32,9 +32,11 @@ import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.OpenBitSet;
 import org.apache.solr.common.cloud.DocRouter;
+import org.apache.solr.common.cloud.HashBasedRouter;
 import org.apache.solr.common.util.Hash;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.StrField;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.util.RefCounted;
 import org.slf4j.Logger;
@@ -53,6 +55,8 @@ public class SolrIndexSplitter {
   DocRouter.Range[] rangesArr; // same as ranges list, but an array for extra speed in inner loops
   List<String> paths;
   List<SolrCore> cores;
+  DocRouter router;
+  HashBasedRouter hashRouter;
   int numPieces;
   int currPartition = 0;
 
@@ -62,6 +66,9 @@ public class SolrIndexSplitter {
     ranges = cmd.ranges;
     paths = cmd.paths;
     cores = cmd.cores;
+    router = cmd.router;
+    hashRouter = router instanceof HashBasedRouter ? (HashBasedRouter)router : null;
+
     if (ranges == null) {
       numPieces =  paths != null ? paths.size() : cores.size();
     } else  {
@@ -151,16 +158,24 @@ public class SolrIndexSplitter {
     BytesRef term = null;
     DocsEnum docsEnum = null;
 
+    CharsRef idRef = new CharsRef(100);
     for (;;) {
       term = termsEnum.next();
       if (term == null) break;
 
       // figure out the hash for the term
-      // TODO: hook in custom hashes (or store hashes)
-      // TODO: performance implications of using indexedToReadable?
-      CharsRef ref = new CharsRef(term.length);
-      ref = field.getType().indexedToReadable(term, ref);
-      int hash = Hash.murmurhash3_x86_32(ref, ref.offset, ref.length, 0);
+
+      // FUTURE: if conversion to strings costs too much, we could
+      // specialize and use the hash function that can work over bytes.
+      idRef = field.getType().indexedToReadable(term, idRef);
+      String idString = idRef.toString();
+
+      int hash = 0;
+      if (hashRouter != null) {
+        hash = hashRouter.sliceHash(idString, null, null);
+      }
+      // int hash = Hash.murmurhash3_x86_32(ref, ref.offset, ref.length, 0);
+
       docsEnum = termsEnum.docs(liveDocs, docsEnum, DocsEnum.FLAG_NONE);
       for (;;) {
         int doc = docsEnum.nextDoc();

Modified: lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/update/SplitIndexCommand.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/update/SplitIndexCommand.java?rev=1480897&r1=1480896&r2=1480897&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/update/SplitIndexCommand.java (original)
+++ lucene/dev/branches/lucene_solr_4_3/solr/core/src/java/org/apache/solr/update/SplitIndexCommand.java Fri May 10 07:01:28 2013
@@ -34,13 +34,14 @@ public class SplitIndexCommand extends U
   public List<String> paths;
   public List<SolrCore> cores;  // either paths or cores should be specified
   public List<DocRouter.Range> ranges;
-  // TODO: allow specification of custom hash function
+  public DocRouter router;
 
-  public SplitIndexCommand(SolrQueryRequest req, List<String> paths,  List<SolrCore> cores, List<DocRouter.Range> ranges) {
+  public SplitIndexCommand(SolrQueryRequest req, List<String> paths,  List<SolrCore> cores, List<DocRouter.Range> ranges, DocRouter router) {
     super(req);
     this.paths = paths;
     this.cores = cores;
     this.ranges = ranges;
+    this.router = router;
   }
 
   @Override
@@ -54,6 +55,7 @@ public class SplitIndexCommand extends U
     sb.append(",paths=" + paths);
     sb.append(",cores=" + cores);
     sb.append(",ranges=" + ranges);
+    sb.append(",router=" + router);
     sb.append('}');
     return sb.toString();
   }

Modified: lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/update/SolrIndexSplitterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/update/SolrIndexSplitterTest.java?rev=1480897&r1=1480896&r2=1480897&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/update/SolrIndexSplitterTest.java (original)
+++ lucene/dev/branches/lucene_solr_4_3/solr/core/src/test/org/apache/solr/update/SolrIndexSplitterTest.java Fri May 10 07:01:28 2013
@@ -95,7 +95,7 @@ public class SolrIndexSplitterTest exten
       request = lrf.makeRequest("q", "dummy");
 
       SplitIndexCommand command = new SplitIndexCommand(request,
-          Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()), null, ranges);
+          Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()), null, ranges, new PlainIdRouter());
       new SolrIndexSplitter(command).split();
 
       Directory directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),
@@ -148,7 +148,7 @@ public class SolrIndexSplitterTest exten
       try {
         request = lrf.makeRequest("q", "dummy");
 
-        SplitIndexCommand command = new SplitIndexCommand(request, null, Lists.newArrayList(core1, core2), ranges);
+        SplitIndexCommand command = new SplitIndexCommand(request, null, Lists.newArrayList(core1, core2), ranges, new PlainIdRouter());
         new SolrIndexSplitter(command).split();
       } finally {
         if (request != null) request.close();
@@ -185,7 +185,7 @@ public class SolrIndexSplitterTest exten
       request = lrf.makeRequest("q", "dummy");
 
       SplitIndexCommand command = new SplitIndexCommand(request,
-          Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath(), indexDir3.getAbsolutePath()), null, null);
+          Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath(), indexDir3.getAbsolutePath()), null, null, new PlainIdRouter());
       new SolrIndexSplitter(command).split();
 
       directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),

Modified: lucene/dev/branches/lucene_solr_4_3/solr/solrj/src/java/org/apache/solr/common/cloud/CompositeIdRouter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/solr/solrj/src/java/org/apache/solr/common/cloud/CompositeIdRouter.java?rev=1480897&r1=1480896&r2=1480897&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/solr/solrj/src/java/org/apache/solr/common/cloud/CompositeIdRouter.java (original)
+++ lucene/dev/branches/lucene_solr_4_3/solr/solrj/src/java/org/apache/solr/common/cloud/CompositeIdRouter.java Fri May 10 07:01:28 2013
@@ -36,7 +36,7 @@ public class CompositeIdRouter extends H
   private int separator = '!';
 
   // separator used to optionally specify number of bits to allocate toward first part.
-  private int bitsSepartor = '/';
+  private int bitsSeparator = '/';
   private int bits = 16;
   private int mask1 = 0xffff0000;
   private int mask2 = 0x0000ffff;
@@ -59,7 +59,7 @@ public class CompositeIdRouter extends H
   }
 
   @Override
-  protected int sliceHash(String id, SolrInputDocument doc, SolrParams params) {
+  public int sliceHash(String id, SolrInputDocument doc, SolrParams params) {
     int idx = id.indexOf(separator);
     if (idx < 0) {
       return Hash.murmurhash3_x86_32(id, 0, id.length(), 0);
@@ -69,7 +69,7 @@ public class CompositeIdRouter extends H
     int m2 = mask2;
 
     String part1 = id.substring(0,idx);
-    int commaIdx = part1.indexOf(bitsSepartor);
+    int commaIdx = part1.indexOf(bitsSeparator);
     if (commaIdx > 0) {
       int firstBits = getBits(part1, commaIdx);
       if (firstBits >= 0) {
@@ -105,7 +105,7 @@ public class CompositeIdRouter extends H
     int m2 = mask2;
 
     String part1 = id.substring(0,idx);
-    int bitsSepIdx = part1.indexOf(bitsSepartor);
+    int bitsSepIdx = part1.indexOf(bitsSeparator);
     if (bitsSepIdx > 0) {
       int firstBits = getBits(part1, bitsSepIdx);
       if (firstBits >= 0) {

Modified: lucene/dev/branches/lucene_solr_4_3/solr/solrj/src/java/org/apache/solr/common/cloud/HashBasedRouter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_3/solr/solrj/src/java/org/apache/solr/common/cloud/HashBasedRouter.java?rev=1480897&r1=1480896&r2=1480897&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_3/solr/solrj/src/java/org/apache/solr/common/cloud/HashBasedRouter.java (original)
+++ lucene/dev/branches/lucene_solr_4_3/solr/solrj/src/java/org/apache/solr/common/cloud/HashBasedRouter.java Fri May 10 07:01:28 2013
@@ -42,7 +42,7 @@ public abstract class HashBasedRouter ex
     return range != null && range.includes(hash);
   }
 
-  protected int sliceHash(String id, SolrInputDocument sdoc, SolrParams params) {
+  public int sliceHash(String id, SolrInputDocument sdoc, SolrParams params) {
     return Hash.murmurhash3_x86_32(id, 0, id.length(), 0);
   }