You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2013/05/08 20:10:57 UTC
svn commit: r1480383 - in /lucene/dev/trunk/solr: ./
core/src/java/org/apache/solr/handler/admin/
core/src/java/org/apache/solr/update/ core/src/test/org/apache/solr/update/
solrj/src/java/org/apache/solr/common/cloud/
Author: yonik
Date: Wed May 8 18:10:56 2013
New Revision: 1480383
URL: http://svn.apache.org/r1480383
Log:
SOLR-4798: use correct router during index splitting
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SplitIndexCommand.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/SolrIndexSplitterTest.java
lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/cloud/CompositeIdRouter.java
lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/cloud/HashBasedRouter.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1480383&r1=1480382&r2=1480383&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed May 8 18:10:56 2013
@@ -99,6 +99,13 @@ Bug Fixes
* SOLR-4795: Sub shard leader should not accept any updates from parent after
it goes active (shalin)
+* SOLR-4798: shard splitting does not respect the router for the collection
+ when executing the index split. One effect of this is that documents
+ may be placed in the wrong shard when the default compositeId router
+ is used in conjunction with IDs containing "!". (yonik)
+
+
+
Other Changes
----------------------
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java?rev=1480383&r1=1480382&r2=1480383&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java Wed May 8 18:10:56 2013
@@ -252,6 +252,7 @@ public class CoreAdminHandler extends Re
List<String> paths = null;
int partitions = pathsArr != null ? pathsArr.length : newCoreNames.length;
+ DocRouter router = null;
if (coreContainer.isZooKeeperAware()) {
ClusterState clusterState = coreContainer.getZkController().getClusterState();
String collectionName = req.getCore().getCoreDescriptor().getCloudDescriptor().getCollectionName();
@@ -259,8 +260,8 @@ public class CoreAdminHandler extends Re
String sliceName = req.getCore().getCoreDescriptor().getCloudDescriptor().getShardId();
Slice slice = clusterState.getSlice(collectionName, sliceName);
DocRouter.Range currentRange = slice.getRange();
- DocRouter hp = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
- ranges = currentRange != null ? hp.partitionRange(partitions, currentRange) : null;
+ router = collection.getRouter() != null ? collection.getRouter() : DocRouter.DEFAULT;
+ ranges = currentRange != null ? router.partitionRange(partitions, currentRange) : null;
}
if (pathsArr == null) {
@@ -278,7 +279,7 @@ public class CoreAdminHandler extends Re
}
- SplitIndexCommand cmd = new SplitIndexCommand(req, paths, newCores, ranges);
+ SplitIndexCommand cmd = new SplitIndexCommand(req, paths, newCores, ranges, router);
core.getUpdateHandler().split(cmd);
// After the split has completed, someone (here?) should start the process of replaying the buffered updates.
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java?rev=1480383&r1=1480382&r2=1480383&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SolrIndexSplitter.java Wed May 8 18:10:56 2013
@@ -32,9 +32,11 @@ import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.OpenBitSet;
import org.apache.solr.common.cloud.DocRouter;
+import org.apache.solr.common.cloud.HashBasedRouter;
import org.apache.solr.common.util.Hash;
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.SchemaField;
+import org.apache.solr.schema.StrField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.slf4j.Logger;
@@ -53,6 +55,8 @@ public class SolrIndexSplitter {
DocRouter.Range[] rangesArr; // same as ranges list, but an array for extra speed in inner loops
List<String> paths;
List<SolrCore> cores;
+ DocRouter router;
+ HashBasedRouter hashRouter;
int numPieces;
int currPartition = 0;
@@ -62,6 +66,9 @@ public class SolrIndexSplitter {
ranges = cmd.ranges;
paths = cmd.paths;
cores = cmd.cores;
+ router = cmd.router;
+ hashRouter = router instanceof HashBasedRouter ? (HashBasedRouter)router : null;
+
if (ranges == null) {
numPieces = paths != null ? paths.size() : cores.size();
} else {
@@ -151,16 +158,24 @@ public class SolrIndexSplitter {
BytesRef term = null;
DocsEnum docsEnum = null;
+ CharsRef idRef = new CharsRef(100);
for (;;) {
term = termsEnum.next();
if (term == null) break;
// figure out the hash for the term
- // TODO: hook in custom hashes (or store hashes)
- // TODO: performance implications of using indexedToReadable?
- CharsRef ref = new CharsRef(term.length);
- ref = field.getType().indexedToReadable(term, ref);
- int hash = Hash.murmurhash3_x86_32(ref, ref.offset, ref.length, 0);
+
+ // FUTURE: if conversion to strings costs too much, we could
+ // specialize and use the hash function that can work over bytes.
+ idRef = field.getType().indexedToReadable(term, idRef);
+ String idString = idRef.toString();
+
+ int hash = 0;
+ if (hashRouter != null) {
+ hash = hashRouter.sliceHash(idString, null, null);
+ }
+ // int hash = Hash.murmurhash3_x86_32(ref, ref.offset, ref.length, 0);
+
docsEnum = termsEnum.docs(liveDocs, docsEnum, DocsEnum.FLAG_NONE);
for (;;) {
int doc = docsEnum.nextDoc();
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SplitIndexCommand.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SplitIndexCommand.java?rev=1480383&r1=1480382&r2=1480383&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SplitIndexCommand.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/SplitIndexCommand.java Wed May 8 18:10:56 2013
@@ -34,13 +34,14 @@ public class SplitIndexCommand extends U
public List<String> paths;
public List<SolrCore> cores; // either paths or cores should be specified
public List<DocRouter.Range> ranges;
- // TODO: allow specification of custom hash function
+ public DocRouter router;
- public SplitIndexCommand(SolrQueryRequest req, List<String> paths, List<SolrCore> cores, List<DocRouter.Range> ranges) {
+ public SplitIndexCommand(SolrQueryRequest req, List<String> paths, List<SolrCore> cores, List<DocRouter.Range> ranges, DocRouter router) {
super(req);
this.paths = paths;
this.cores = cores;
this.ranges = ranges;
+ this.router = router;
}
@Override
@@ -54,6 +55,7 @@ public class SplitIndexCommand extends U
sb.append(",paths=" + paths);
sb.append(",cores=" + cores);
sb.append(",ranges=" + ranges);
+ sb.append(",router=" + router);
sb.append('}');
return sb.toString();
}
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/SolrIndexSplitterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/SolrIndexSplitterTest.java?rev=1480383&r1=1480382&r2=1480383&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/SolrIndexSplitterTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/update/SolrIndexSplitterTest.java Wed May 8 18:10:56 2013
@@ -95,7 +95,7 @@ public class SolrIndexSplitterTest exten
request = lrf.makeRequest("q", "dummy");
SplitIndexCommand command = new SplitIndexCommand(request,
- Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()), null, ranges);
+ Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath()), null, ranges, new PlainIdRouter());
new SolrIndexSplitter(command).split();
Directory directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),
@@ -148,7 +148,7 @@ public class SolrIndexSplitterTest exten
try {
request = lrf.makeRequest("q", "dummy");
- SplitIndexCommand command = new SplitIndexCommand(request, null, Lists.newArrayList(core1, core2), ranges);
+ SplitIndexCommand command = new SplitIndexCommand(request, null, Lists.newArrayList(core1, core2), ranges, new PlainIdRouter());
new SolrIndexSplitter(command).split();
} finally {
if (request != null) request.close();
@@ -185,7 +185,7 @@ public class SolrIndexSplitterTest exten
request = lrf.makeRequest("q", "dummy");
SplitIndexCommand command = new SplitIndexCommand(request,
- Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath(), indexDir3.getAbsolutePath()), null, null);
+ Lists.newArrayList(indexDir1.getAbsolutePath(), indexDir2.getAbsolutePath(), indexDir3.getAbsolutePath()), null, null, new PlainIdRouter());
new SolrIndexSplitter(command).split();
directory = h.getCore().getDirectoryFactory().get(indexDir1.getAbsolutePath(),
Modified: lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/cloud/CompositeIdRouter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/cloud/CompositeIdRouter.java?rev=1480383&r1=1480382&r2=1480383&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/cloud/CompositeIdRouter.java (original)
+++ lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/cloud/CompositeIdRouter.java Wed May 8 18:10:56 2013
@@ -36,7 +36,7 @@ public class CompositeIdRouter extends H
private int separator = '!';
// separator used to optionally specify number of bits to allocate toward first part.
- private int bitsSepartor = '/';
+ private int bitsSeparator = '/';
private int bits = 16;
private int mask1 = 0xffff0000;
private int mask2 = 0x0000ffff;
@@ -59,7 +59,7 @@ public class CompositeIdRouter extends H
}
@Override
- protected int sliceHash(String id, SolrInputDocument doc, SolrParams params) {
+ public int sliceHash(String id, SolrInputDocument doc, SolrParams params) {
int idx = id.indexOf(separator);
if (idx < 0) {
return Hash.murmurhash3_x86_32(id, 0, id.length(), 0);
@@ -69,7 +69,7 @@ public class CompositeIdRouter extends H
int m2 = mask2;
String part1 = id.substring(0,idx);
- int commaIdx = part1.indexOf(bitsSepartor);
+ int commaIdx = part1.indexOf(bitsSeparator);
if (commaIdx > 0) {
int firstBits = getBits(part1, commaIdx);
if (firstBits >= 0) {
@@ -105,7 +105,7 @@ public class CompositeIdRouter extends H
int m2 = mask2;
String part1 = id.substring(0,idx);
- int bitsSepIdx = part1.indexOf(bitsSepartor);
+ int bitsSepIdx = part1.indexOf(bitsSeparator);
if (bitsSepIdx > 0) {
int firstBits = getBits(part1, bitsSepIdx);
if (firstBits >= 0) {
Modified: lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/cloud/HashBasedRouter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/cloud/HashBasedRouter.java?rev=1480383&r1=1480382&r2=1480383&view=diff
==============================================================================
--- lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/cloud/HashBasedRouter.java (original)
+++ lucene/dev/trunk/solr/solrj/src/java/org/apache/solr/common/cloud/HashBasedRouter.java Wed May 8 18:10:56 2013
@@ -42,7 +42,7 @@ public abstract class HashBasedRouter ex
return range != null && range.includes(hash);
}
- protected int sliceHash(String id, SolrInputDocument sdoc, SolrParams params) {
+ public int sliceHash(String id, SolrInputDocument sdoc, SolrParams params) {
return Hash.murmurhash3_x86_32(id, 0, id.length(), 0);
}