You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2012/08/31 15:58:53 UTC
svn commit: r1379452 - in /lucene/dev/trunk/solr: ./
core/src/java/org/apache/solr/handler/component/
core/src/java/org/apache/solr/update/
core/src/java/org/apache/solr/update/processor/
Author: markrmiller
Date: Fri Aug 31 13:58:52 2012
New Revision: 1379452
URL: http://svn.apache.org/viewvc?rev=1379452&view=rev
Log:
SOLR-3773: Hash based on the external String id rather than the indexed representation for distributed updates.
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1379452&r1=1379451&r2=1379452&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Fri Aug 31 13:58:52 2012
@@ -37,7 +37,15 @@ Apache ZooKeeper 3.3.6
Upgrading from Solr 4.0.0-BETA
----------------------
-In order to better support distributed search mode, the TermVectorComponent's response format has been changed so that if the schema defines a uniqueKeyField, then that field value is used as the "key" for each document in it's response section, instead of the internal lucene doc id. Users w/o a uniqueKeyField will continue to see the same response format. See SOLR-3229 for more details.
+In order to better support distributed search mode, the TermVectorComponent's
+response format has been changed so that if the schema defines a
+uniqueKeyField, then that field value is used as the "key" for each document in
+its response section, instead of the internal lucene doc id. Users w/o a
+uniqueKeyField will continue to see the same response format. See SOLR-3229
+for more details.
+
+If you are using SolrCloud's distributed update request capabilities and a
+non-string type id field, you must re-index.
Detailed Change List
----------------------
@@ -130,6 +138,10 @@ Other Changes
* SOLR-3751: Add defensive checks for SolrCloud updates and requests that ensure
the local state matches what we can tell the request expected. (Mark Miller)
+
+* SOLR-3773: Hash based on the external String id rather than the indexed
+ representation for distributed updates. (Michael Garski, yonik, Mark Miller)
+
================== 4.0.0-BETA ===================
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java?rev=1379452&r1=1379451&r2=1379452&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java Fri Aug 31 13:58:52 2012
@@ -22,7 +22,6 @@ import org.apache.lucene.index.Indexable
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.client.solrj.SolrResponse;
-import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.cloud.CloudDescriptor;
import org.apache.solr.cloud.ZkController;
import org.apache.solr.common.SolrDocument;
@@ -36,7 +35,6 @@ import org.apache.solr.common.params.Sol
import org.apache.solr.common.util.Hash;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
-import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
@@ -301,7 +299,6 @@ public class RealTimeGetComponent extend
private static SolrDocument toSolrDoc(SolrInputDocument sdoc, IndexSchema schema) {
// TODO: do something more performant than this double conversion
Document doc = DocumentBuilder.toDocument(sdoc, schema);
- List<IndexableField> fields = doc.getFields();
// copy the stored fields only
Document out = new Document();
@@ -351,8 +348,6 @@ public class RealTimeGetComponent extend
// if shards=... then use that
if (zkController != null && params.get("shards") == null) {
- SchemaField sf = rb.req.getSchema().getUniqueKeyField();
-
CloudDescriptor cloudDescriptor = rb.req.getCore().getCoreDescriptor().getCloudDescriptor();
String collection = cloudDescriptor.getCollectionName();
@@ -361,9 +356,7 @@ public class RealTimeGetComponent extend
Map<String, List<String>> shardToId = new HashMap<String, List<String>>();
for (String id : allIds) {
- BytesRef br = new BytesRef();
- sf.getType().readableToIndexed(id, br);
- int hash = Hash.murmurhash3_x86_32(br.bytes, br.offset, br.length, 0);
+ int hash = Hash.murmurhash3_x86_32(id, 0, id.length(), 0);
String shard = clusterState.getShard(hash, collection);
List<String> idsForShard = shardToId.get(shard);
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java?rev=1379452&r1=1379451&r2=1379452&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java Fri Aug 31 13:58:52 2012
@@ -19,7 +19,6 @@ package org.apache.solr.update;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
-import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
@@ -28,8 +27,6 @@ import org.apache.solr.request.SolrQuery
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
-import java.util.List;
-
/**
*
*/
@@ -118,6 +115,35 @@ public class AddUpdateCommand extends Up
return "(null)";
}
+ /**
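+ * @return the uniqueKey value as a String to hash on, or null if the schema has no uniqueKey field, there is no document, or the value is absent and overwrite is false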
+ */
+ public String getHashableId() {
+ String id = null;
+ IndexSchema schema = req.getSchema();
+ SchemaField sf = schema.getUniqueKeyField();
+ if (sf != null) {
+ if (solrDoc != null) {
+ SolrInputField field = solrDoc.getField(sf.getName());
+
+ int count = field == null ? 0 : field.getValueCount();
+ if (count == 0) {
+ if (overwrite) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ "Document is missing mandatory uniqueKey field: "
+ + sf.getName());
+ }
+ } else if (count > 1) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ "Document contains multiple values for uniqueKey field: " + field);
+ } else {
+ return field.getFirstValue().toString();
+ }
+ }
+ }
+ return id;
+ }
+
@Override
public String toString() {
StringBuilder sb = new StringBuilder(super.toString());
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java?rev=1379452&r1=1379451&r2=1379452&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java Fri Aug 31 13:58:52 2012
@@ -1064,13 +1064,13 @@ public class DistributedUpdateProcessor
// make the hash pluggable of course.
// The hash also needs to be pluggable
private int hash(AddUpdateCommand cmd) {
- BytesRef br = cmd.getIndexedId();
- return Hash.murmurhash3_x86_32(br.bytes, br.offset, br.length, 0);
+ String hashableId = cmd.getHashableId();
+
+ return Hash.murmurhash3_x86_32(hashableId, 0, hashableId.length(), 0);
}
private int hash(DeleteUpdateCommand cmd) {
- BytesRef br = cmd.getIndexedId();
- return Hash.murmurhash3_x86_32(br.bytes, br.offset, br.length, 0);
+ return Hash.murmurhash3_x86_32(cmd.getId(), 0, cmd.getId().length(), 0);
}
// RetryNodes are used in the case of 'forward to leader' where we want