You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2012/08/31 15:58:53 UTC

svn commit: r1379452 - in /lucene/dev/trunk/solr: ./ core/src/java/org/apache/solr/handler/component/ core/src/java/org/apache/solr/update/ core/src/java/org/apache/solr/update/processor/

Author: markrmiller
Date: Fri Aug 31 13:58:52 2012
New Revision: 1379452

URL: http://svn.apache.org/viewvc?rev=1379452&view=rev
Log:
SOLR-3773: Hash based on the external String id rather than the indexed representation for distributed updates.

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1379452&r1=1379451&r2=1379452&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Fri Aug 31 13:58:52 2012
@@ -37,7 +37,15 @@ Apache ZooKeeper 3.3.6
 Upgrading from Solr 4.0.0-BETA
 ----------------------
 
-In order to better support distributed search mode, the TermVectorComponent's response format has been changed so that if the schema defines a uniqueKeyField, then that field value is used as the "key" for each document in it's response section, instead of the internal lucene doc id.  Users w/o a uniqueKeyField will continue to see the same response format.  See SOLR-3229 for more details.
+In order to better support distributed search mode, the TermVectorComponent's
+response format has been changed so that if the schema defines a 
+uniqueKeyField, then that field value is used as the "key" for each document in
+its response section, instead of the internal lucene doc id.  Users w/o a 
+uniqueKeyField will continue to see the same response format.  See SOLR-3229
+for more details.
+
+If you are using SolrCloud's distributed update request capabilities and a
+non-string type id field, you must re-index.
 
 Detailed Change List
 ----------------------
@@ -130,6 +138,10 @@ Other Changes
 
 * SOLR-3751: Add defensive checks for SolrCloud updates and requests that ensure 
   the local state matches what we can tell the request expected. (Mark Miller)
+  
+* SOLR-3773: Hash based on the external String id rather than the indexed 
+  representation for distributed updates. (Michael Garski, yonik, Mark Miller)
+
 
 ==================  4.0.0-BETA ===================
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java?rev=1379452&r1=1379451&r2=1379452&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java Fri Aug 31 13:58:52 2012
@@ -22,7 +22,6 @@ import org.apache.lucene.index.Indexable
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.BytesRef;
 import org.apache.solr.client.solrj.SolrResponse;
-import org.apache.solr.client.solrj.util.ClientUtils;
 import org.apache.solr.cloud.CloudDescriptor;
 import org.apache.solr.cloud.ZkController;
 import org.apache.solr.common.SolrDocument;
@@ -36,7 +35,6 @@ import org.apache.solr.common.params.Sol
 import org.apache.solr.common.util.Hash;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.StrUtils;
-import org.apache.solr.core.CoreDescriptor;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.SolrQueryResponse;
@@ -301,7 +299,6 @@ public class RealTimeGetComponent extend
   private static SolrDocument toSolrDoc(SolrInputDocument sdoc, IndexSchema schema) {
     // TODO: do something more performant than this double conversion
     Document doc = DocumentBuilder.toDocument(sdoc, schema);
-    List<IndexableField> fields = doc.getFields();
 
     // copy the stored fields only
     Document out = new Document();
@@ -351,8 +348,6 @@ public class RealTimeGetComponent extend
 
     // if shards=... then use that
     if (zkController != null && params.get("shards") == null) {
-      SchemaField sf = rb.req.getSchema().getUniqueKeyField();
-
       CloudDescriptor cloudDescriptor = rb.req.getCore().getCoreDescriptor().getCloudDescriptor();
 
       String collection = cloudDescriptor.getCollectionName();
@@ -361,9 +356,7 @@ public class RealTimeGetComponent extend
       
       Map<String, List<String>> shardToId = new HashMap<String, List<String>>();
       for (String id : allIds) {
-        BytesRef br = new BytesRef();
-        sf.getType().readableToIndexed(id, br);
-        int hash = Hash.murmurhash3_x86_32(br.bytes, br.offset, br.length, 0);
+        int hash = Hash.murmurhash3_x86_32(id, 0, id.length(), 0);
         String shard = clusterState.getShard(hash,  collection);
 
         List<String> idsForShard = shardToId.get(shard);

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java?rev=1379452&r1=1379451&r2=1379452&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java Fri Aug 31 13:58:52 2012
@@ -19,7 +19,6 @@ package org.apache.solr.update;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.search.Query;
 import org.apache.lucene.util.BytesRef;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
@@ -28,8 +27,6 @@ import org.apache.solr.request.SolrQuery
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
 
-import java.util.List;
-
 /**
  *
  */
@@ -118,6 +115,35 @@ public class AddUpdateCommand extends Up
      return "(null)";
    }
 
+  /**
+   * @return the uniqueKey field's single value via toString(), for hashing; null if the schema has no uniqueKey field, solrDoc is null, or the field has no value and overwrite is false (a BAD_REQUEST SolrException is thrown if it has no value while overwrite is set, or has more than one value)
+   */
+  public String getHashableId() {
+    String id = null; // never reassigned; only the final fall-through return reads it
+    IndexSchema schema = req.getSchema();
+    SchemaField sf = schema.getUniqueKeyField();
+    if (sf != null) {
+      if (solrDoc != null) {
+        SolrInputField field = solrDoc.getField(sf.getName());
+        
+        int count = field == null ? 0 : field.getValueCount();
+        if (count == 0) {
+          if (overwrite) { // a missing key is rejected only when overwrite is set
+            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+                "Document is missing mandatory uniqueKey field: "
+                    + sf.getName());
+          }
+        } else if (count > 1) {
+          throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+              "Document contains multiple values for uniqueKey field: " + field);
+        } else {
+          return field.getFirstValue().toString(); // the only path that returns an actual id
+        }
+      }
+    }
+    return id; // always null at this point
+  }
+  
    @Override
   public String toString() {
      StringBuilder sb = new StringBuilder(super.toString());

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java?rev=1379452&r1=1379451&r2=1379452&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/processor/DistributedUpdateProcessor.java Fri Aug 31 13:58:52 2012
@@ -1064,13 +1064,13 @@ public class DistributedUpdateProcessor 
   // make the hash pluggable of course.
   // The hash also needs to be pluggable
   private int hash(AddUpdateCommand cmd) {
-    BytesRef br = cmd.getIndexedId();
-    return Hash.murmurhash3_x86_32(br.bytes, br.offset, br.length, 0);
+    String hashableId = cmd.getHashableId();
+    
+    return Hash.murmurhash3_x86_32(hashableId, 0, hashableId.length(), 0);
   }
   
   private int hash(DeleteUpdateCommand cmd) {
-    BytesRef br = cmd.getIndexedId();
-    return Hash.murmurhash3_x86_32(br.bytes, br.offset, br.length, 0);
+    return Hash.murmurhash3_x86_32(cmd.getId(), 0, cmd.getId().length(), 0);
   }
   
   // RetryNodes are used in the case of 'forward to leader' where we want