You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2014/07/19 09:10:36 UTC
svn commit: r1611852 - in /lucene/dev/trunk/solr: CHANGES.txt core/src/java/org/apache/solr/update/DocumentBuilder.java

Author: shalin
Date: Sat Jul 19 07:10:36 2014
New Revision: 1611852

URL: http://svn.apache.org/r1611852
Log:
SOLR-6259: Reduce CPU usage by avoiding repeated costly calls to Document.getField inside DocumentBuilder.toDocument for use cases with a large number of fields and copyFields

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1611852&r1=1611851&r2=1611852&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Sat Jul 19 07:10:36 2014
@@ -204,6 +204,10 @@ Optimizations
   indexes with many fields of same type just use one TokenStream per thread.
   (Shay Banon, Uwe Schindler, Robert Muir)
 
+* SOLR-6259: Reduce CPU usage by avoiding repeated costly calls to Document.getField inside
+  DocumentBuilder.toDocument for use cases with a large number of fields and copyFields.
+  (Steven Bower via shalin)
+
 Other Changes
 ---------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java?rev=1611852&r1=1611851&r2=1611852&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java Sat Jul 19 07:10:36 2014
@@ -18,6 +18,7 @@
 package org.apache.solr.update;
 
 import java.util.List;
+import java.util.Set;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -30,6 +31,8 @@ import org.apache.solr.schema.IndexSchem
 import org.apache.solr.schema.SchemaField;
 
 
+import com.google.common.collect.Sets;
+
 /**
  *
  */
@@ -75,6 +78,7 @@ public class DocumentBuilder {
   { 
     Document out = new Document();
     final float docBoost = doc.getDocumentBoost();
+    Set<String> usedFields = Sets.newHashSet();
     
     // Load fields from SolrDocument to Document
     for( SolrInputField field : doc ) {
@@ -103,6 +107,9 @@ public class DocumentBuilder {
       // it ourselves 
       float compoundBoost = fieldBoost * docBoost;
 
+      List<CopyField> copyFields = schema.getCopyFieldsList(name);
+      if( copyFields.size() == 0 ) copyFields = null;
+
       // load each field value
       boolean hasField = false;
       try {
@@ -114,48 +121,52 @@ public class DocumentBuilder {
           if (sfield != null) {
             used = true;
             addField(out, sfield, v, applyBoost ? compoundBoost : 1f);
+            // record the field as having a value
+            usedFields.add(sfield.getName());
           }
   
           // Check if we should copy this field value to any other fields.
           // This could happen whether it is explicit or not.
-          List<CopyField> copyFields = schema.getCopyFieldsList(name);
-          for (CopyField cf : copyFields) {
-            SchemaField destinationField = cf.getDestination();
-
-            final boolean destHasValues = 
-              (null != out.getField(destinationField.getName()));
-
-            // check if the copy field is a multivalued or not
-            if (!destinationField.multiValued() && destHasValues) {
-              throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
-                      "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " +
-                              destinationField.getName() + ": " + v);
-            }
+          if( copyFields != null ){
+            for (CopyField cf : copyFields) {
+              SchemaField destinationField = cf.getDestination();
   
-            used = true;
-            
-            // Perhaps trim the length of a copy field
-            Object val = v;
-            if( val instanceof String && cf.getMaxChars() > 0 ) {
-              val = cf.getLimitedValue((String)val);
+              final boolean destHasValues = usedFields.contains(destinationField.getName());
+  
+              // check if the copy field is a multivalued or not
+              if (!destinationField.multiValued() && destHasValues) {
+                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+                        "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " +
+                                destinationField.getName() + ": " + v);
+              }
+    
+              used = true;
+              
+              // Perhaps trim the length of a copy field
+              Object val = v;
+              if( val instanceof String && cf.getMaxChars() > 0 ) {
+                val = cf.getLimitedValue((String)val);
+              }
+  
+              // we can't copy any boost unless the dest field is 
+              // indexed & !omitNorms, but which boost we copy depends
+              // on whether the dest field already contains values (we
+              // don't want to apply the compounded docBoost more than once)
+              final float destBoost = 
+                (destinationField.indexed() && !destinationField.omitNorms()) ?
+                (destHasValues ? fieldBoost : compoundBoost) : 1.0F;
+              
+              addField(out, destinationField, val, destBoost);
+              // record the field as having a value
+              usedFields.add(destinationField.getName());
             }
-
-            // we can't copy any boost unless the dest field is 
-            // indexed & !omitNorms, but which boost we copy depends
-            // on whether the dest field already contains values (we
-            // don't want to apply the compounded docBoost more then once)
-            final float destBoost = 
-              (destinationField.indexed() && !destinationField.omitNorms()) ?
-              (destHasValues ? fieldBoost : compoundBoost) : 1.0F;
             
-            addField(out, destinationField, val, destBoost);
+            // The final boost for a given field name is the product of
+            // *all* boosts on values of that field.
+            // For multi-valued fields, we only want to set the boost on the
+            // first field.
+            fieldBoost = compoundBoost = 1.0f;
           }
-          
-          // The final boost for a given field named is the product of the 
-          // *all* boosts on values of that field. 
-          // For multi-valued fields, we only want to set the boost on the
-          // first field.
-          fieldBoost = compoundBoost = 1.0f;
         }
       }
       catch( SolrException ex ) {