You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2014/07/19 09:10:36 UTC
svn commit: r1611852 - in /lucene/dev/trunk/solr: CHANGES.txt
core/src/java/org/apache/solr/update/DocumentBuilder.java
Author: shalin
Date: Sat Jul 19 07:10:36 2014
New Revision: 1611852
URL: http://svn.apache.org/r1611852
Log:
SOLR-6259: Reduce CPU usage by avoiding repeated costly calls to Document.getField inside DocumentBuilder.toDocument for use-cases with a large number of fields and copyFields
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1611852&r1=1611851&r2=1611852&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Sat Jul 19 07:10:36 2014
@@ -204,6 +204,10 @@ Optimizations
indexes with many fields of same type just use one TokenStream per thread.
(Shay Banon, Uwe Schindler, Robert Muir)
+* SOLR-6259: Reduce CPU usage by avoiding repeated costly calls to Document.getField inside
+ DocumentBuilder.toDocument for use-cases with a large number of fields and copyFields.
+ (Steven Bower via shalin)
+
Other Changes
---------------------
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java?rev=1611852&r1=1611851&r2=1611852&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java Sat Jul 19 07:10:36 2014
@@ -18,6 +18,7 @@
package org.apache.solr.update;
import java.util.List;
+import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -30,6 +31,8 @@ import org.apache.solr.schema.IndexSchem
import org.apache.solr.schema.SchemaField;
+import com.google.common.collect.Sets;
+
/**
*
*/
@@ -75,6 +78,7 @@ public class DocumentBuilder {
{
Document out = new Document();
final float docBoost = doc.getDocumentBoost();
+ Set<String> usedFields = Sets.newHashSet();
// Load fields from SolrDocument to Document
for( SolrInputField field : doc ) {
@@ -103,6 +107,9 @@ public class DocumentBuilder {
// it ourselves
float compoundBoost = fieldBoost * docBoost;
+ List<CopyField> copyFields = schema.getCopyFieldsList(name);
+ if( copyFields.size() == 0 ) copyFields = null;
+
// load each field value
boolean hasField = false;
try {
@@ -114,48 +121,52 @@ public class DocumentBuilder {
if (sfield != null) {
used = true;
addField(out, sfield, v, applyBoost ? compoundBoost : 1f);
+ // record the field as having a value
+ usedFields.add(sfield.getName());
}
// Check if we should copy this field value to any other fields.
// This could happen whether it is explicit or not.
- List<CopyField> copyFields = schema.getCopyFieldsList(name);
- for (CopyField cf : copyFields) {
- SchemaField destinationField = cf.getDestination();
-
- final boolean destHasValues =
- (null != out.getField(destinationField.getName()));
-
- // check if the copy field is a multivalued or not
- if (!destinationField.multiValued() && destHasValues) {
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
- "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " +
- destinationField.getName() + ": " + v);
- }
+ if( copyFields != null ){
+ for (CopyField cf : copyFields) {
+ SchemaField destinationField = cf.getDestination();
- used = true;
-
- // Perhaps trim the length of a copy field
- Object val = v;
- if( val instanceof String && cf.getMaxChars() > 0 ) {
- val = cf.getLimitedValue((String)val);
+ final boolean destHasValues = usedFields.contains(destinationField.getName());
+
+ // check if the copy field is a multivalued or not
+ if (!destinationField.multiValued() && destHasValues) {
+ throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
+ "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " +
+ destinationField.getName() + ": " + v);
+ }
+
+ used = true;
+
+ // Perhaps trim the length of a copy field
+ Object val = v;
+ if( val instanceof String && cf.getMaxChars() > 0 ) {
+ val = cf.getLimitedValue((String)val);
+ }
+
+ // we can't copy any boost unless the dest field is
+ // indexed & !omitNorms, but which boost we copy depends
+ // on whether the dest field already contains values (we
+ // don't want to apply the compounded docBoost more than once)
+ final float destBoost =
+ (destinationField.indexed() && !destinationField.omitNorms()) ?
+ (destHasValues ? fieldBoost : compoundBoost) : 1.0F;
+
+ addField(out, destinationField, val, destBoost);
+ // record the field as having a value
+ usedFields.add(destinationField.getName());
}
-
- // we can't copy any boost unless the dest field is
- // indexed & !omitNorms, but which boost we copy depends
- // on whether the dest field already contains values (we
- // don't want to apply the compounded docBoost more then once)
- final float destBoost =
- (destinationField.indexed() && !destinationField.omitNorms()) ?
- (destHasValues ? fieldBoost : compoundBoost) : 1.0F;
- addField(out, destinationField, val, destBoost);
+ // The final boost for a given field name is the product of
+ // *all* boosts on values of that field.
+ // For multi-valued fields, we only want to set the boost on the
+ // first field.
+ fieldBoost = compoundBoost = 1.0f;
}
-
- // The final boost for a given field named is the product of the
- // *all* boosts on values of that field.
- // For multi-valued fields, we only want to set the boost on the
- // first field.
- fieldBoost = compoundBoost = 1.0f;
}
}
catch( SolrException ex ) {