You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by no...@apache.org on 2019/12/13 06:43:13 UTC

[lucene-solr] branch branch_8x updated: SOLR-14013: javabin performance regressions

This is an automated email from the ASF dual-hosted git repository.

noble pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 9717540  SOLR-14013: javabin performance regressions
9717540 is described below

commit 9717540b8ecb6f5e142aaef8e27464690684a0f9
Author: noble <no...@apache.org>
AuthorDate: Fri Dec 13 17:42:24 2019 +1100

    SOLR-14013: javabin performance regressions
---
 solr/CHANGES.txt                                   |  2 +
 .../LanguageIdentifierUpdateProcessor.java         | 22 +++----
 ...geIdentifierUpdateProcessorFactoryTestCase.java | 21 -------
 .../solr/handler/component/HttpShardHandler.java   |  9 +--
 .../apache/solr/handler/loader/JavabinLoader.java  |  1 -
 .../org/apache/solr/response/DocsStreamer.java     |  7 ++-
 .../java/org/apache/solr/schema/IndexSchema.java   |  5 +-
 .../org/apache/solr/update/DocumentBuilder.java    |  4 +-
 .../processor/AtomicUpdateDocumentMerger.java      |  2 +-
 .../cloud/TestDynamicFieldNamesIndexCorrectly.java |  2 +-
 .../java/org/apache/solr/common/SolrDocument.java  | 10 ++--
 .../org/apache/solr/common/SolrInputField.java     | 68 +++-------------------
 .../org/apache/solr/common/util/JavaBinCodec.java  |  2 +-
 .../solrj/embedded/SolrExampleJettyTest.java       | 19 ++++++
 14 files changed, 59 insertions(+), 115 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index a8c1e05..4e08370 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -237,6 +237,8 @@ Bug Fixes
 
 * SOLR-14071: Untrusted configsets cannot use <lib> directive (Ishan Chattopadhyaya)
 
+* SOLR-14013: FIX: javabin performance regressions (noble, yonik, Houston Putman)
+
 Other Changes
 ---------------------
 
diff --git a/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java b/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
index 8528a7e..487d37c 100644
--- a/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
+++ b/solr/contrib/langid/src/java/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.java
@@ -16,6 +16,16 @@
  */
 package org.apache.solr.update.processor;
 
+import java.io.IOException;
+import java.io.Reader;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.regex.Pattern;
+
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.SolrInputDocument;
@@ -29,16 +39,6 @@ import org.apache.solr.update.AddUpdateCommand;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import java.io.IOException;
-import java.io.Reader;
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.regex.Pattern;
-
 
 /**
  * <p>
@@ -229,7 +229,7 @@ public abstract class LanguageIdentifierUpdateProcessor extends UpdateRequestPro
       }
     } else {
       // langField is set, we sanity check it against whitelist and fallback
-      docLang = resolveLanguage((String) doc.getFieldValue(langField), fallbackLang);
+      docLang = resolveLanguage(doc.getFieldValue(langField).toString(), fallbackLang);
       docLangs.add(docLang);
       log.debug("Field "+langField+" already contained value "+docLang+", not overwriting.");
     }
diff --git a/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java b/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
index a74c80f..4b19900 100644
--- a/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
+++ b/solr/contrib/langid/src/test/org/apache/solr/update/processor/LanguageIdentifierUpdateProcessorFactoryTestCase.java
@@ -17,13 +17,11 @@
 package org.apache.solr.update.processor;
 
 import java.util.ArrayList;
-import java.util.Collection;
 import java.util.List;
 
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.util.ByteArrayUtf8CharSequence;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.servlet.SolrRequestParsers;
@@ -386,25 +384,6 @@ public abstract class LanguageIdentifierUpdateProcessorFactoryTestCase extends S
    */
   private SolrInputDocument process(SolrInputDocument origDoc) {
     SolrInputDocument modifiedDoc = origDoc.deepCopy();
-    if (random().nextBoolean()) {
-      modifiedDoc.forEach((s, f) -> {
-        Object rawVal = f.getRawValue();
-        if (rawVal instanceof Collection) {
-          Collection rawValue = (Collection) rawVal;
-          ArrayList<Object> newVal = new ArrayList<>(rawValue.size());
-          for (Object o : rawValue) {
-            if (o instanceof String) {
-              newVal.add(new ByteArrayUtf8CharSequence((String) o));
-            } else {
-              newVal.add(rawVal);
-            }
-          }
-          f.setValue(newVal);
-        } else if (rawVal instanceof String) {
-          f.setValue(new ByteArrayUtf8CharSequence((String) rawVal));
-        }
-      });
-    }
     liProcessor.process(modifiedDoc);
     return modifiedDoc;
   }
diff --git a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java
index 69da394..8f8c48e 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/HttpShardHandler.java
@@ -43,8 +43,8 @@ import org.apache.solr.client.solrj.SolrServerException;
 import org.apache.solr.client.solrj.impl.BinaryResponseParser;
 import org.apache.solr.client.solrj.impl.Http2SolrClient;
 import org.apache.solr.client.solrj.impl.LBSolrClient;
-import org.apache.solr.client.solrj.routing.ReplicaListTransformer;
 import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.client.solrj.routing.ReplicaListTransformer;
 import org.apache.solr.client.solrj.util.ClientUtils;
 import org.apache.solr.cloud.CloudDescriptor;
 import org.apache.solr.cloud.ZkController;
@@ -71,8 +71,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.slf4j.MDC;
 
-import static org.apache.solr.handler.component.ShardRequest.PURPOSE_GET_FIELDS;
-
 public class HttpShardHandler extends ShardHandler {
   
   /**
@@ -181,10 +179,7 @@ public class HttpShardHandler extends ShardHandler {
         SolrRequestInfo requestInfo = SolrRequestInfo.getRequestInfo();
         if (requestInfo != null) req.setUserPrincipal(requestInfo.getReq().getUserPrincipal());
 
-        if (sreq.purpose == PURPOSE_GET_FIELDS) {
-          req.setResponseParser(READ_STR_AS_CHARSEQ_PARSER);
-        }
-        // no need to set the response parser as binary is the default
+        // no need to set the response parser as binary is the defaultJab
         // req.setResponseParser(new BinaryResponseParser());
 
         // if there are no shards available for a slice, urls.size()==0
diff --git a/solr/core/src/java/org/apache/solr/handler/loader/JavabinLoader.java b/solr/core/src/java/org/apache/solr/handler/loader/JavabinLoader.java
index 8bbbde2..aa4b4f9 100644
--- a/solr/core/src/java/org/apache/solr/handler/loader/JavabinLoader.java
+++ b/solr/core/src/java/org/apache/solr/handler/loader/JavabinLoader.java
@@ -119,7 +119,6 @@ public class JavabinLoader extends ContentStreamLoader {
       if (in.peek() == -1) return;
       try {
         update = new JavaBinUpdateRequestCodec()
-            .setReadStringAsCharSeq(true)
             .unmarshal(in, handler);
       } catch (EOFException e) {
         break; // this is expected
diff --git a/solr/core/src/java/org/apache/solr/response/DocsStreamer.java b/solr/core/src/java/org/apache/solr/response/DocsStreamer.java
index 3d1976e..c6010e1 100644
--- a/solr/core/src/java/org/apache/solr/response/DocsStreamer.java
+++ b/solr/core/src/java/org/apache/solr/response/DocsStreamer.java
@@ -49,8 +49,8 @@ import org.apache.solr.schema.TrieIntField;
 import org.apache.solr.schema.TrieLongField;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocList;
-import org.apache.solr.search.SolrDocumentFetcher;
 import org.apache.solr.search.ReturnFields;
+import org.apache.solr.search.SolrDocumentFetcher;
 import org.apache.solr.search.SolrReturnFields;
 
 /**
@@ -148,9 +148,10 @@ public class DocsStreamer implements Iterator<SolrDocument> {
     // because that doesn't include extra fields needed by transformers
     final Set<String> fieldNamesNeeded = fields.getLuceneFieldNames();
 
+    BinaryResponseWriter.MaskCharSeqSolrDocument masked = null;
     final SolrDocument out = ResultContext.READASBYTES.get() == null ?
         new SolrDocument() :
-        new BinaryResponseWriter.MaskCharSeqSolrDocument();
+        (masked = new BinaryResponseWriter.MaskCharSeqSolrDocument());
 
     // NOTE: it would be tempting to try and optimize this to loop over fieldNamesNeeded
     // when it's smaller then the IndexableField[] in the Document -- but that's actually *less* effecient
@@ -160,7 +161,7 @@ public class DocsStreamer implements Iterator<SolrDocument> {
       final String fname = f.name();
       if (null == fieldNamesNeeded || fieldNamesNeeded.contains(fname) ) {
         // Make sure multivalued fields are represented as lists
-        Object existing = out.get(fname);
+        Object existing = masked == null ? out.get(fname) : masked.getRaw(fname);
         if (existing == null) {
           SchemaField sf = schema.getFieldOrNull(fname);
           if (sf != null && sf.multiValued()) {
diff --git a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
index 02168f1..5f213d3 100644
--- a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
+++ b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java
@@ -38,7 +38,6 @@ import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.TreeSet;
-import java.util.concurrent.ConcurrentHashMap;
 import java.util.function.Function;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
@@ -57,6 +56,7 @@ import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.MapSolrParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.Cache;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.Pair;
 import org.apache.solr.common.util.SimpleOrderedMap;
@@ -68,6 +68,7 @@ import org.apache.solr.response.SchemaXmlWriter;
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.search.similarities.SchemaSimilarityFactory;
 import org.apache.solr.uninverting.UninvertingReader;
+import org.apache.solr.util.ConcurrentLRUCache;
 import org.apache.solr.util.DOMUtil;
 import org.apache.solr.util.PayloadUtils;
 import org.apache.solr.util.plugin.SolrCoreAware;
@@ -141,7 +142,7 @@ public class IndexSchema {
   protected DynamicField[] dynamicFields = new DynamicField[] {};
   public DynamicField[] getDynamicFields() { return dynamicFields; }
 
-  protected Map<String, SchemaField> dynamicFieldCache = new ConcurrentHashMap<>();
+  protected Cache<String, SchemaField> dynamicFieldCache = new ConcurrentLRUCache(10000, 8000, 9000,100, false,false, null);
 
   private Analyzer indexAnalyzer;
   private Analyzer queryAnalyzer;
diff --git a/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java b/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
index 543a4cb..64c6253 100644
--- a/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
+++ b/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
@@ -135,7 +135,7 @@ public class DocumentBuilder {
     // Load fields from SolrDocument to Document
     for( SolrInputField field : doc ) {
 
-      if (field.getFirstRawValue() instanceof SolrDocumentBase) {
+      if (field.getFirstValue() instanceof SolrDocumentBase) {
         if (ignoreNestedDocs) {
           continue;
         }
@@ -159,7 +159,7 @@ public class DocumentBuilder {
       // load each field value
       boolean hasField = false;
       try {
-        Iterator it = field.getRawIterator();
+        Iterator it = field.iterator();
         while (it.hasNext()) {
           Object v = it.next();
           if( v == null ) {
diff --git a/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java b/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java
index 0f5a3a6..8bd51c2 100644
--- a/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java
+++ b/solr/core/src/java/org/apache/solr/update/processor/AtomicUpdateDocumentMerger.java
@@ -412,7 +412,7 @@ public class AtomicUpdateDocumentMerger {
    */
   public SolrInputDocument updateDocInSif(SolrInputField updateSif, SolrInputDocument cmdDocWChildren, SolrInputDocument updateDoc) {
     List sifToReplaceValues = (List) updateSif.getValues();
-    final boolean wasList = updateSif.getRawValue() instanceof Collection;
+    final boolean wasList = updateSif.getValue() instanceof Collection;
     int index = getDocIndexFromCollection(cmdDocWChildren, sifToReplaceValues);
     SolrInputDocument updatedDoc = merge(updateDoc, cmdDocWChildren);
     if(index == -1) {
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestDynamicFieldNamesIndexCorrectly.java b/solr/core/src/test/org/apache/solr/cloud/TestDynamicFieldNamesIndexCorrectly.java
index c6ac56b..ddb558a 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestDynamicFieldNamesIndexCorrectly.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestDynamicFieldNamesIndexCorrectly.java
@@ -89,7 +89,7 @@ public class TestDynamicFieldNamesIndexCorrectly extends AbstractFullDistribZkTe
     Iterator<SolrInputDocument> it = solrDocs.iterator();
     while (it.hasNext()) {
       final SolrInputDocument inDoc = it.next();
-      final String id = inDoc.getField("id").getRawValue().toString();
+      final String id = inDoc.getField("id").getValue().toString();
       final SolrDocument resultDoc = resultMap.get(id);
       final Collection<String> resultFieldNames = resultDoc.getFieldNames();
       inDoc
diff --git a/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java b/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java
index fd56d9d..d7b268b 100644
--- a/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java
+++ b/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java
@@ -28,8 +28,6 @@ import java.util.Set;
 
 import org.apache.solr.common.util.NamedList;
 
-import static org.apache.solr.common.util.ByteArrayUtf8CharSequence.convertCharSeq;
-
 
 /**
  * A concrete representation of a document within a Solr index.  Unlike a lucene
@@ -289,14 +287,14 @@ public class SolrDocument extends SolrDocumentBase<Object, SolrDocument> impleme
       /** Get the field Value */
       @Override
       public Object get(Object key) { 
-        return convertCharSeq(getFirstValue( (String)key));
+        return getFirstValue( (String)key);
       }
       
       // Easily Supported methods
       @Override
       public boolean containsKey(Object key) { return _fields.containsKey( key ); }
       @Override
-      public Set<String>  keySet()           { return (Set<String>) convertCharSeq(_fields.keySet());  }
+      public Set<String>  keySet()           { return _fields.keySet();  }
       @Override
       public int          size()             { return _fields.size();    }
       @Override
@@ -368,7 +366,7 @@ public class SolrDocument extends SolrDocumentBase<Object, SolrDocument> impleme
 
   @Override
   public Object remove(Object key) {
-    return convertCharSeq(_fields.remove(key));
+    return _fields.remove(key);
   }
 
   @Override
@@ -378,7 +376,7 @@ public class SolrDocument extends SolrDocumentBase<Object, SolrDocument> impleme
 
   @Override
   public Collection<Object> values() {
-    return convertCharSeq(_fields.values());
+    return _fields.values();
   }
 
   @Override
diff --git a/solr/solrj/src/java/org/apache/solr/common/SolrInputField.java b/solr/solrj/src/java/org/apache/solr/common/SolrInputField.java
index 8b4add6..db36b3e 100644
--- a/solr/solrj/src/java/org/apache/solr/common/SolrInputField.java
+++ b/solr/solrj/src/java/org/apache/solr/common/SolrInputField.java
@@ -21,8 +21,6 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Iterator;
 
-import static org.apache.solr.common.util.ByteArrayUtf8CharSequence.convertCharSeq;
-
 /**
  *
  * @since solr 1.3
@@ -107,17 +105,16 @@ public class SolrInputField implements Iterable<Object>, Serializable
 
   //---------------------------------------------------------------
   //---------------------------------------------------------------
-  
-  @SuppressWarnings("unchecked")
+
   public Object getFirstValue() {
-    if( value instanceof Collection ) {
-      Collection c = (Collection<Object>)value;
-      if( c.size() > 0 ) {
-        return convertCharSeq(c.iterator().next());
+    if (value instanceof Collection) {
+      Collection c = (Collection<Object>) value;
+      if (c.size() > 0) {
+        return c.iterator().next();
       }
       return null;
     }
-    return convertCharSeq(value);
+    return value;
   }
 
   /**
@@ -125,28 +122,6 @@ public class SolrInputField implements Iterable<Object>, Serializable
    * will be a collection.
    */
   public Object getValue() {
-    return convertCharSeq(value);
-  }
-
-
-  /**
-   * Return a value as is without converting and CharSequence Objects
-   */
-  public Object getRawValue() {
-    return value;
-  }
-
-  /**
-   * Return the first value as is without converting and CharSequence Objects
-   */
-  public Object getFirstRawValue() {
-    if (value instanceof Collection) {
-      Collection c = (Collection<Object>) value;
-      if (c.size() > 0) {
-        return c.iterator().next();
-      }
-      return null;
-    }
     return value;
   }
 
@@ -157,11 +132,11 @@ public class SolrInputField implements Iterable<Object>, Serializable
   @SuppressWarnings("unchecked")
   public Collection<Object> getValues() {
     if (value instanceof Collection) {
-      return convertCharSeq((Collection<Object>) value);
+      return (Collection<Object>) value;
     }
     if( value != null ) {
       Collection<Object> vals = new ArrayList<>(1);
-      vals.add(convertCharSeq(value));
+      vals.add(value);
       return vals;
     }
     return null;
@@ -189,33 +164,8 @@ public class SolrInputField implements Iterable<Object>, Serializable
   }
 
   @Override
-  public Iterator<Object> iterator(){
-    if( value instanceof Collection ) {
-      return (convertCharSeq ((Collection)value)).iterator();
-    }
-    return new Iterator<Object>() {
-      boolean nxt = (value!=null);
-
-      @Override
-      public boolean hasNext() {
-        return nxt;
-      }
-
-      @Override
-      public Object next() {
-        nxt = false;
-        return convertCharSeq(value);
-      }
-
-      @Override
-      public void remove() {
-        throw new UnsupportedOperationException();
-      }
-    };
-
-  }
   @SuppressWarnings("unchecked")
-  public Iterator<Object> getRawIterator() {
+  public Iterator<Object> iterator(){
     if( value instanceof Collection ) {
       return ((Collection)value).iterator();
     }
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
index a4167d8..f5b087f 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
@@ -362,7 +362,7 @@ public class JavaBinCodec implements PushWriter {
       return true;
     }
     if (val instanceof SolrInputField) {
-      return writeKnownType(((SolrInputField) val).getRawValue());
+      return writeKnownType(((SolrInputField) val).getValue());
     }
     if (val instanceof IteratorWriter) {
       writeIterator((IteratorWriter) val);
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleJettyTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleJettyTest.java
index db0bd28..ee747e3 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleJettyTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleJettyTest.java
@@ -23,6 +23,8 @@ import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.http.HttpResponse;
@@ -107,6 +109,23 @@ public class SolrExampleJettyTest extends SolrExampleTests {
         baseURL + "/update/json/docs";
   }
 
+  @Test
+  public void testUtf8PerfDegradation() throws Exception {
+    SolrInputDocument doc = new SolrInputDocument();
+
+    doc.addField("id", "1");
+    doc.addField("b_is", IntStream.range(0, 30000).boxed().collect(Collectors.toList()));
+
+    HttpSolrClient client = (HttpSolrClient) getSolrClient();
+    client.add(doc);
+    client.commit();
+    long start = System.nanoTime();
+    QueryResponse rsp = client.query(new SolrQuery("*:*"));
+    System.out.println("time taken : " + ((System.nanoTime() - start)) / (1000 * 1000));
+    assertEquals(1, rsp.getResults().getNumFound());
+
+  }
+
   @Ignore
   public void testUtf8QueryPerf() throws Exception {
     HttpSolrClient client = (HttpSolrClient) getSolrClient();