You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ds...@apache.org on 2022/11/04 14:00:21 UTC

[solr] branch main updated: [SOLR-16515] Remove synchronized access to cachedOrdMaps in SlowCompositeReaderWrapper (#1156)

This is an automated email from the ASF dual-hosted git repository.

dsmiley pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new 53d90030d0c [SOLR-16515] Remove synchronized access to cachedOrdMaps in SlowCompositeReaderWrapper (#1156)
53d90030d0c is described below

commit 53d90030d0c432a4b1288100ced527615ed71d59
Author: Torsten Bøgh Köster <tb...@thiswayup.de>
AuthorDate: Fri Nov 4 15:00:14 2022 +0100

    [SOLR-16515] Remove synchronized access to cachedOrdMaps in SlowCompositeReaderWrapper (#1156)
    
    By using a ConcurrentHashMap as the underlying implementation and the {{ConcurrentHashMap#computeIfAbsent}} method to compute cache values, we were able to reduce locking contention significantly.
    
    Co-authored-by: Dennis Berger <db...@knownhosts.org>
    Co-authored-by: Torsten Bøgh Köster <tb...@thiswayup.de>
    Co-authored-by: Marco Petris <ma...@web.de>
---
 solr/CHANGES.txt                                   |   3 +-
 .../solr/index/SlowCompositeReaderWrapper.java     | 137 +++++++++++++++------
 2 files changed, 100 insertions(+), 40 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 5e36678fb07..8362dfca7f6 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -64,7 +64,8 @@ Improvements
 
 Optimizations
 ---------------------
-(No changes)
+
+* SOLR-16515: Remove synchronized access to cachedOrdMaps in SlowCompositeReaderWrapper (Dennis Berger, Torsten Bøgh Köster, Marco Petris)
 
 Bug Fixes
 ---------------------
diff --git a/solr/core/src/java/org/apache/solr/index/SlowCompositeReaderWrapper.java b/solr/core/src/java/org/apache/solr/index/SlowCompositeReaderWrapper.java
index 21396549f71..68197308ca8 100644
--- a/solr/core/src/java/org/apache/solr/index/SlowCompositeReaderWrapper.java
+++ b/solr/core/src/java/org/apache/solr/index/SlowCompositeReaderWrapper.java
@@ -17,9 +17,10 @@
 package org.apache.solr.index;
 
 import java.io.IOException;
-import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.Function;
 import org.apache.lucene.index.BinaryDocValues;
 import org.apache.lucene.index.CompositeReader;
 import org.apache.lucene.index.DirectoryReader;
@@ -49,6 +50,7 @@ import org.apache.lucene.index.VectorValues;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.Version;
+import org.apache.lucene.util.packed.PackedInts;
 
 /**
  * This class forces a composite reader (eg a {@link MultiReader} or {@link DirectoryReader}) to
@@ -73,10 +75,9 @@ public final class SlowCompositeReaderWrapper extends LeafReader {
 
   final Map<String, Terms> cachedTerms = new ConcurrentHashMap<>();
 
-  // TODO: consider ConcurrentHashMap ?
   // TODO: this could really be a weak map somewhere else on the coreCacheKey,
   // but do we really need to optimize slow-wrapper any more?
-  final Map<String, OrdinalMap> cachedOrdMaps = new HashMap<>();
+  final Map<String, OrdinalMap> cachedOrdMaps = new ConcurrentHashMap<>();
 
   /**
    * This method is sugar for getting an {@link LeafReader} from an {@link IndexReader} of any kind.
@@ -175,23 +176,23 @@ public final class SlowCompositeReaderWrapper extends LeafReader {
   @Override
   public SortedDocValues getSortedDocValues(String field) throws IOException {
     ensureOpen();
-    OrdinalMap map = null;
-    synchronized (cachedOrdMaps) {
-      map = cachedOrdMaps.get(field);
-      if (map == null) {
-        // uncached, or not a multi dv
-        SortedDocValues dv = MultiDocValues.getSortedValues(in, field);
-        if (dv instanceof MultiSortedDocValues) {
-          map = ((MultiSortedDocValues) dv).mapping;
-          IndexReader.CacheHelper cacheHelper = getReaderCacheHelper();
-          if (cacheHelper != null && map.owner == cacheHelper.getKey()) {
-            cachedOrdMaps.put(field, map);
-          }
-        }
-        return dv;
-      }
+
+    // Integration of what was previously in MultiDocValues.getSortedValues:
+    // The purpose of this integration is to be able to construct a value producer which can always
+    // produce a value that is actually needed. The reason for the producer is to avoid getAndSet
+    // pitfalls in this multithreaded context.
+    // So all cases that do not lead to a cacheable value are handled upfront.
+    // We kept the semantics of MultiDocValues.getSortedValues.
+    final List<LeafReaderContext> leaves = in.leaves();
+    final int size = leaves.size();
+
+    if (size == 0) {
+      return null;
+    } else if (size == 1) {
+      return leaves.get(0).reader().getSortedDocValues(field);
     }
-    int size = in.leaves().size();
+
+    boolean anyReal = false;
     final SortedDocValues[] values = new SortedDocValues[size];
     final int[] starts = new int[size + 1];
     long totalCost = 0;
@@ -205,40 +206,68 @@ public final class SlowCompositeReaderWrapper extends LeafReader {
       SortedDocValues v = reader.getSortedDocValues(field);
       if (v == null) {
         v = DocValues.emptySorted();
+      } else {
+        anyReal = true;
       }
       totalCost += v.cost();
       values[i] = v;
       starts[i] = context.docBase;
     }
     starts[size] = maxDoc();
+    if (anyReal == false) {
+      return null;
+    }
+
+    // at this point in time we are able to formulate the producer
+    OrdinalMap map = null;
+    CacheHelper cacheHelper = getReaderCacheHelper();
+
+    Function<? super String, ? extends OrdinalMap> producer =
+        (notUsed) -> {
+          try {
+            OrdinalMap mapping =
+                OrdinalMap.build(
+                    cacheHelper == null ? null : cacheHelper.getKey(), values, PackedInts.DEFAULT);
+            return mapping;
+          } catch (IOException e) {
+            throw new RuntimeException(e);
+          }
+        };
+
+    // either we use a cached result that gets produced eventually during caching,
+    // or we produce directly without caching
+    if (cacheHelper != null) {
+      map = cachedOrdMaps.computeIfAbsent(field + cacheHelper.getKey(), producer);
+    } else {
+      map = producer.apply("notUsed");
+    }
+
     return new MultiSortedDocValues(values, starts, map, totalCost);
   }
 
   @Override
   public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
     ensureOpen();
-    OrdinalMap map = null;
-    synchronized (cachedOrdMaps) {
-      map = cachedOrdMaps.get(field);
-      if (map == null) {
-        // uncached, or not a multi dv
-        SortedSetDocValues dv = MultiDocValues.getSortedSetValues(in, field);
-        if (dv instanceof MultiDocValues.MultiSortedSetDocValues) {
-          map = ((MultiDocValues.MultiSortedSetDocValues) dv).mapping;
-          IndexReader.CacheHelper cacheHelper = getReaderCacheHelper();
-          if (cacheHelper != null && map.owner == cacheHelper.getKey()) {
-            cachedOrdMaps.put(field, map);
-          }
-        }
-        return dv;
-      }
+
+    // Integration of what was previously in MultiDocValues.getSortedSetValues:
+    // The purpose of this integration is to be able to construct a value producer which can always
+    // produce a value that is actually needed. The reason for the producer is to avoid getAndSet
+    // pitfalls in this multithreaded context.
+    // So all cases that do not lead to a cacheable value are handled upfront.
+    // We kept the semantics of MultiDocValues.getSortedSetValues.
+    final List<LeafReaderContext> leaves = in.leaves();
+    final int size = leaves.size();
+
+    if (size == 0) {
+      return null;
+    } else if (size == 1) {
+      return leaves.get(0).reader().getSortedSetDocValues(field);
     }
 
-    assert map != null;
-    int size = in.leaves().size();
+    boolean anyReal = false;
     final SortedSetDocValues[] values = new SortedSetDocValues[size];
     final int[] starts = new int[size + 1];
-    long cost = 0;
+    long totalCost = 0;
     for (int i = 0; i < size; i++) {
       LeafReaderContext context = in.leaves().get(i);
       final LeafReader reader = context.reader();
@@ -249,13 +278,43 @@ public final class SlowCompositeReaderWrapper extends LeafReader {
       SortedSetDocValues v = reader.getSortedSetDocValues(field);
       if (v == null) {
         v = DocValues.emptySortedSet();
+      } else {
+        anyReal = true;
       }
+      totalCost += v.cost();
       values[i] = v;
       starts[i] = context.docBase;
-      cost += v.cost();
     }
     starts[size] = maxDoc();
-    return new MultiDocValues.MultiSortedSetDocValues(values, starts, map, cost);
+    if (anyReal == false) {
+      return null;
+    }
+
+    // at this point in time we are able to formulate the producer
+    OrdinalMap map = null;
+    CacheHelper cacheHelper = getReaderCacheHelper();
+
+    Function<? super String, ? extends OrdinalMap> producer =
+        (notUsed) -> {
+          try {
+            OrdinalMap mapping =
+                OrdinalMap.build(
+                    cacheHelper == null ? null : cacheHelper.getKey(), values, PackedInts.DEFAULT);
+            return mapping;
+          } catch (IOException e) {
+            throw new RuntimeException(e);
+          }
+        };
+
+    // either we use a cached result that gets produced eventually during caching,
+    // or we produce directly without caching
+    if (cacheHelper != null) {
+      map = cachedOrdMaps.computeIfAbsent(field + cacheHelper.getKey(), producer);
+    } else {
+      map = producer.apply("notUsed");
+    }
+
+    return new MultiDocValues.MultiSortedSetDocValues(values, starts, map, totalCost);
   }
 
   @Override