You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2021/03/12 16:28:53 UTC
[lucene-solr] 01/03: LUCENE-9791 Allow calling BytesRefHash#find
concurrently (#8)
This is an automated email from the ASF dual-hosted git repository.
mikemccand pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git
commit f171417e99548b9f5aa84605809ee58228e1e91a
Author: pawel-bugalski-dynatrace <67...@users.noreply.github.com>
AuthorDate: Thu Mar 11 14:06:03 2021 +0100
LUCENE-9791 Allow calling BytesRefHash#find concurrently (#8)
Removes `scratch1` field in `BytesRefHash` by accessing underlying bytes pool directly
in `equals` method. As a result it is now possible to call `BytesRefHash#find`
concurrently as long as there are no concurrent modifications to BytesRefHash instance
and it is correctly published.
This addresses the concurrency issue with Monitor (aka Luwak) since it
is using `BytesRefHash#find` concurrently without additional synchronization.
---
.../java/org/apache/lucene/util/BytesRefHash.java | 27 ++++++---
.../org/apache/lucene/util/TestBytesRefHash.java | 69 +++++++++++++++++++++-
2 files changed, 87 insertions(+), 9 deletions(-)
diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java b/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java
index da3178e..60d8135 100644
--- a/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java
+++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java
@@ -43,11 +43,10 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
* @lucene.internal
*/
public final class BytesRefHash implements Accountable {
- private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(BytesRefHash.class) +
- // size of scratch1
- RamUsageEstimator.shallowSizeOfInstance(BytesRef.class) +
- // size of Counter
- RamUsageEstimator.primitiveSizes.get(long.class);
+ private static final long BASE_RAM_BYTES =
+ RamUsageEstimator.shallowSizeOfInstance(BytesRefHash.class) +
+ // size of Counter
+ RamUsageEstimator.primitiveSizes.get(long.class);
public static final int DEFAULT_CAPACITY = 16;
@@ -56,7 +55,6 @@ public final class BytesRefHash implements Accountable {
final ByteBlockPool pool;
int[] bytesStart;
- private final BytesRef scratch1 = new BytesRef();
private int hashSize;
private int hashHalfSize;
private int hashMask;
@@ -187,8 +185,21 @@ public final class BytesRefHash implements Accountable {
}
private boolean equals(int id, BytesRef b) {
- pool.setBytesRef(scratch1, bytesStart[id]);
- return scratch1.bytesEquals(b);
+ final int textStart = bytesStart[id];
+ final byte[] bytes = pool.buffers[textStart >> BYTE_BLOCK_SHIFT];
+ int pos = textStart & BYTE_BLOCK_MASK;
+ final int length;
+ final int offset;
+ if ((bytes[pos] & 0x80) == 0) {
+ // length is 1 byte
+ length = bytes[pos];
+ offset = pos + 1;
+ } else {
+ // length is 2 bytes
+ length = (bytes[pos] & 0x7f) + ((bytes[pos + 1] & 0xff) << 7);
+ offset = pos + 2;
+ }
+ return Arrays.equals(bytes, offset, offset + length, b.bytes, b.offset, b.offset + b.length);
}
private boolean shrink(int targetSize) {
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestBytesRefHash.java b/lucene/core/src/test/org/apache/lucene/util/TestBytesRefHash.java
index f80aea4..9c6b796 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestBytesRefHash.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestBytesRefHash.java
@@ -16,15 +16,19 @@
*/
package org.apache.lucene.util;
-
+import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.List;
import java.util.Map.Entry;
import java.util.Map;
+import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
+import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.util.BytesRefHash.MaxBytesLengthExceededException;
import org.junit.Before;
@@ -281,6 +285,69 @@ public class TestBytesRefHash extends LuceneTestCase {
}
}
+ @Test
+ public void testConcurrentAccessToBytesRefHash() throws Exception {
+ int num = atLeast(2);
+ for (int j = 0; j < num; j++) {
+ int numStrings = 797;
+ List<String> strings = new ArrayList<>(numStrings);
+ for (int i = 0; i < numStrings; i++) {
+ final String str = TestUtil.randomRealisticUnicodeString(random(), 1, 1000);
+ hash.add(new BytesRef(str));
+ assertTrue(strings.add(str));
+ }
+ int hashSize = hash.size();
+
+ AtomicInteger notFound = new AtomicInteger();
+ AtomicInteger notEquals = new AtomicInteger();
+ AtomicInteger wrongSize = new AtomicInteger();
+ int numThreads = atLeast(3);
+ CountDownLatch latch = new CountDownLatch(numThreads);
+ Thread[] threads = new Thread[numThreads];
+ for (int i = 0; i < threads.length; i++) {
+ int loops = atLeast(100);
+ threads[i] =
+ new Thread(
+ () -> {
+ BytesRef scratch = new BytesRef();
+ latch.countDown();
+ try {
+ latch.await();
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ throw new RuntimeException(e);
+ }
+ for (int k = 0; k < loops; k++) {
+ BytesRef find = new BytesRef(strings.get(k % strings.size()));
+ int id = hash.find(find);
+ if (id < 0) {
+ notFound.incrementAndGet();
+ } else {
+ BytesRef get = hash.get(id, scratch);
+ if (!get.bytesEquals(find)) {
+ notEquals.incrementAndGet();
+ }
+ }
+ if (hash.size() != hashSize) {
+ wrongSize.incrementAndGet();
+ }
+ }
+ },
+ "t" + i);
+ }
+
+ for (Thread t : threads) t.start();
+ for (Thread t : threads) t.join();
+
+ assertEquals(0, notFound.get());
+ assertEquals(0, notEquals.get());
+ assertEquals(0, wrongSize.get());
+ hash.clear();
+ assertEquals(0, hash.size());
+ hash.reinit();
+ }
+ }
+
@Test(expected = MaxBytesLengthExceededException.class)
public void testLargeValue() {
int[] sizes = new int[] { random().nextInt(5),