You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2023/02/06 21:45:18 UTC
[lucene] branch branch_9x updated: Simplify LongHashSet by completely removing java.util.Set APIs (#12133)
This is an automated email from the ASF dual-hosted git repository.
uschindler pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/lucene.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 84f22d5fefc Simplify LongHashSet by completely removing java.util.Set APIs (#12133)
84f22d5fefc is described below
commit 84f22d5fefc895d8e4808b830b1f242a14088f3b
Author: Uwe Schindler <us...@apache.org>
AuthorDate: Mon Feb 6 22:43:20 2023 +0100
Simplify LongHashSet by completely removing java.util.Set APIs (#12133)
---
lucene/CHANGES.txt | 4 +-
.../org/apache/lucene/document/LongHashSet.java | 45 ++++++----------------
.../apache/lucene/document/TestLongHashSet.java | 11 ++----
3 files changed, 19 insertions(+), 41 deletions(-)
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7d0dbdf92a7..51fe306dbad 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -30,7 +30,9 @@ Optimizations
in order to achieve the same false positive probability with less memory.
(Jean-François Boeuf)
-* GITHUB#12118 Optimize FeatureQuery to TermQuery & weight when scoring is not required (Ben Trent, Robert Muir)
+* GITHUB#12118: Optimize FeatureQuery to TermQuery & weight when scoring is not required. (Ben Trent, Robert Muir)
+
+* GITHUB#12128, GITHUB#12133: Speed up docvalues set query by making use of sortedness. (Robert Muir, Uwe Schindler)
Bug Fixes
---------------------
diff --git a/lucene/core/src/java/org/apache/lucene/document/LongHashSet.java b/lucene/core/src/java/org/apache/lucene/document/LongHashSet.java
index e0e0b252a78..b9da19370df 100644
--- a/lucene/core/src/java/org/apache/lucene/document/LongHashSet.java
+++ b/lucene/core/src/java/org/apache/lucene/document/LongHashSet.java
@@ -17,9 +17,9 @@
package org.apache.lucene.document;
import java.util.Arrays;
-import java.util.HashSet;
import java.util.Objects;
-import java.util.Set;
+import java.util.stream.Collectors;
+import java.util.stream.LongStream;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
@@ -101,6 +101,15 @@ final class LongHashSet implements Accountable {
}
}
+ /** returns a stream of all values contained in this set */
+ LongStream stream() {
+ LongStream stream = Arrays.stream(table).filter(v -> v != MISSING);
+ if (hasMissingValue) {
+ stream = LongStream.concat(LongStream.of(MISSING), stream);
+ }
+ return stream;
+ }
+
@Override
public int hashCode() {
return Objects.hash(size, minValue, maxValue, mask, hasMissingValue, Arrays.hashCode(table));
@@ -122,23 +131,7 @@ final class LongHashSet implements Accountable {
@Override
public String toString() {
- StringBuilder sb = new StringBuilder("[");
- boolean seenValue = false;
- if (hasMissingValue) {
- sb.append(MISSING);
- seenValue = true;
- }
- for (long v : table) {
- if (v != MISSING) {
- if (seenValue) {
- sb.append(", ");
- }
- sb.append(v);
- seenValue = true;
- }
- }
- sb.append("]");
- return sb.toString();
+ return stream().mapToObj(String::valueOf).collect(Collectors.joining(", ", "[", "]"));
}
/** number of elements in the set */
@@ -150,18 +143,4 @@ final class LongHashSet implements Accountable {
public long ramBytesUsed() {
return BASE_RAM_BYTES + RamUsageEstimator.sizeOfObject(table);
}
-
- // for testing only
- Set<Long> toSet() {
- Set<Long> set = new HashSet<>();
- if (hasMissingValue) {
- set.add(MISSING);
- }
- for (long v : table) {
- if (v != MISSING) {
- set.add(v);
- }
- }
- return set;
- }
}
diff --git a/lucene/core/src/test/org/apache/lucene/document/TestLongHashSet.java b/lucene/core/src/test/org/apache/lucene/document/TestLongHashSet.java
index 19c4725da4d..534e49e961e 100644
--- a/lucene/core/src/test/org/apache/lucene/document/TestLongHashSet.java
+++ b/lucene/core/src/test/org/apache/lucene/document/TestLongHashSet.java
@@ -26,7 +26,7 @@ import org.apache.lucene.tests.util.LuceneTestCase;
public class TestLongHashSet extends LuceneTestCase {
private void assertEquals(Set<Long> set1, LongHashSet longHashSet) {
- Set<Long> set2 = longHashSet.toSet();
+ Set<Long> set2 = longHashSet.stream().boxed().collect(Collectors.toSet());
LuceneTestCase.assertEquals(set1, set2);
@@ -44,13 +44,13 @@ public class TestLongHashSet extends LuceneTestCase {
}
private void assertNotEquals(Set<Long> set1, LongHashSet longHashSet) {
- Set<Long> set2 = longHashSet.toSet();
+ Set<Long> set2 = longHashSet.stream().boxed().collect(Collectors.toSet());
LuceneTestCase.assertNotEquals(set1, set2);
LongHashSet set3 = new LongHashSet(set1.stream().mapToLong(Long::longValue).sorted().toArray());
- LuceneTestCase.assertNotEquals(set2, set3.toSet());
+ LuceneTestCase.assertNotEquals(set2, set3.stream().boxed().collect(Collectors.toSet()));
}
public void testEmpty() {
@@ -103,10 +103,7 @@ public class TestLongHashSet extends LuceneTestCase {
if (values.length > 0 && random().nextBoolean()) {
values[values.length / 2] = Long.MIN_VALUE;
}
- Set<Long> set1 =
- LongStream.of(values)
- .mapToObj(Long::valueOf)
- .collect(Collectors.toCollection(HashSet::new));
+ Set<Long> set1 = LongStream.of(values).mapToObj(Long::valueOf).collect(Collectors.toSet());
Arrays.sort(values);
LongHashSet set2 = new LongHashSet(values);
assertEquals(set1, set2);