You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ma...@apache.org on 2024/03/05 21:30:49 UTC
(solr) branch branch_9x updated: SOLR-17190: Replace org.apache.solr.util.LongSet with hppc LongHashSet (#2328)
This is an automated email from the ASF dual-hosted git repository.
magibney pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 7c0fc6f8164 SOLR-17190: Replace org.apache.solr.util.LongSet with hppc LongHashSet (#2328)
7c0fc6f8164 is described below
commit 7c0fc6f8164ad8ade9db74a1646efd147ee32874
Author: Michael Gibney <mi...@michaelgibney.net>
AuthorDate: Tue Mar 5 16:24:13 2024 -0500
SOLR-17190: Replace org.apache.solr.util.LongSet with hppc LongHashSet (#2328)
(cherry picked from commit 4a02d1a07e85313dc04aa07fbc483b214bdb0588)
---
solr/CHANGES.txt | 2 +
.../handler/component/RealTimeGetComponent.java | 5 +-
.../org/apache/solr/search/facet/UniqueAgg.java | 14 +-
.../solr/search/join/GraphPointsCollector.java | 37 +++---
.../src/java/org/apache/solr/update/UpdateLog.java | 5 +-
.../src/java/org/apache/solr/util/LongSet.java | 144 ---------------------
.../src/test/org/apache/solr/util/LongSetTest.java | 51 ++++----
7 files changed, 60 insertions(+), 198 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 3550421a985..58df3288061 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -67,6 +67,8 @@ Other Changes
* SOLR-17066: GenericSolrRequest now has a `setRequiresCollection` setter that allows it to specify whether
it should make use of the client-level default collection/core. (Jason Gerlowski)
+* SOLR-17190: Replace org.apache.solr.util.LongSet with hppc LongHashSet (Michael Gibney)
+
================== 9.5.0 ==================
New Features
---------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
index 1f098bd8ca0..33fa334758a 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
@@ -21,6 +21,8 @@ import static org.apache.solr.common.params.CommonParams.ID;
import static org.apache.solr.common.params.CommonParams.VERSION_FIELD;
import static org.apache.solr.search.QueryUtils.makeQueryable;
+import com.carrotsearch.hppc.LongHashSet;
+import com.carrotsearch.hppc.LongSet;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
@@ -94,7 +96,6 @@ import org.apache.solr.update.PeerSync;
import org.apache.solr.update.PeerSyncWithLeader;
import org.apache.solr.update.UpdateLog;
import org.apache.solr.update.processor.AtomicUpdateDocumentMerger;
-import org.apache.solr.util.LongSet;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.TestInjection;
import org.slf4j.Logger;
@@ -1342,7 +1343,7 @@ public class RealTimeGetComponent extends SearchComponent {
// TODO: get this from cache instead of rebuilding?
try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
- LongSet updateVersions = new LongSet(versions.size());
+ LongSet updateVersions = new LongHashSet(versions.size());
for (Long version : versions) {
try {
Object o = recentUpdates.lookup(version);
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
index acf8596f1e5..6f0cbad1088 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
@@ -16,6 +16,9 @@
*/
package org.apache.solr.search.facet;
+import com.carrotsearch.hppc.LongHashSet;
+import com.carrotsearch.hppc.LongSet;
+import com.carrotsearch.hppc.cursors.LongCursor;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
@@ -27,8 +30,6 @@ import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.solr.common.util.CollectionUtil;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.SchemaField;
-import org.apache.solr.util.LongIterator;
-import org.apache.solr.util.LongSet;
public class UniqueAgg extends StrAggValueSource {
public static final String UNIQUE = "unique";
@@ -146,7 +147,7 @@ public class UniqueAgg extends StrAggValueSource {
protected void collectValues(int doc, int slot) throws IOException {
LongSet set = sets[slot];
if (set == null) {
- set = sets[slot] = new LongSet(16);
+ set = sets[slot] = new LongHashSet(16);
}
collectValues(doc, set);
}
@@ -172,7 +173,7 @@ public class UniqueAgg extends StrAggValueSource {
*/
private int getCardinality(int slot) {
LongSet set = sets[slot];
- return set == null ? 0 : set.cardinality();
+ return set == null ? 0 : set.size();
}
public Object getShardValue(int slot) throws IOException {
@@ -188,9 +189,8 @@ public class UniqueAgg extends StrAggValueSource {
if (unique <= maxExplicit) {
List<Long> lst = new ArrayList<>(Math.min(unique, maxExplicit));
if (set != null) {
- LongIterator iter = set.iterator();
- while (iter.hasNext()) {
- lst.add(iter.next());
+ for (LongCursor v : set) {
+ lst.add(v.value);
}
}
map.add(VALS, lst);
diff --git a/solr/core/src/java/org/apache/solr/search/join/GraphPointsCollector.java b/solr/core/src/java/org/apache/solr/search/join/GraphPointsCollector.java
index 2ad2455e540..e02d003e15f 100644
--- a/solr/core/src/java/org/apache/solr/search/join/GraphPointsCollector.java
+++ b/solr/core/src/java/org/apache/solr/search/join/GraphPointsCollector.java
@@ -17,6 +17,9 @@
package org.apache.solr.search.join;
+import com.carrotsearch.hppc.LongHashSet;
+import com.carrotsearch.hppc.LongSet;
+import com.carrotsearch.hppc.cursors.LongCursor;
import java.io.IOException;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.FloatPoint;
@@ -30,14 +33,12 @@ import org.apache.lucene.util.NumericUtils;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocSet;
-import org.apache.solr.util.LongIterator;
-import org.apache.solr.util.LongSet;
/**
* @lucene.internal
*/
public class GraphPointsCollector extends GraphEdgeCollector {
- final LongSet set = new LongSet(256);
+ final LongSet set = new LongHashSet(256);
SortedNumericDocValues values = null;
@@ -70,7 +71,7 @@ public class GraphPointsCollector extends GraphEdgeCollector {
@Override
public Query getResultQuery(SchemaField matchField, boolean useAutomaton) {
- if (set.cardinality() == 0) return null;
+ if (set.isEmpty()) return null;
Query q = null;
@@ -81,38 +82,34 @@ public class GraphPointsCollector extends GraphEdgeCollector {
boolean multiValued = collectField.multiValued();
if (ntype == NumberType.LONG || ntype == NumberType.DATE) {
- long[] vals = new long[set.cardinality()];
+ long[] vals = new long[set.size()];
int i = 0;
- for (LongIterator iter = set.iterator(); iter.hasNext(); ) {
- long bits = iter.next();
- long v = bits;
- vals[i++] = v;
+ for (LongCursor c : set) {
+ vals[i++] = c.value;
}
q = LongPoint.newSetQuery(matchField.getName(), vals);
} else if (ntype == NumberType.INTEGER) {
- int[] vals = new int[set.cardinality()];
+ int[] vals = new int[set.size()];
int i = 0;
- for (LongIterator iter = set.iterator(); iter.hasNext(); ) {
- long bits = iter.next();
- int v = (int) bits;
- vals[i++] = v;
+ for (LongCursor c : set) {
+ vals[i++] = (int) c.value;
}
q = IntPoint.newSetQuery(matchField.getName(), vals);
} else if (ntype == NumberType.DOUBLE) {
- double[] vals = new double[set.cardinality()];
+ double[] vals = new double[set.size()];
int i = 0;
- for (LongIterator iter = set.iterator(); iter.hasNext(); ) {
- long bits = iter.next();
+ for (LongCursor c : set) {
+ long bits = c.value;
double v =
multiValued ? NumericUtils.sortableLongToDouble(bits) : Double.longBitsToDouble(bits);
vals[i++] = v;
}
q = DoublePoint.newSetQuery(matchField.getName(), vals);
} else if (ntype == NumberType.FLOAT) {
- float[] vals = new float[set.cardinality()];
+ float[] vals = new float[set.size()];
int i = 0;
- for (LongIterator iter = set.iterator(); iter.hasNext(); ) {
- long bits = iter.next();
+ for (LongCursor c : set) {
+ long bits = c.value;
float v =
multiValued
? NumericUtils.sortableIntToFloat((int) bits)
diff --git a/solr/core/src/java/org/apache/solr/update/UpdateLog.java b/solr/core/src/java/org/apache/solr/update/UpdateLog.java
index c13530891fd..5f2d925c14e 100644
--- a/solr/core/src/java/org/apache/solr/update/UpdateLog.java
+++ b/solr/core/src/java/org/apache/solr/update/UpdateLog.java
@@ -19,6 +19,8 @@ package org.apache.solr.update;
import static org.apache.solr.update.processor.DistributedUpdateProcessor.DistribPhase.FROMLEADER;
import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM;
+import com.carrotsearch.hppc.LongHashSet;
+import com.carrotsearch.hppc.LongSet;
import com.codahale.metrics.Gauge;
import com.codahale.metrics.Meter;
import java.io.Closeable;
@@ -80,7 +82,6 @@ import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.update.processor.DistributedUpdateProcessor;
import org.apache.solr.update.processor.UpdateRequestProcessor;
import org.apache.solr.update.processor.UpdateRequestProcessorChain;
-import org.apache.solr.util.LongSet;
import org.apache.solr.util.OrderedExecutor;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.TestInjection;
@@ -1545,7 +1546,7 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
public List<Long> getVersions(int n, long maxVersion) {
List<Long> ret = new ArrayList<>(n);
- LongSet set = new LongSet(n);
+ LongSet set = new LongHashSet(n);
final int nInput = n;
for (List<Update> singleList : updateList) {
diff --git a/solr/core/src/java/org/apache/solr/util/LongSet.java b/solr/core/src/java/org/apache/solr/util/LongSet.java
deleted file mode 100644
index ef4665eea76..00000000000
--- a/solr/core/src/java/org/apache/solr/util/LongSet.java
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.solr.util;
-
-import java.util.NoSuchElementException;
-
-/**
- * Collects long values in a hash set (closed hashing on power-of-two sized long[])
- *
- * @lucene.internal
- */
-public class LongSet {
-
- private static final float LOAD_FACTOR = 0.7f;
-
- private long[] vals;
- private int cardinality;
- private int mask;
- private int threshold;
- private int zeroCount; // 1 if a 0 was collected
-
- public LongSet(int sz) {
- sz = Math.max(org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo(sz), 2);
- vals = new long[sz];
- mask = sz - 1;
- threshold = (int) (sz * LOAD_FACTOR);
- }
-
- /**
- * Returns the long[] array that has entries filled in with values or "0" for empty. To see if "0"
- * itself is in the set, call containsZero()
- */
- public long[] getBackingArray() {
- return vals;
- }
-
- public boolean containsZero() {
- return zeroCount != 0;
- }
-
- /**
- * Adds an additional value to the set, returns true if the set did not already contain the value.
- */
- public boolean add(long val) {
- if (val == 0) {
- if (zeroCount != 0) {
- return false;
- } else {
- zeroCount = 1;
- return true;
- }
- }
- if (cardinality >= threshold) {
- rehash();
- }
-
- // For floats: exponent bits start at bit 23 for single precision,
- // and bit 52 for double precision.
- // Many values will only have significant bits just to the right of that.
-
- // For now, lets just settle to get first 8 significant mantissa bits of double or float in the
- // lowest bits of our hash. The upper bits of our hash will be irrelevant.
- int h = (int) (val + (val >>> 44) + (val >>> 15));
- for (int slot = h & mask; ; slot = (slot + 1) & mask) {
- long v = vals[slot];
- if (v == 0) {
- vals[slot] = val;
- cardinality++;
- return true;
- } else if (v == val) {
- // val is already in the set
- break;
- }
- }
- return false;
- }
-
- private void rehash() {
- long[] oldVals = vals;
- int newCapacity = vals.length << 1;
- vals = new long[newCapacity];
- mask = newCapacity - 1;
- threshold = (int) (newCapacity * LOAD_FACTOR);
- cardinality = 0;
-
- for (long val : oldVals) {
- if (val != 0) {
- add(val);
- }
- }
- }
-
- /** The number of values in the set */
- public int cardinality() {
- return cardinality + zeroCount;
- }
-
- /** Returns an iterator over the values in the set. */
- public LongIterator iterator() {
- return new LongIterator() {
- private int remainingValues = cardinality();
- private int valsIdx = 0;
-
- @Override
- public boolean hasNext() {
- return remainingValues > 0;
- }
-
- @Override
- public long next() {
- if (!hasNext()) {
- throw new NoSuchElementException();
- }
- remainingValues--;
-
- if (remainingValues == 0 && zeroCount > 0) {
- return 0;
- }
-
- while (true) { // guaranteed to find another value if we get here
- long value = vals[valsIdx++];
- if (value != 0) {
- return value;
- }
- }
- }
- };
- }
-}
diff --git a/solr/core/src/test/org/apache/solr/util/LongSetTest.java b/solr/core/src/test/org/apache/solr/util/LongSetTest.java
index 4d3183ccdea..5fc3ffd94d0 100644
--- a/solr/core/src/test/org/apache/solr/util/LongSetTest.java
+++ b/solr/core/src/test/org/apache/solr/util/LongSetTest.java
@@ -16,7 +16,13 @@
*/
package org.apache.solr.util;
+import static com.carrotsearch.hppc.HashContainers.MIN_HASH_ARRAY_LENGTH;
+
+import com.carrotsearch.hppc.LongHashSet;
+import com.carrotsearch.hppc.LongSet;
+import com.carrotsearch.hppc.cursors.LongCursor;
import java.util.HashSet;
+import java.util.Iterator;
import org.apache.solr.SolrTestCase;
import org.junit.Test;
@@ -24,69 +30,68 @@ public class LongSetTest extends SolrTestCase {
@Test
public void testZeroInitialCapacity() {
- final LongSet ls = new LongSet(0);
- assertEquals(0, ls.cardinality());
- assertEquals(2, ls.getBackingArray().length);
- assertFalse(ls.containsZero());
+ final LongHashSet ls = new LongHashSet(0);
+ assertEquals(0, ls.size());
+ assertEquals(MIN_HASH_ARRAY_LENGTH, ls.keys.length - 1); // -1 to correct for empty slot
+ assertFalse(ls.contains(0));
assertFalse(ls.iterator().hasNext());
final HashSet<Long> hs = new HashSet<>();
for (long jj = 1; jj <= 10; ++jj) {
assertTrue(ls.add(jj));
assertFalse(ls.add(jj));
- assertEquals(jj, ls.cardinality());
+ assertEquals(jj, ls.size());
assertTrue(hs.add(jj));
assertFalse(hs.add(jj));
}
- final LongIterator it = ls.iterator();
- while (it.hasNext()) {
- hs.remove(it.next());
+ for (LongCursor c : ls) {
+ hs.remove(c.value);
}
assertTrue(hs.isEmpty());
- assertEquals(10, ls.cardinality());
- assertEquals(16, ls.getBackingArray().length);
+ assertEquals(10, ls.size());
+ assertEquals(16, ls.keys.length - 1); // -1 to correct for empty slot
}
@Test
public void testAddZero() {
- final LongSet ls = new LongSet(1);
- assertEquals(0, ls.cardinality());
- assertFalse(ls.containsZero());
+ final LongSet ls = new LongHashSet(1);
+ assertEquals(0, ls.size());
+ assertFalse(ls.contains(0));
assertFalse(ls.iterator().hasNext());
assertTrue(ls.add(0L));
- assertTrue(ls.containsZero());
+ assertTrue(ls.contains(0));
assertFalse(ls.add(0L));
- assertTrue(ls.containsZero());
+ assertTrue(ls.contains(0));
- final LongIterator it = ls.iterator();
+ final Iterator<LongCursor> it = ls.iterator();
assertTrue(it.hasNext());
- assertEquals(0L, it.next());
+ assertEquals(0L, it.next().value);
assertFalse(it.hasNext());
}
@Test
public void testIterating() {
- final LongSet ls = new LongSet(4);
+ final LongSet ls = new LongHashSet(4);
assertTrue(ls.add(0L));
assertTrue(ls.add(6L));
assertTrue(ls.add(7L));
assertTrue(ls.add(42L));
- final LongIterator it = ls.iterator();
+ final Iterator<LongCursor> it = ls.iterator();
// non-zero values are returned first
assertTrue(it.hasNext());
- assertNotEquals(0L, it.next());
+ assertNotEquals(0L, it.next().value);
assertTrue(it.hasNext());
- assertNotEquals(0L, it.next());
+ assertNotEquals(0L, it.next().value);
assertTrue(it.hasNext());
- assertNotEquals(0L, it.next());
+ assertNotEquals(0L, it.next().value);
// and zero value (if any) is returned last
assertTrue(it.hasNext());
- assertEquals(0L, it.next());
+ assertEquals(0L, it.next().value);
assertFalse(it.hasNext());
}
}