You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@solr.apache.org by ma...@apache.org on 2024/03/05 21:24:18 UTC

(solr) branch main updated: SOLR-17190: Replace org.apache.solr.util.LongSet with hppc LongHashSet (#2328)

This is an automated email from the ASF dual-hosted git repository.

magibney pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new 4a02d1a07e8 SOLR-17190: Replace org.apache.solr.util.LongSet with hppc LongHashSet (#2328)
4a02d1a07e8 is described below

commit 4a02d1a07e85313dc04aa07fbc483b214bdb0588
Author: Michael Gibney <mi...@michaelgibney.net>
AuthorDate: Tue Mar 5 16:24:13 2024 -0500

    SOLR-17190: Replace org.apache.solr.util.LongSet with hppc LongHashSet (#2328)
---
 solr/CHANGES.txt                                   |   2 +
 .../handler/component/RealTimeGetComponent.java    |   5 +-
 .../org/apache/solr/search/facet/UniqueAgg.java    |  14 +-
 .../solr/search/join/GraphPointsCollector.java     |  37 +++---
 .../src/java/org/apache/solr/update/UpdateLog.java |   5 +-
 .../src/java/org/apache/solr/util/LongSet.java     | 144 ---------------------
 .../src/test/org/apache/solr/util/LongSetTest.java |  51 ++++----
 7 files changed, 60 insertions(+), 198 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 959206270b9..2c62ab74afe 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -146,6 +146,8 @@ Other Changes
 * SOLR-17066: GenericSolrRequest now has a `setRequiresCollection` setter that allows it to specify whether
   it should make use of the client-level default collection/core. (Jason Gerlowski)
 
+* SOLR-17190: Replace org.apache.solr.util.LongSet with hppc LongHashSet (Michael Gibney)
+
 ==================  9.5.0 ==================
 New Features
 ---------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
index 1f098bd8ca0..33fa334758a 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
@@ -21,6 +21,8 @@ import static org.apache.solr.common.params.CommonParams.ID;
 import static org.apache.solr.common.params.CommonParams.VERSION_FIELD;
 import static org.apache.solr.search.QueryUtils.makeQueryable;
 
+import com.carrotsearch.hppc.LongHashSet;
+import com.carrotsearch.hppc.LongSet;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
@@ -94,7 +96,6 @@ import org.apache.solr.update.PeerSync;
 import org.apache.solr.update.PeerSyncWithLeader;
 import org.apache.solr.update.UpdateLog;
 import org.apache.solr.update.processor.AtomicUpdateDocumentMerger;
-import org.apache.solr.util.LongSet;
 import org.apache.solr.util.RefCounted;
 import org.apache.solr.util.TestInjection;
 import org.slf4j.Logger;
@@ -1342,7 +1343,7 @@ public class RealTimeGetComponent extends SearchComponent {
 
     // TODO: get this from cache instead of rebuilding?
     try (UpdateLog.RecentUpdates recentUpdates = ulog.getRecentUpdates()) {
-      LongSet updateVersions = new LongSet(versions.size());
+      LongSet updateVersions = new LongHashSet(versions.size());
       for (Long version : versions) {
         try {
           Object o = recentUpdates.lookup(version);
diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
index acf8596f1e5..6f0cbad1088 100644
--- a/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
+++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueAgg.java
@@ -16,6 +16,9 @@
  */
 package org.apache.solr.search.facet;
 
+import com.carrotsearch.hppc.LongHashSet;
+import com.carrotsearch.hppc.LongSet;
+import com.carrotsearch.hppc.cursors.LongCursor;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
@@ -27,8 +30,6 @@ import org.apache.lucene.index.SortedNumericDocValues;
 import org.apache.solr.common.util.CollectionUtil;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.schema.SchemaField;
-import org.apache.solr.util.LongIterator;
-import org.apache.solr.util.LongSet;
 
 public class UniqueAgg extends StrAggValueSource {
   public static final String UNIQUE = "unique";
@@ -146,7 +147,7 @@ public class UniqueAgg extends StrAggValueSource {
     protected void collectValues(int doc, int slot) throws IOException {
       LongSet set = sets[slot];
       if (set == null) {
-        set = sets[slot] = new LongSet(16);
+        set = sets[slot] = new LongHashSet(16);
       }
       collectValues(doc, set);
     }
@@ -172,7 +173,7 @@ public class UniqueAgg extends StrAggValueSource {
      */
     private int getCardinality(int slot) {
       LongSet set = sets[slot];
-      return set == null ? 0 : set.cardinality();
+      return set == null ? 0 : set.size();
     }
 
     public Object getShardValue(int slot) throws IOException {
@@ -188,9 +189,8 @@ public class UniqueAgg extends StrAggValueSource {
       if (unique <= maxExplicit) {
         List<Long> lst = new ArrayList<>(Math.min(unique, maxExplicit));
         if (set != null) {
-          LongIterator iter = set.iterator();
-          while (iter.hasNext()) {
-            lst.add(iter.next());
+          for (LongCursor v : set) {
+            lst.add(v.value);
           }
         }
         map.add(VALS, lst);
diff --git a/solr/core/src/java/org/apache/solr/search/join/GraphPointsCollector.java b/solr/core/src/java/org/apache/solr/search/join/GraphPointsCollector.java
index 2ad2455e540..e02d003e15f 100644
--- a/solr/core/src/java/org/apache/solr/search/join/GraphPointsCollector.java
+++ b/solr/core/src/java/org/apache/solr/search/join/GraphPointsCollector.java
@@ -17,6 +17,9 @@
 
 package org.apache.solr.search.join;
 
+import com.carrotsearch.hppc.LongHashSet;
+import com.carrotsearch.hppc.LongSet;
+import com.carrotsearch.hppc.cursors.LongCursor;
 import java.io.IOException;
 import org.apache.lucene.document.DoublePoint;
 import org.apache.lucene.document.FloatPoint;
@@ -30,14 +33,12 @@ import org.apache.lucene.util.NumericUtils;
 import org.apache.solr.schema.NumberType;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.DocSet;
-import org.apache.solr.util.LongIterator;
-import org.apache.solr.util.LongSet;
 
 /**
  * @lucene.internal
  */
 public class GraphPointsCollector extends GraphEdgeCollector {
-  final LongSet set = new LongSet(256);
+  final LongSet set = new LongHashSet(256);
 
   SortedNumericDocValues values = null;
 
@@ -70,7 +71,7 @@ public class GraphPointsCollector extends GraphEdgeCollector {
 
   @Override
   public Query getResultQuery(SchemaField matchField, boolean useAutomaton) {
-    if (set.cardinality() == 0) return null;
+    if (set.isEmpty()) return null;
 
     Query q = null;
 
@@ -81,38 +82,34 @@ public class GraphPointsCollector extends GraphEdgeCollector {
     boolean multiValued = collectField.multiValued();
 
     if (ntype == NumberType.LONG || ntype == NumberType.DATE) {
-      long[] vals = new long[set.cardinality()];
+      long[] vals = new long[set.size()];
       int i = 0;
-      for (LongIterator iter = set.iterator(); iter.hasNext(); ) {
-        long bits = iter.next();
-        long v = bits;
-        vals[i++] = v;
+      for (LongCursor c : set) {
+        vals[i++] = c.value;
       }
       q = LongPoint.newSetQuery(matchField.getName(), vals);
     } else if (ntype == NumberType.INTEGER) {
-      int[] vals = new int[set.cardinality()];
+      int[] vals = new int[set.size()];
       int i = 0;
-      for (LongIterator iter = set.iterator(); iter.hasNext(); ) {
-        long bits = iter.next();
-        int v = (int) bits;
-        vals[i++] = v;
+      for (LongCursor c : set) {
+        vals[i++] = (int) c.value;
       }
       q = IntPoint.newSetQuery(matchField.getName(), vals);
     } else if (ntype == NumberType.DOUBLE) {
-      double[] vals = new double[set.cardinality()];
+      double[] vals = new double[set.size()];
       int i = 0;
-      for (LongIterator iter = set.iterator(); iter.hasNext(); ) {
-        long bits = iter.next();
+      for (LongCursor c : set) {
+        long bits = c.value;
         double v =
             multiValued ? NumericUtils.sortableLongToDouble(bits) : Double.longBitsToDouble(bits);
         vals[i++] = v;
       }
       q = DoublePoint.newSetQuery(matchField.getName(), vals);
     } else if (ntype == NumberType.FLOAT) {
-      float[] vals = new float[set.cardinality()];
+      float[] vals = new float[set.size()];
       int i = 0;
-      for (LongIterator iter = set.iterator(); iter.hasNext(); ) {
-        long bits = iter.next();
+      for (LongCursor c : set) {
+        long bits = c.value;
         float v =
             multiValued
                 ? NumericUtils.sortableIntToFloat((int) bits)
diff --git a/solr/core/src/java/org/apache/solr/update/UpdateLog.java b/solr/core/src/java/org/apache/solr/update/UpdateLog.java
index c13530891fd..5f2d925c14e 100644
--- a/solr/core/src/java/org/apache/solr/update/UpdateLog.java
+++ b/solr/core/src/java/org/apache/solr/update/UpdateLog.java
@@ -19,6 +19,8 @@ package org.apache.solr.update;
 import static org.apache.solr.update.processor.DistributedUpdateProcessor.DistribPhase.FROMLEADER;
 import static org.apache.solr.update.processor.DistributingUpdateProcessorFactory.DISTRIB_UPDATE_PARAM;
 
+import com.carrotsearch.hppc.LongHashSet;
+import com.carrotsearch.hppc.LongSet;
 import com.codahale.metrics.Gauge;
 import com.codahale.metrics.Meter;
 import java.io.Closeable;
@@ -80,7 +82,6 @@ import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.update.processor.DistributedUpdateProcessor;
 import org.apache.solr.update.processor.UpdateRequestProcessor;
 import org.apache.solr.update.processor.UpdateRequestProcessorChain;
-import org.apache.solr.util.LongSet;
 import org.apache.solr.util.OrderedExecutor;
 import org.apache.solr.util.RefCounted;
 import org.apache.solr.util.TestInjection;
@@ -1545,7 +1546,7 @@ public class UpdateLog implements PluginInfoInitialized, SolrMetricProducer {
 
     public List<Long> getVersions(int n, long maxVersion) {
       List<Long> ret = new ArrayList<>(n);
-      LongSet set = new LongSet(n);
+      LongSet set = new LongHashSet(n);
       final int nInput = n;
 
       for (List<Update> singleList : updateList) {
diff --git a/solr/core/src/java/org/apache/solr/util/LongSet.java b/solr/core/src/java/org/apache/solr/util/LongSet.java
deleted file mode 100644
index ef4665eea76..00000000000
--- a/solr/core/src/java/org/apache/solr/util/LongSet.java
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.solr.util;
-
-import java.util.NoSuchElementException;
-
-/**
- * Collects long values in a hash set (closed hashing on power-of-two sized long[])
- *
- * @lucene.internal
- */
-public class LongSet {
-
-  private static final float LOAD_FACTOR = 0.7f;
-
-  private long[] vals;
-  private int cardinality;
-  private int mask;
-  private int threshold;
-  private int zeroCount; // 1 if a 0 was collected
-
-  public LongSet(int sz) {
-    sz = Math.max(org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo(sz), 2);
-    vals = new long[sz];
-    mask = sz - 1;
-    threshold = (int) (sz * LOAD_FACTOR);
-  }
-
-  /**
-   * Returns the long[] array that has entries filled in with values or "0" for empty. To see if "0"
-   * itself is in the set, call containsZero()
-   */
-  public long[] getBackingArray() {
-    return vals;
-  }
-
-  public boolean containsZero() {
-    return zeroCount != 0;
-  }
-
-  /**
-   * Adds an additional value to the set, returns true if the set did not already contain the value.
-   */
-  public boolean add(long val) {
-    if (val == 0) {
-      if (zeroCount != 0) {
-        return false;
-      } else {
-        zeroCount = 1;
-        return true;
-      }
-    }
-    if (cardinality >= threshold) {
-      rehash();
-    }
-
-    // For floats: exponent bits start at bit 23 for single precision,
-    // and bit 52 for double precision.
-    // Many values will only have significant bits just to the right of that.
-
-    // For now, lets just settle to get first 8 significant mantissa bits of double or float in the
-    // lowest bits of our hash. The upper bits of our hash will be irrelevant.
-    int h = (int) (val + (val >>> 44) + (val >>> 15));
-    for (int slot = h & mask; ; slot = (slot + 1) & mask) {
-      long v = vals[slot];
-      if (v == 0) {
-        vals[slot] = val;
-        cardinality++;
-        return true;
-      } else if (v == val) {
-        // val is already in the set
-        break;
-      }
-    }
-    return false;
-  }
-
-  private void rehash() {
-    long[] oldVals = vals;
-    int newCapacity = vals.length << 1;
-    vals = new long[newCapacity];
-    mask = newCapacity - 1;
-    threshold = (int) (newCapacity * LOAD_FACTOR);
-    cardinality = 0;
-
-    for (long val : oldVals) {
-      if (val != 0) {
-        add(val);
-      }
-    }
-  }
-
-  /** The number of values in the set */
-  public int cardinality() {
-    return cardinality + zeroCount;
-  }
-
-  /** Returns an iterator over the values in the set. */
-  public LongIterator iterator() {
-    return new LongIterator() {
-      private int remainingValues = cardinality();
-      private int valsIdx = 0;
-
-      @Override
-      public boolean hasNext() {
-        return remainingValues > 0;
-      }
-
-      @Override
-      public long next() {
-        if (!hasNext()) {
-          throw new NoSuchElementException();
-        }
-        remainingValues--;
-
-        if (remainingValues == 0 && zeroCount > 0) {
-          return 0;
-        }
-
-        while (true) { // guaranteed to find another value if we get here
-          long value = vals[valsIdx++];
-          if (value != 0) {
-            return value;
-          }
-        }
-      }
-    };
-  }
-}
diff --git a/solr/core/src/test/org/apache/solr/util/LongSetTest.java b/solr/core/src/test/org/apache/solr/util/LongSetTest.java
index 4d3183ccdea..5fc3ffd94d0 100644
--- a/solr/core/src/test/org/apache/solr/util/LongSetTest.java
+++ b/solr/core/src/test/org/apache/solr/util/LongSetTest.java
@@ -16,7 +16,13 @@
  */
 package org.apache.solr.util;
 
+import static com.carrotsearch.hppc.HashContainers.MIN_HASH_ARRAY_LENGTH;
+
+import com.carrotsearch.hppc.LongHashSet;
+import com.carrotsearch.hppc.LongSet;
+import com.carrotsearch.hppc.cursors.LongCursor;
 import java.util.HashSet;
+import java.util.Iterator;
 import org.apache.solr.SolrTestCase;
 import org.junit.Test;
 
@@ -24,69 +30,68 @@ public class LongSetTest extends SolrTestCase {
 
   @Test
   public void testZeroInitialCapacity() {
-    final LongSet ls = new LongSet(0);
-    assertEquals(0, ls.cardinality());
-    assertEquals(2, ls.getBackingArray().length);
-    assertFalse(ls.containsZero());
+    final LongHashSet ls = new LongHashSet(0);
+    assertEquals(0, ls.size());
+    assertEquals(MIN_HASH_ARRAY_LENGTH, ls.keys.length - 1); // -1 to correct for empty slot
+    assertFalse(ls.contains(0));
     assertFalse(ls.iterator().hasNext());
 
     final HashSet<Long> hs = new HashSet<>();
     for (long jj = 1; jj <= 10; ++jj) {
       assertTrue(ls.add(jj));
       assertFalse(ls.add(jj));
-      assertEquals(jj, ls.cardinality());
+      assertEquals(jj, ls.size());
       assertTrue(hs.add(jj));
       assertFalse(hs.add(jj));
     }
 
-    final LongIterator it = ls.iterator();
-    while (it.hasNext()) {
-      hs.remove(it.next());
+    for (LongCursor c : ls) {
+      hs.remove(c.value);
     }
     assertTrue(hs.isEmpty());
 
-    assertEquals(10, ls.cardinality());
-    assertEquals(16, ls.getBackingArray().length);
+    assertEquals(10, ls.size());
+    assertEquals(16, ls.keys.length - 1); // -1 to correct for empty slot
   }
 
   @Test
   public void testAddZero() {
-    final LongSet ls = new LongSet(1);
-    assertEquals(0, ls.cardinality());
-    assertFalse(ls.containsZero());
+    final LongSet ls = new LongHashSet(1);
+    assertEquals(0, ls.size());
+    assertFalse(ls.contains(0));
     assertFalse(ls.iterator().hasNext());
 
     assertTrue(ls.add(0L));
-    assertTrue(ls.containsZero());
+    assertTrue(ls.contains(0));
     assertFalse(ls.add(0L));
-    assertTrue(ls.containsZero());
+    assertTrue(ls.contains(0));
 
-    final LongIterator it = ls.iterator();
+    final Iterator<LongCursor> it = ls.iterator();
     assertTrue(it.hasNext());
-    assertEquals(0L, it.next());
+    assertEquals(0L, it.next().value);
     assertFalse(it.hasNext());
   }
 
   @Test
   public void testIterating() {
-    final LongSet ls = new LongSet(4);
+    final LongSet ls = new LongHashSet(4);
     assertTrue(ls.add(0L));
     assertTrue(ls.add(6L));
     assertTrue(ls.add(7L));
     assertTrue(ls.add(42L));
 
-    final LongIterator it = ls.iterator();
+    final Iterator<LongCursor> it = ls.iterator();
     // non-zero values are returned first
     assertTrue(it.hasNext());
-    assertNotEquals(0L, it.next());
+    assertNotEquals(0L, it.next().value);
     assertTrue(it.hasNext());
-    assertNotEquals(0L, it.next());
+    assertNotEquals(0L, it.next().value);
     assertTrue(it.hasNext());
-    assertNotEquals(0L, it.next());
+    assertNotEquals(0L, it.next().value);
 
     // and zero value (if any) is returned last
     assertTrue(it.hasNext());
-    assertEquals(0L, it.next());
+    assertEquals(0L, it.next().value);
     assertFalse(it.hasNext());
   }
 }