You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/11/05 17:35:13 UTC

svn commit: r1636914 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/util/ lucene/core/src/java/org/apache/lucene/util/packed/ lucene/core/src/test/org/apache/lucene/util/

Author: jpountz
Date: Wed Nov  5 16:35:13 2014
New Revision: 1636914

URL: http://svn.apache.org/r1636914
Log:
LUCENE-6040: Speedup broadword bit selection.

Added:
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java
      - copied unchanged from r1636913, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java
Removed:
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BroadWord.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestBroadWord.java
Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/core/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitUtil.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1636914&r1=1636913&r2=1636914&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Wed Nov  5 16:35:13 2014
@@ -258,6 +258,9 @@ Optimizations
 * LUCENE-6030: Add norms patched compression for a small number of common values
   (Ryan Ernst)
 
+* LUCENE-6040: Speed up EliasFanoDocIdSet through broadword bit selection.
+  (Paul Elschot)
+
 Build
 
 * LUCENE-5909: Smoke tester now has better command line parsing and

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitUtil.java?rev=1636914&r1=1636913&r2=1636914&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitUtil.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitUtil.java Wed Nov  5 16:35:13 2014
@@ -212,4 +212,64 @@ public final class BitUtil {
      return ((l >>> 1) ^ -(l & 1));
    }
 
+
+  /** Select a 1-bit from a long. See also LUCENE-6040.
+   * @return The index of the r-th 1 bit in x. This bit must exist.
+   */
+  public static int select(long x, int r) {
+    long s = x - ((x & 0xAAAAAAAAAAAAAAAAL) >>> 1); // pairwise bitsums
+    s = (s & 0x3333333333333333L) + ((s >>> 2) & 0x3333333333333333L); // nibblewise bitsums
+    s = ((s + (s >>> 4)) & 0x0F0F0F0F0F0F0F0FL) * L8_L; // bytewise bitsums, cumulative
+
+    int b = (Long.numberOfTrailingZeros((s + psOverflow[r-1]) & (L8_L << 7)) >> 3) << 3; // bit position of byte with r-th 1 bit.
+    long l = r - (((s << 8) >>> b) & 0xFFL); // bit rank in byte at b
+
+    // Select bit l from byte (x >>> b):
+    int selectIndex = (int) (((x >>> b) & 0xFFL) | ((l-1) << 8));
+    int res = b + select256[selectIndex];
+    return res;
+  }
+
+  private final static long L8_L = 0x0101010101010101L;
+
+  private static final long[] psOverflow = new long[64];
+  static {
+    for (int s = 1; s <= 64; s++) {
+      psOverflow[s-1] = (128-s) * L8_L;
+    }
+  }
+
+  private static final byte[] select256 = new byte[8 * 256];
+  static {
+    for (int b = 0; b <= 0xFF; b++) {
+      for (int s = 1; s <= 8; s++) {
+        int byteIndex = b | ((s-1) << 8);
+        int bitIndex = selectNaive(b, s);
+        if (bitIndex < 0) {
+          bitIndex = 127; // positive as byte
+        }
+        assert bitIndex >= 0;
+        assert ((byte) bitIndex) >= 0; // non negative as byte, no need to mask the sign
+        select256[byteIndex] = (byte) bitIndex;
+      }
+    }
+  }
+
+  /**
+   * Naive implementation of {@link #select(long,int)}, using {@link Long#numberOfTrailingZeros} repetitively.
+   * Works relatively fast for low ranks.
+   * @return The index of the r-th 1 bit in x, or -1 if no such bit exists.
+   */
+  public static int selectNaive(long x, int r) {
+    assert r >= 1;
+    int s = -1;
+    while ((x != 0L) && (r > 0)) {
+      int ntz = Long.numberOfTrailingZeros(x);
+      x >>>= (ntz + 1);
+      s += (ntz + 1);
+      r -= 1;
+    }
+    int res = (r > 0) ? -1 : s;
+    return res;
+  }
 }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java?rev=1636914&r1=1636913&r2=1636914&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java Wed Nov  5 16:35:13 2014
@@ -17,7 +17,7 @@
 
 package org.apache.lucene.util.packed;
 
-import org.apache.lucene.util.BroadWord; // bit selection in long
+import org.apache.lucene.util.BitUtil; // bit selection in long
 
 
 /** A decoder for an {@link EliasFanoEncoder}.
@@ -312,9 +312,10 @@ public class EliasFanoDecoder {
     if (rank >= 1) {
       long invCurHighLong = ~curHighLong;
       int clearBitForValue = (rank <= 8)
-                              ? BroadWord.selectNaive(invCurHighLong, rank)
-                              : BroadWord.select(invCurHighLong, rank);
-      assert clearBitForValue <= (Long.SIZE-1);
+                              ? BitUtil.selectNaive(invCurHighLong, rank)
+                              : BitUtil.select(invCurHighLong, rank);
+      assert clearBitForValue >= 0;
+      assert clearBitForValue <= Long.SIZE-1;
       setBitForIndex += clearBitForValue + 1; // the high bit just before setBitForIndex is zero
       int oneBitsBeforeClearBit = clearBitForValue - rank + 1;
       efIndex += oneBitsBeforeClearBit; // the high bit at setBitForIndex and belongs to the unary code for efIndex