You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/11/05 17:35:13 UTC
svn commit: r1636914 - in /lucene/dev/branches/branch_5x: ./ lucene/
lucene/core/ lucene/core/src/java/org/apache/lucene/util/
lucene/core/src/java/org/apache/lucene/util/packed/
lucene/core/src/test/org/apache/lucene/util/
Author: jpountz
Date: Wed Nov 5 16:35:13 2014
New Revision: 1636914
URL: http://svn.apache.org/r1636914
Log:
LUCENE-6040: Speedup broadword bit selection.
Added:
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java
- copied unchanged from r1636913, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java
Removed:
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BroadWord.java
lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/TestBroadWord.java
Modified:
lucene/dev/branches/branch_5x/ (props changed)
lucene/dev/branches/branch_5x/lucene/ (props changed)
lucene/dev/branches/branch_5x/lucene/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_5x/lucene/core/ (props changed)
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitUtil.java
lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java
Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1636914&r1=1636913&r2=1636914&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Wed Nov 5 16:35:13 2014
@@ -258,6 +258,9 @@ Optimizations
* LUCENE-6030: Add norms patched compression for a small number of common values
(Ryan Ernst)
+* LUCENE-6040: Speed up EliasFanoDocIdSet through broadword bit selection.
+ (Paul Elschot)
+
Build
* LUCENE-5909: Smoke tester now has better command line parsing and
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitUtil.java?rev=1636914&r1=1636913&r2=1636914&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitUtil.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/BitUtil.java Wed Nov 5 16:35:13 2014
@@ -212,4 +212,64 @@ public final class BitUtil {
return ((l >>> 1) ^ -(l & 1));
}
+
+ /** Select a 1-bit from a long. See also LUCENE-6040.
+ * @return The index of the r-th 1 bit in x. This bit must exist.
+ */
+ public static int select(long x, int r) {
+ long s = x - ((x & 0xAAAAAAAAAAAAAAAAL) >>> 1); // pairwise bitsums
+ s = (s & 0x3333333333333333L) + ((s >>> 2) & 0x3333333333333333L); // nibblewise bitsums
+ s = ((s + (s >>> 4)) & 0x0F0F0F0F0F0F0F0FL) * L8_L; // bytewise bitsums, cumulative
+
+ int b = (Long.numberOfTrailingZeros((s + psOverflow[r-1]) & (L8_L << 7)) >> 3) << 3; // bit position of byte with r-th 1 bit.
+ long l = r - (((s << 8) >>> b) & 0xFFL); // bit rank in byte at b
+
+ // Select bit l from byte (x >>> b):
+ int selectIndex = (int) (((x >>> b) & 0xFFL) | ((l-1) << 8));
+ int res = b + select256[selectIndex];
+ return res;
+ }
+
+ private final static long L8_L = 0x0101010101010101L;
+
+ private static final long[] psOverflow = new long[64];
+ static {
+ for (int s = 1; s <= 64; s++) {
+ psOverflow[s-1] = (128-s) * L8_L;
+ }
+ }
+
+ private static final byte[] select256 = new byte[8 * 256];
+ static {
+ for (int b = 0; b <= 0xFF; b++) {
+ for (int s = 1; s <= 8; s++) {
+ int byteIndex = b | ((s-1) << 8);
+ int bitIndex = selectNaive(b, s);
+ if (bitIndex < 0) {
+ bitIndex = 127; // positive as byte
+ }
+ assert bitIndex >= 0;
+ assert ((byte) bitIndex) >= 0; // non negative as byte, no need to mask the sign
+ select256[byteIndex] = (byte) bitIndex;
+ }
+ }
+ }
+
+ /**
+ * Naive implementation of {@link #select(long,int)}, using {@link Long#numberOfTrailingZeros} repetitively.
+ * Works relatively fast for low ranks.
+ * @return The index of the r-th 1 bit in x, or -1 if no such bit exists.
+ */
+ public static int selectNaive(long x, int r) {
+ assert r >= 1;
+ int s = -1;
+ while ((x != 0L) && (r > 0)) {
+ int ntz = Long.numberOfTrailingZeros(x);
+ x >>>= (ntz + 1);
+ s += (ntz + 1);
+ r -= 1;
+ }
+ int res = (r > 0) ? -1 : s;
+ return res;
+ }
}
Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java?rev=1636914&r1=1636913&r2=1636914&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDecoder.java Wed Nov 5 16:35:13 2014
@@ -17,7 +17,7 @@
package org.apache.lucene.util.packed;
-import org.apache.lucene.util.BroadWord; // bit selection in long
+import org.apache.lucene.util.BitUtil; // bit selection in long
/** A decoder for an {@link EliasFanoEncoder}.
@@ -312,9 +312,10 @@ public class EliasFanoDecoder {
if (rank >= 1) {
long invCurHighLong = ~curHighLong;
int clearBitForValue = (rank <= 8)
- ? BroadWord.selectNaive(invCurHighLong, rank)
- : BroadWord.select(invCurHighLong, rank);
- assert clearBitForValue <= (Long.SIZE-1);
+ ? BitUtil.selectNaive(invCurHighLong, rank)
+ : BitUtil.select(invCurHighLong, rank);
+ assert clearBitForValue >= 0;
+ assert clearBitForValue <= Long.SIZE-1;
setBitForIndex += clearBitForValue + 1; // the high bit just before setBitForIndex is zero
int oneBitsBeforeClearBit = clearBitForValue - rank + 1;
efIndex += oneBitsBeforeClearBit; // the high bit at setBitForIndex and belongs to the unary code for efIndex