You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/06/16 16:26:34 UTC

svn commit: r1602877 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/util/ lucene/core/src/java/org/apache/lucene/util/packed/ lucene/core/src/test/org/apache/lucene/util/ lucene/test-framework/ lucen...

Author: jpountz
Date: Mon Jun 16 14:26:34 2014
New Revision: 1602877

URL: http://svn.apache.org/r1602877
Log:
LUCENE-5764: Add tests to DocIdSet.ramBytesUsed.

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/   (props changed)
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/DocIdBitSet.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/OpenBitSet.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/PForDeltaDocIdSet.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/WAH8DocIdSet.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDocIdSet.java
    lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoEncoder.java
    lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestDocIdBitSet.java
    lucene/dev/branches/branch_4x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/BaseDocIdSetTestCase.java

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/DocIdBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/DocIdBitSet.java?rev=1602877&r1=1602876&r2=1602877&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/DocIdBitSet.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/DocIdBitSet.java Mon Jun 16 14:26:34 2014
@@ -25,6 +25,12 @@ import org.apache.lucene.search.DocIdSet
 
 /** Simple DocIdSet and DocIdSetIterator backed by a BitSet */
 public class DocIdBitSet extends DocIdSet implements Bits {
+
+  private static final long BASE_RAM_BYTES_USED =
+        RamUsageEstimator.shallowSizeOfInstance(DocIdBitSet.class)
+      + RamUsageEstimator.shallowSizeOfInstance(BitSet.class)
+      + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // the array that stores the bits
+
   private final BitSet bitSet;
     
   public DocIdBitSet(BitSet bitSet) {
@@ -67,7 +73,9 @@ public class DocIdBitSet extends DocIdSe
 
   @Override
   public long ramBytesUsed() {
-    return RamUsageEstimator.NUM_BYTES_OBJECT_REF + (bitSet.size() + 7) >>> 3;
+    // '+' binds tighter than '>>>': parenthesize so only the bit count is converted to bytes.
+    // Still likely underestimated if the BitSet implementation over-sizes its backing array.
+    return BASE_RAM_BYTES_USED + ((bitSet.size() + 7) >>> 3);
   }
 
   private static class DocIdBitSetIterator extends DocIdSetIterator {

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java?rev=1602877&r1=1602876&r2=1602877&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/FixedBitSet.java Mon Jun 16 14:26:34 2014
@@ -33,6 +33,8 @@ import org.apache.lucene.search.DocIdSet
  */
 public final class FixedBitSet extends DocIdSet implements Bits {
 
+  private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(FixedBitSet.class);
+
   /**
    * A {@link DocIdSetIterator} which iterates over set bits in a
    * {@link FixedBitSet}.
@@ -220,10 +222,7 @@ public final class FixedBitSet extends D
 
   @Override
   public long ramBytesUsed() {
-    return RamUsageEstimator.alignObjectSize(
-          RamUsageEstimator.NUM_BYTES_OBJECT_REF // the reference to the long[]
-        + RamUsageEstimator.NUM_BYTES_INT * 2)   // numBits and numWords
-        + RamUsageEstimator.sizeOf(bits);        // the bits
+    return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(bits);
   }
 
   /** Expert. */

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/OpenBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/OpenBitSet.java?rev=1602877&r1=1602876&r2=1602877&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/OpenBitSet.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/OpenBitSet.java Mon Jun 16 14:26:34 2014
@@ -75,6 +75,9 @@ Test system: AMD Opteron, 64 bit linux, 
  */
 
 public class OpenBitSet extends DocIdSet implements Bits, Cloneable {
+
+  private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OpenBitSet.class);
+
   protected long[] bits;
   protected int wlen;   // number of words (elements) used in the array
 
@@ -133,11 +136,7 @@ public class OpenBitSet extends DocIdSet
 
   @Override
   public long ramBytesUsed() {
-    return RamUsageEstimator.alignObjectSize(
-          RamUsageEstimator.NUM_BYTES_OBJECT_REF
-        + RamUsageEstimator.NUM_BYTES_LONG
-        + RamUsageEstimator.NUM_BYTES_INT)
-        + RamUsageEstimator.sizeOf(bits);
+    return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(bits);
   }
 
   /** Returns the current capacity in bits (1 greater than the index of the last bit) */

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/PForDeltaDocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/PForDeltaDocIdSet.java?rev=1602877&r1=1602876&r2=1602877&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/PForDeltaDocIdSet.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/PForDeltaDocIdSet.java Mon Jun 16 14:26:34 2014
@@ -35,6 +35,8 @@ import org.apache.lucene.util.packed.Pac
  */
 public final class PForDeltaDocIdSet extends DocIdSet implements Accountable {
 
+  private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(PForDeltaDocIdSet.class);
+
   static final int BLOCK_SIZE = 128;
   static final int MAX_EXCEPTIONS = 24; // no more than 24 exceptions per block
   static final PackedInts.Decoder[] DECODERS = new PackedInts.Decoder[32];
@@ -513,7 +515,17 @@ public final class PForDeltaDocIdSet ext
 
   @Override
   public long ramBytesUsed() {
-    return RamUsageEstimator.alignObjectSize(3 * RamUsageEstimator.NUM_BYTES_OBJECT_REF) + docIDs.ramBytesUsed() + offsets.ramBytesUsed();
+    if (this == EMPTY) {
+      return 0L;
+    }
+    long ramBytesUsed = BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(data);
+    if (docIDs != SINGLE_ZERO_BUFFER) {
+      ramBytesUsed += docIDs.ramBytesUsed();
+    }
+    if (offsets != SINGLE_ZERO_BUFFER) {
+      ramBytesUsed += offsets.ramBytesUsed();
+    }
+    return ramBytesUsed;
   }
 
 }

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/WAH8DocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/WAH8DocIdSet.java?rev=1602877&r1=1602876&r2=1602877&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/WAH8DocIdSet.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/WAH8DocIdSet.java Mon Jun 16 14:26:34 2014
@@ -76,6 +76,8 @@ import org.apache.lucene.util.packed.Pac
  */
 public final class WAH8DocIdSet extends DocIdSet implements Accountable {
 
+  private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(WAH8DocIdSet.class);
+
   // Minimum index interval, intervals below this value can't guarantee anymore
   // that this set implementation won't be significantly larger than a FixedBitSet
   // The reason is that a single sequence saves at least one byte and an index
@@ -738,10 +740,17 @@ public final class WAH8DocIdSet extends 
 
   @Override
   public long ramBytesUsed() {
-    return RamUsageEstimator.alignObjectSize(3 * RamUsageEstimator.NUM_BYTES_OBJECT_REF + 2 * RamUsageEstimator.NUM_BYTES_INT)
-        + RamUsageEstimator.sizeOf(data)
-        + positions.ramBytesUsed()
-        + wordNums.ramBytesUsed();
+    if (this == EMPTY) {
+      return 0L;
+    }
+    long ramBytesUsed = BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(data);
+    if (positions != SINGLE_ZERO_BUFFER) {
+      ramBytesUsed += positions.ramBytesUsed();
+    }
+    if (wordNums != SINGLE_ZERO_BUFFER) {
+      ramBytesUsed += wordNums.ramBytesUsed();
+    }
+    return ramBytesUsed;
   }
 
 }

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDocIdSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDocIdSet.java?rev=1602877&r1=1602876&r2=1602877&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDocIdSet.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoDocIdSet.java Mon Jun 16 14:26:34 2014
@@ -29,6 +29,9 @@ import org.apache.lucene.util.RamUsageEs
  * @lucene.internal
  */
 public class EliasFanoDocIdSet extends DocIdSet {
+
+  private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(EliasFanoDocIdSet.class);
+
   final EliasFanoEncoder efEncoder;
 
   /**
@@ -129,7 +132,7 @@ public class EliasFanoDocIdSet extends D
 
   @Override
   public long ramBytesUsed() {
-    return RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_REF) + efEncoder.ramBytesUsed();
+    return BASE_RAM_BYTES_USED + efEncoder.ramBytesUsed();
   }
 }
 

Modified: lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoEncoder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoEncoder.java?rev=1602877&r1=1602876&r2=1602877&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoEncoder.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/java/org/apache/lucene/util/packed/EliasFanoEncoder.java Mon Jun 16 14:26:34 2014
@@ -85,6 +85,9 @@ import org.apache.lucene.util.ToStringUt
  */
 
 public class EliasFanoEncoder implements Accountable {
+
+  private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(EliasFanoEncoder.class);
+
   final long numValues;
   private final long upperBound;
   final int numLowBits;
@@ -354,10 +357,7 @@ public class EliasFanoEncoder implements
 
   @Override
   public long ramBytesUsed() {
-    return RamUsageEstimator.alignObjectSize(
-          RamUsageEstimator.NUM_BYTES_OBJECT_REF * 3
-        + RamUsageEstimator.NUM_BYTES_LONG * 8
-        + RamUsageEstimator.NUM_BYTES_INT * 2)
+    return BASE_RAM_BYTES_USED
         + RamUsageEstimator.sizeOf(lowerLongs)
         + RamUsageEstimator.sizeOf(upperLongs)
         + RamUsageEstimator.sizeOf(upperZeroBitPositionIndex);

Modified: lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestDocIdBitSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestDocIdBitSet.java?rev=1602877&r1=1602876&r2=1602877&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestDocIdBitSet.java (original)
+++ lucene/dev/branches/branch_4x/lucene/core/src/test/org/apache/lucene/util/TestDocIdBitSet.java Mon Jun 16 14:26:34 2014
@@ -3,6 +3,8 @@ package org.apache.lucene.util;
 import java.io.IOException;
 import java.util.BitSet;
 
+import org.junit.Ignore;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -26,5 +28,11 @@ public class TestDocIdBitSet extends Bas
   public DocIdBitSet copyOf(BitSet bs, int length) throws IOException {
     return new DocIdBitSet((BitSet) bs.clone());
   }
-  
+
+  @Override
+  @Ignore("no access to the internals of this impl")
+  public void testRamBytesUsed() throws IOException {
+    super.testRamBytesUsed();
+  }
+
 }

Modified: lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/BaseDocIdSetTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/BaseDocIdSetTestCase.java?rev=1602877&r1=1602876&r2=1602877&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/BaseDocIdSetTestCase.java (original)
+++ lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/BaseDocIdSetTestCase.java Mon Jun 16 14:26:34 2014
@@ -113,6 +113,21 @@ public abstract class BaseDocIdSetTestCa
     }
   }
 
+  /** Test ram usage estimation. */
+  public void testRamBytesUsed() throws IOException {
+    final int iters = 100;
+    for (int i = 0; i < iters; ++i) {
+      final int pow = random().nextInt(20);
+      final int maxDoc = TestUtil.nextInt(random(), 1, 1 << pow);
+      final int numDocs = TestUtil.nextInt(random(), 0, Math.min(maxDoc, 1 << TestUtil.nextInt(random(), 0, pow)));
+      final BitSet set = randomSet(maxDoc, numDocs);
+      final DocIdSet copy = copyOf(set, maxDoc);
+      final long actualBytes = ramBytesUsed(copy, maxDoc);
+      final long expectedBytes = copy.ramBytesUsed();
+      assertEquals(expectedBytes, actualBytes);
+    }
+  }
+
   /** Assert that the content of the {@link DocIdSet} is the same as the content of the {@link BitSet}. */
   public void assertEquals(int numBits, BitSet ds1, T ds2) throws IOException {
     // nextDoc
@@ -172,4 +187,21 @@ public abstract class BaseDocIdSetTestCa
     }
   }
 
+  private static class Dummy {
+    @SuppressWarnings("unused")
+    Object o1, o2;
+  }
+
+  // same as RamUsageTester.sizeOf but tries to not take into account resources
+  // that might be shared across instances
+  private long ramBytesUsed(DocIdSet set, int length) throws IOException {
+    Dummy dummy = new Dummy();
+    dummy.o1 = copyOf(new BitSet(length), length);
+    dummy.o2 = set;
+    long bytes1 = RamUsageTester.sizeOf(dummy);
+    dummy.o2 = null;
+    long bytes2 = RamUsageTester.sizeOf(dummy);
+    return bytes1 - bytes2;
+  }
+
 }