You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by us...@apache.org on 2012/03/23 18:05:59 UTC

svn commit: r1304485 - in /lucene/dev/trunk/lucene/core/src: java/org/apache/lucene/util/ test/org/apache/lucene/util/

Author: uschindler
Date: Fri Mar 23 17:05:58 2012
New Revision: 1304485

URL: http://svn.apache.org/viewvc?rev=1304485&view=rev
Log:
LUCENE-3867: Fix incorrect short-circuit, improve memory usage.

Added:
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java   (with props)
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimatorOnWildAnimals.java   (with props)
Modified:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants.java?rev=1304485&r1=1304484&r2=1304485&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants.java Fri Mar 23 17:05:58 2012
@@ -27,6 +27,11 @@ import org.apache.lucene.LucenePackage;
 public final class Constants {
   private Constants() {}			  // can't construct
 
+  /** JVM vendor info. */
+  public static final String JVM_VENDOR = System.getProperty("java.vm.vendor");
+  public static final String JVM_VERSION = System.getProperty("java.vm.version");
+  public static final String JVM_NAME = System.getProperty("java.vm.name");
+
   /** The value of <tt>System.getProperty("java.version")</tt>. **/
   public static final String JAVA_VERSION = System.getProperty("java.version");
  

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java?rev=1304485&r1=1304484&r2=1304485&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java Fri Mar 23 17:05:58 2012
@@ -24,14 +24,50 @@ import java.text.DecimalFormatSymbols;
 import java.util.*;
 
 /**
- * Estimates the size of Java objects using a simple memory model
- * for primitive size information.
+ * Estimates the size (memory representation) of Java objects.
+ * 
+ * @see #sizeOf(Object)
+ * @see #shallowSizeOf(Object)
+ * @see #shallowSizeOfInstance(Class)
  * 
  * @lucene.internal
  */
 public final class RamUsageEstimator {
+  /**
+   * JVM diagnostic features.
+   */
+  public static enum JvmFeature {
+    OBJECT_REFERENCE_SIZE("Object reference size estimated using array index scale."),
+    ARRAY_HEADER_SIZE("Array header size estimated using array based offset."),
+    FIELD_OFFSETS("Shallow instance size based on field offsets."),
+    OBJECT_ALIGNMENT("Object alignment retrieved from HotSpotDiagnostic MX bean.");
+
+    public final String description;
+
+    private JvmFeature(String description) {
+      this.description = description;
+    }
+    
+    @Override
+    public String toString() {
+      return super.name() + " (" + description + ")";
+    }
+  }
+
+  /** JVM info string for debugging and reports. */
+  public final static String JVM_INFO_STRING;
+
+  /** Number of bytes in one kilobyte. */
+  public static final long ONE_KB = 1024;
   
-  private RamUsageEstimator() {} // no instance
+  /** Number of bytes in one megabyte. */
+  public static final long ONE_MB = ONE_KB * ONE_KB;
+  
+  /** Number of bytes in one gigabyte. */
+  public static final long ONE_GB = ONE_KB * ONE_MB;
+
+  /** No instantiation. */
+  private RamUsageEstimator() {}
 
   public final static int NUM_BYTES_BOOLEAN = 1;
   public final static int NUM_BYTES_BYTE = 1;
@@ -42,9 +78,19 @@ public final class RamUsageEstimator {
   public final static int NUM_BYTES_LONG = 8;
   public final static int NUM_BYTES_DOUBLE = 8;
 
+  /** 
+   * Number of bytes this jvm uses to represent an object reference. 
+   */
   public final static int NUM_BYTES_OBJECT_REF;
-  
+
+  /**
+   * Number of bytes to represent an object header (no fields, no alignments).
+   */
   public final static int NUM_BYTES_OBJECT_HEADER;
+
+  /**
+   * Number of bytes to represent an array header (no content, but with alignments).
+   */
   public final static int NUM_BYTES_ARRAY_HEADER;
   
   /**
@@ -69,9 +115,20 @@ public final class RamUsageEstimator {
     primitiveSizes.put(long.class, Integer.valueOf(NUM_BYTES_LONG));
   }
 
+  /**
+   * A handle to <code>sun.misc.Unsafe</code>.
+   */
   private final static Object theUnsafe;
+  
+  /**
+   * A handle to <code>sun.misc.Unsafe#objectFieldOffset(Field)</code>.
+   */
   private final static Method objectFieldOffsetMethod;
-  private final static boolean useUnsafe, isSupportedJVM;
+
+  /**
+   * All the supported "internal" JVM features detected at clinit. 
+   */
+  private final static EnumSet<JvmFeature> supportedFeatures;
 
   /**
    * Initialize constants and try to collect information about the JVM internals. 
@@ -85,80 +142,71 @@ public final class RamUsageEstimator {
     // so on 64 bit JVMs it'll be align(16 + 4, @8) = 24.
     int arrayHeader = Constants.JRE_IS_64BIT ? 24 : 12;
 
-    Object unsafe = null;
-    Method objectFieldOffsetM = null;
-    boolean supportedJvm = true;
+    supportedFeatures = EnumSet.noneOf(JvmFeature.class);
+
+    Class<?> unsafeClass = null;
+    Object tempTheUnsafe = null;
     try {
-      final Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
+      unsafeClass = Class.forName("sun.misc.Unsafe");
       final Field unsafeField = unsafeClass.getDeclaredField("theUnsafe");
       unsafeField.setAccessible(true);
-      unsafe = unsafeField.get(null);
-      
-      // get object reference size by getting scale factor of Object[] arrays:
-      try {
-        final Method arrayIndexScaleM = unsafeClass.getMethod("arrayIndexScale", Class.class);
-        referenceSize = ((Number) arrayIndexScaleM.invoke(unsafe, Object[].class)).intValue();
-      } catch (Exception e) {
-        // ignore
-        supportedJvm = false;
-      }
-      
-      // updated best guess based on reference size:
-      objectHeader = Constants.JRE_IS_64BIT ? (8 + referenceSize) : 8;
-      arrayHeader = Constants.JRE_IS_64BIT ? (8 + 2 * referenceSize) : 12;
-      
-      // get the object header size:
-      // - first try out if the field offsets are not scaled (see warning in Unsafe docs)
-      // - get the object header size by getting the field offset of the first field of a dummy object
-      // If the scaling is byte-wise and unsafe is available, enable dynamic size measurement for
-      // estimateRamUsage().
-      try {
-        objectFieldOffsetM = unsafeClass.getMethod("objectFieldOffset", Field.class);
-        final Field dummy1Field = DummyTwoLongObject.class.getDeclaredField("dummy1");
-        final int ofs1 = ((Number) objectFieldOffsetM.invoke(unsafe, dummy1Field)).intValue();
-        final Field dummy2Field = DummyTwoLongObject.class.getDeclaredField("dummy2");
-        final int ofs2 = ((Number) objectFieldOffsetM.invoke(unsafe, dummy2Field)).intValue();
-        if (Math.abs(ofs2 - ofs1) == NUM_BYTES_LONG) {
-          final Field baseField = DummyOneFieldObject.class.getDeclaredField("base");
-          objectHeader = ((Number) objectFieldOffsetM.invoke(unsafe, baseField)).intValue();
-        } else {
-          // it is not safe to use Unsafe.objectFieldOffset(),
-          // as it may be scaled (see "cookie" comment in Unsafe), better use defaults
-          // and conventional size estimation:
-          objectFieldOffsetM = null;
-          supportedJvm = false;
-        }
-      } catch (Exception e) {
-        // on exception ensure useUnsafe will be set to false later:
-        objectFieldOffsetM = null;
-        supportedJvm = false;
-      }
+      tempTheUnsafe = unsafeField.get(null);
+    } catch (Exception e) {
+      // Ignore.
+    }
+    theUnsafe = tempTheUnsafe;
 
-      // Get the array header size by retrieving the array base offset
-      // (offset of the first element of an array).
-      try {
-        final Method arrayBaseOffsetM = unsafeClass.getMethod("arrayBaseOffset", Class.class);
-        // we calculate that only for byte[] arrays, it's actually the same for all types:
-        arrayHeader = ((Number) arrayBaseOffsetM.invoke(unsafe, byte[].class)).intValue();
-      } catch (Exception e) {
-        // ignore
-        supportedJvm = false;
+    // get object reference size by getting scale factor of Object[] arrays:
+    try {
+      final Method arrayIndexScaleM = unsafeClass.getMethod("arrayIndexScale", Class.class);
+      referenceSize = ((Number) arrayIndexScaleM.invoke(theUnsafe, Object[].class)).intValue();
+      supportedFeatures.add(JvmFeature.OBJECT_REFERENCE_SIZE);
+    } catch (Exception e) {
+      // ignore.
+    }
+
+    // "best guess" based on reference size. We will attempt to modify
+    // these to exact values if there is supported infrastructure.
+    objectHeader = Constants.JRE_IS_64BIT ? (8 + referenceSize) : 8;
+    arrayHeader =  Constants.JRE_IS_64BIT ? (8 + 2 * referenceSize) : 12;
+
+    // get the object header size:
+    // - first try out if the field offsets are not scaled (see warning in Unsafe docs)
+    // - get the object header size by getting the field offset of the first field of a dummy object
+    // If the scaling is byte-wise and unsafe is available, enable dynamic size measurement for
+    // estimateRamUsage().
+    Method tempObjectFieldOffsetMethod = null;
+    try {
+      final Method objectFieldOffsetM = unsafeClass.getMethod("objectFieldOffset", Field.class);
+      final Field dummy1Field = DummyTwoLongObject.class.getDeclaredField("dummy1");
+      final int ofs1 = ((Number) objectFieldOffsetM.invoke(theUnsafe, dummy1Field)).intValue();
+      final Field dummy2Field = DummyTwoLongObject.class.getDeclaredField("dummy2");
+      final int ofs2 = ((Number) objectFieldOffsetM.invoke(theUnsafe, dummy2Field)).intValue();
+      if (Math.abs(ofs2 - ofs1) == NUM_BYTES_LONG) {
+        final Field baseField = DummyOneFieldObject.class.getDeclaredField("base");
+        objectHeader = ((Number) objectFieldOffsetM.invoke(theUnsafe, baseField)).intValue();
+        supportedFeatures.add(JvmFeature.FIELD_OFFSETS);
+        tempObjectFieldOffsetMethod = objectFieldOffsetM;
       }
     } catch (Exception e) {
-      // ignore
-      supportedJvm = false;
+      // Ignore.
+    }
+    objectFieldOffsetMethod = tempObjectFieldOffsetMethod;
+
+    // Get the array header size by retrieving the array base offset
+    // (offset of the first element of an array).
+    try {
+      final Method arrayBaseOffsetM = unsafeClass.getMethod("arrayBaseOffset", Class.class);
+      // we calculate that only for byte[] arrays, it's actually the same for all types:
+      arrayHeader = ((Number) arrayBaseOffsetM.invoke(theUnsafe, byte[].class)).intValue();
+      supportedFeatures.add(JvmFeature.ARRAY_HEADER_SIZE);
+    } catch (Exception e) {
+      // Ignore.
     }
 
     NUM_BYTES_OBJECT_REF = referenceSize;
     NUM_BYTES_OBJECT_HEADER = objectHeader;
     NUM_BYTES_ARRAY_HEADER = arrayHeader;
-    useUnsafe = (unsafe != null && objectFieldOffsetM != null);
-    if (useUnsafe) {
-      theUnsafe = unsafe;
-      objectFieldOffsetMethod = objectFieldOffsetM;
-    } else {
-      theUnsafe = objectFieldOffsetMethod = null;
-    }
     
     // Try to get the object alignment (the default seems to be 8 on Hotspot, 
     // regardless of the architecture).
@@ -176,18 +224,34 @@ public final class RamUsageEstimator {
         objectAlignment = Integer.parseInt(
             vmOption.getClass().getMethod("getValue").invoke(vmOption).toString()
         );
+        supportedFeatures.add(JvmFeature.OBJECT_ALIGNMENT);
       } catch (InvocationTargetException ite) {
         if (!(ite.getCause() instanceof IllegalArgumentException))
           throw ite;
         // ignore the error completely and use default of 8 (32 bit JVMs).
       }
     } catch (Exception e) {
-      // ignore
-      supportedJvm = false;
+      // Ignore.
     }
+
     NUM_BYTES_OBJECT_ALIGNMENT = objectAlignment;
 
-    isSupportedJVM = supportedJvm;
+    JVM_INFO_STRING = "[JVM: " +
+        Constants.JVM_NAME + ", " + Constants.JVM_VERSION + ", " + Constants.JVM_VENDOR + ", " + 
+        Constants.JAVA_VENDOR + ", " + Constants.JAVA_VERSION + "]";
+  }
+
+  /**
+   * Cached information about a given class.   
+   */
+  private static final class ClassCache {
+    public final long alignedShallowInstanceSize;
+    public final Field[] referenceFields;
+
+    public ClassCache(long alignedShallowInstanceSize, Field[] referenceFields) {
+      this.alignedShallowInstanceSize = alignedShallowInstanceSize;
+      this.referenceFields = referenceFields;
+    }    
   }
 
   // Object with just one field to determine the object header size by getting the offset of the dummy field:
@@ -204,14 +268,14 @@ public final class RamUsageEstimator {
   }
   
   /** 
-   * Returns true, if the current JVM is supported by {@code RamUsageEstimator}.
+   * Returns true, if the current JVM is fully supported by {@code RamUsageEstimator}.
    * If this method returns {@code false} you are maybe using a 3rd party Java VM
    * that is not supporting Oracle/Sun private APIs. The memory estimates can be 
    * imprecise then (no way of detecting compressed references, alignments, etc.). 
    * Lucene still tries to use sensible defaults.
    */
   public static boolean isSupportedJVM() {
-    return isSupportedJVM;
+    return supportedFeatures.size() == JvmFeature.values().length;
   }
 
   /** 
@@ -272,13 +336,7 @@ public final class RamUsageEstimator {
    * should be GCed.</p>
    */
   public static long sizeOf(Object obj) {
-    final Set<Object> seen = Collections.newSetFromMap(new IdentityHashMap<Object,Boolean>(64));
-    try {
-      return measureObjectSize(obj, seen);
-    } finally {
-      // Help the GC.
-      seen.clear();
-    }
+    return measureObjectSize(obj);
   }
 
   /** 
@@ -292,7 +350,7 @@ public final class RamUsageEstimator {
     if (obj == null) return 0;
     final Class<?> clz = obj.getClass();
     if (clz.isArray()) {
-      return measureArraySize(obj, null);
+      return shallowSizeOfArray(obj);
     } else {
       return shallowSizeOfInstance(clz);
     }
@@ -302,8 +360,8 @@ public final class RamUsageEstimator {
    * Returns the shallow instance size in bytes an instance of the given class would occupy.
    * This works with all conventional classes and primitive types, but not with arrays
    * (the size then depends on the number of elements and varies from object to object).
-   * Use the array-instance methods instead.
    * 
+   * @see #shallowSizeOf(Object)
    * @throws IllegalArgumentException if {@code clazz} is an array class. 
    */
   public static long shallowSizeOfInstance(Class<?> clazz) {
@@ -313,87 +371,163 @@ public final class RamUsageEstimator {
       return primitiveSizes.get(clazz);
     
     long size = NUM_BYTES_OBJECT_HEADER;
-    
+
     // Walk type hierarchy
-    while (clazz != null) {
+    for (;clazz != null; clazz = clazz.getSuperclass()) {
       final Field[] fields = clazz.getDeclaredFields();
-      boolean fieldFound = false;
-      for (final Field f : fields) {
-        if (Modifier.isStatic(f.getModifiers())) {
-          continue;
+      for (Field f : fields) {
+        if (!Modifier.isStatic(f.getModifiers())) {
+          size = adjustForField(size, f);
         }
-
-        size = reflectFieldSize(size, f);
-        fieldFound = true;
       }
-      if (useUnsafe && fieldFound) {
-        // no need to recurse to superclasses, as all fields are
-        // added at the end, so we won't find any larger offset
-        break;
-      }
-      clazz = clazz.getSuperclass();
     }
     return alignObjectSize(size);    
   }
 
   /**
-   * Recursive descend into an object.
+   * Return shallow size of any <code>array</code>.
    */
-  private static long measureObjectSize(Object obj, Set<Object> seen) {
-    if (obj == null) {
-      return 0;
+  private static long shallowSizeOfArray(Object array) {
+    long size = NUM_BYTES_ARRAY_HEADER;
+    final int len = Array.getLength(array);
+    if (len > 0) {
+      Class<?> arrayElementClazz = array.getClass().getComponentType();
+      if (arrayElementClazz.isPrimitive()) {
+        size += (long) len * primitiveSizes.get(arrayElementClazz);
+      } else {
+        size += (long) NUM_BYTES_OBJECT_REF * len;
+      }
     }
+    return alignObjectSize(size);
+  }
 
-    // skip if we have seen before
-    if (seen.contains(obj)) {
-      return 0;
-    }
+  /*
+   * Non-recursive version of object descend. This consumes more memory than recursive in-depth 
+   * traversal but prevents stack overflows on long chains of objects
+   * or complex graphs (a max. recursion depth on my machine was ~5000 objects linked in a chain
+   * so not too much).  
+   */
+  private static long measureObjectSize(Object root) {
+    // Objects seen so far.
+    final IdentityHashSet<Object> seen = new IdentityHashSet<Object>();
+    // Class cache with reference Field and precalculated shallow size. 
+    final IdentityHashMap<Class<?>, ClassCache> classCache = new IdentityHashMap<Class<?>, ClassCache>();
+    // Stack of objects pending traversal. Recursion caused stack overflows. 
+    final ArrayList<Object> stack = new ArrayList<Object>();
+    stack.add(root);
+
+    long totalSize = 0;
+    while (!stack.isEmpty()) {
+      final Object ob = stack.remove(stack.size() - 1);
 
-    // add to seen
-    seen.add(obj);
+      if (ob == null || seen.contains(ob)) {
+        continue;
+      }
+      seen.add(ob);
 
-    Class<?> clazz = obj.getClass();
-    if (clazz.isArray()) {
-      return measureArraySize(obj, seen);
+      final Class<?> obClazz = ob.getClass();
+      if (obClazz.isArray()) {
+        /*
+         * Consider an array, possibly of primitive types. Push any of its references to
+         * the processing stack and accumulate this array's shallow size. 
+         */
+        long size = NUM_BYTES_ARRAY_HEADER;
+        final int len = Array.getLength(ob);
+        if (len > 0) {
+          Class<?> componentClazz = obClazz.getComponentType();
+          if (componentClazz.isPrimitive()) {
+            size += (long) len * primitiveSizes.get(componentClazz);
+          } else {
+            size += (long) NUM_BYTES_OBJECT_REF * len;
+
+            // Push refs for traversal later.
+            for (int i = len; --i >= 0 ;) {
+              final Object o = Array.get(ob, i);
+              if (o != null && !seen.contains(o)) {
+                stack.add(o);
+              }
+            }            
+          }
+        }
+        totalSize += alignObjectSize(size);
+      } else {
+        /*
+         * Consider an object. Push any references it has to the processing stack
+         * and accumulate this object's shallow size. 
+         */
+        try {
+          ClassCache cachedInfo = classCache.get(obClazz);
+          if (cachedInfo == null) {
+            classCache.put(obClazz, cachedInfo = createCacheEntry(obClazz));
+          }
+
+          for (Field f : cachedInfo.referenceFields) {
+            // Fast path to eliminate redundancies.
+            final Object o = f.get(ob);
+            if (o != null && !seen.contains(o)) {
+              stack.add(o);
+            }
+          }
+
+          totalSize += cachedInfo.alignedShallowInstanceSize;
+        } catch (IllegalAccessException e) {
+          // this should never happen as we enabled setAccessible().
+          throw new RuntimeException("Reflective field access failed?", e);
+        }
+      }
     }
 
-    long size = NUM_BYTES_OBJECT_HEADER;
-    long innerSize = 0L;
+    // Help the GC (?).
+    seen.clear();
+    stack.clear();
+    classCache.clear();
 
-    // walk type hierarchy
-    while (clazz != null) {
-      final Field[] fields = clazz.getDeclaredFields();
+    return totalSize;
+  }
+
+  /**
+   * Create a cached information about shallow size and reference fields for 
+   * a given class.
+   */
+  private static ClassCache createCacheEntry(final Class<?> clazz) {
+    ClassCache cachedInfo;
+    long shallowInstanceSize = NUM_BYTES_OBJECT_HEADER;
+    final ArrayList<Field> referenceFields = new ArrayList<Field>(32);
+    for (Class<?> c = clazz; c != null; c = c.getSuperclass()) {
+      final Field[] fields = c.getDeclaredFields();
       for (final Field f : fields) {
-        if (Modifier.isStatic(f.getModifiers())) {
-          continue;
-        }
+        if (!Modifier.isStatic(f.getModifiers())) {
+          shallowInstanceSize = adjustForField(shallowInstanceSize, f);
 
-        size = reflectFieldSize(size, f);
-        
-        if (!f.getType().isPrimitive()) {
-          try {
+          if (!f.getType().isPrimitive()) {
             f.setAccessible(true);
-            innerSize += measureObjectSize(f.get(obj), seen);
-          } catch (IllegalAccessException ex) {
-            // this should never happen as we enable setAccessible()!
-            throw new RuntimeException("Cannot reflect instance field: " +
-              f.getDeclaringClass().getName() + "#" + f.getName(), ex);
+            referenceFields.add(f);
           }
         }
       }
-      clazz = clazz.getSuperclass();
     }
-    return alignObjectSize(size) + innerSize;
+
+    cachedInfo = new ClassCache(
+        alignObjectSize(shallowInstanceSize), 
+        referenceFields.toArray(new Field[referenceFields.size()]));
+    return cachedInfo;
   }
-  
-  private static long reflectFieldSize(long size, final Field f) {
+
+  /**
+   * This method returns the maximum representation size of an object. <code>sizeSoFar</code>
+   * is the object's size measured so far. <code>f</code> is the field being probed.
+   * 
+   * <p>The returned offset will be the maximum of whatever was measured so far and 
+   * <code>f</code> field's offset and representation size (unaligned).
+   */
+  private static long adjustForField(long sizeSoFar, final Field f) {
     final Class<?> type = f.getType();
     final int fsize = type.isPrimitive() ? primitiveSizes.get(type) : NUM_BYTES_OBJECT_REF;
-    if (useUnsafe) {
+    if (objectFieldOffsetMethod != null) {
       try {
         final long offsetPlusSize =
           ((Number) objectFieldOffsetMethod.invoke(theUnsafe, f)).longValue() + fsize;
-        return Math.max(size, offsetPlusSize);
+        return Math.max(sizeSoFar, offsetPlusSize);
       } catch (IllegalAccessException ex) {
         throw new RuntimeException("Access problem with sun.misc.Unsafe", ex);
       } catch (InvocationTargetException ite) {
@@ -409,40 +543,22 @@ public final class RamUsageEstimator {
           f.getDeclaringClass().getName() + "#" + f.getName(), cause);
       }
     } else {
-      return size + fsize;
+      // TODO: No alignments based on field type/ subclass fields alignments?
+      return sizeSoFar + fsize;
     }
   }
 
-  /**
-   * Return the deep size of an <code>array</code>, including
-   * sub-objects if there are any.
-   * 
-   * @param seen A set of already seen objects. If <code>null</code> no references
-   *      are followed and this method returns shallow size.
-   */
-  private static long measureArraySize(Object array, Set<Object> seen) {
-    long size = NUM_BYTES_ARRAY_HEADER;
-    final int len = Array.getLength(array);
-    if (len > 0) {
-      Class<?> arrayElementClazz = array.getClass().getComponentType();
-      if (arrayElementClazz.isPrimitive()) {
-        size += (long) len * primitiveSizes.get(arrayElementClazz);
-      } else {
-        size += (long) NUM_BYTES_OBJECT_REF * len;
-        if (seen != null) {
-          for (int i = 0; i < len; i++) {
-            size += measureObjectSize(Array.get(array, i), seen);
-          }
-        }
-      }
-    }
-
-    return alignObjectSize(size);
+  /** Return the set of JVM features unsupported on this VM that, if available, would improve the estimation. */
+  public static EnumSet<JvmFeature> getUnsupportedFeatures() {
+    EnumSet<JvmFeature> unsupported = EnumSet.allOf(JvmFeature.class);
+    unsupported.removeAll(supportedFeatures);
+    return unsupported;
   }
 
-  public static final long ONE_KB = 1024;
-  public static final long ONE_MB = ONE_KB * ONE_KB;
-  public static final long ONE_GB = ONE_KB * ONE_MB;
+  /** Return the set of supported JVM features that improve the estimation. */
+  public static EnumSet<JvmFeature> getSupportedFeatures() {
+    return EnumSet.copyOf(supportedFeatures);
+  }
 
   /**
    * Returns <code>size</code> in human-readable units (GB, MB, KB or bytes).
@@ -466,4 +582,252 @@ public final class RamUsageEstimator {
       return bytes + " bytes";
     }
   }
+
+  /**
+   * Return a human-readable size of a given object.
+   * @see #sizeOf(Object)
+   * @see #humanReadableUnits(long)
+   */
+  public static String humanSizeOf(Object object) {
+    return humanReadableUnits(sizeOf(object));
+  }
+
+  /**
+   * An identity hash set implemented using open addressing. No null keys are allowed.
+   * 
+   * TODO: If this is useful outside this class, make it public - needs some work
+   */
+  static final class IdentityHashSet<KType> implements Iterable<KType> {
+    /**
+     * Default load factor.
+     */
+    public final static float DEFAULT_LOAD_FACTOR = 0.75f;
+
+    /**
+     * Minimum capacity for the set.
+     */
+    public final static int MIN_CAPACITY = 4;
+
+    /**
+     * All of set entries. Always of power of two length.
+     */
+    public Object[] keys;
+    
+    /**
+     * Cached number of assigned slots.
+     */
+    public int assigned;
+    
+    /**
+     * The load factor for this set (fraction of allocated or deleted slots before
+     * the buffers must be rehashed or reallocated).
+     */
+    public final float loadFactor;
+    
+    /**
+     * Cached capacity threshold at which we must resize the buffers.
+     */
+    private int resizeThreshold;
+    
+    /**
+     * Creates a hash set with the default capacity of 16 and a load
+     * factor of {@value #DEFAULT_LOAD_FACTOR}.
+     */
+    public IdentityHashSet() {
+      this(16, DEFAULT_LOAD_FACTOR);
+    }
+    
+    /**
+     * Creates a hash set with the given capacity, load factor of
+     * {@value #DEFAULT_LOAD_FACTOR}.
+     */
+    public IdentityHashSet(int initialCapacity) {
+      this(initialCapacity, DEFAULT_LOAD_FACTOR);
+    }
+    
+    /**
+     * Creates a hash set with the given capacity and load factor.
+     */
+    public IdentityHashSet(int initialCapacity, float loadFactor) {
+      initialCapacity = Math.max(MIN_CAPACITY, initialCapacity);
+      
+      assert initialCapacity > 0 : "Initial capacity must be between (0, "
+          + Integer.MAX_VALUE + "].";
+      assert loadFactor > 0 && loadFactor < 1 : "Load factor must be between (0, 1).";
+      this.loadFactor = loadFactor;
+      allocateBuffers(roundCapacity(initialCapacity));
+    }
+    
+    /**
+     * Adds a reference to the set. Null keys are not allowed.
+     */
+    public boolean add(KType e) {
+      assert e != null : "Null keys not allowed.";
+      
+      if (assigned >= resizeThreshold) expandAndRehash();
+      
+      final int mask = keys.length - 1;
+      int slot = rehash(e) & mask;
+      Object existing;
+      while ((existing = keys[slot]) != null) {
+        if (e == existing) {
+          return false; // already found.
+        }
+        slot = (slot + 1) & mask;
+      }
+      assigned++;
+      keys[slot] = e;
+      return true;
+    }
+
+    /**
+     * Checks if the set contains a given ref.
+     */
+    public boolean contains(KType e) {
+      final int mask = keys.length - 1;
+      int slot = rehash(e) & mask;
+      Object existing;
+      while ((existing = keys[slot]) != null) {
+        if (e == existing) {
+          return true;
+        }
+        slot = (slot + 1) & mask;
+      }
+      return false;
+    }
+
+    /** Rehash via MurmurHash.
+     * 
+     * <p>The implementation is based on the
+     * finalization step from Austin Appleby's
+     * <code>MurmurHash3</code>.
+     * 
+     * @see "http://sites.google.com/site/murmurhash/"
+     */
+    private static int rehash(Object o) {
+      int k = System.identityHashCode(o);
+      k ^= k >>> 16;
+      k *= 0x85ebca6b;
+      k ^= k >>> 13;
+      k *= 0xc2b2ae35;
+      k ^= k >>> 16;
+      return k;
+    }
+    
+    /**
+     * Expand the internal storage buffers (capacity) and rehash all
+     * currently assigned keys into the new buffers.
+     */
+    private void expandAndRehash() {
+      final Object[] oldKeys = this.keys;
+      
+      assert assigned >= resizeThreshold;
+      allocateBuffers(nextCapacity(keys.length));
+      
+      /*
+       * Rehash all assigned slots from the old hash table.
+       */
+      final int mask = keys.length - 1;
+      for (int i = 0; i < oldKeys.length; i++) {
+        final Object key = oldKeys[i];
+        if (key != null) {
+          int slot = rehash(key) & mask;
+          while (keys[slot] != null) {
+            slot = (slot + 1) & mask;
+          }
+          keys[slot] = key;
+        }
+      }
+      Arrays.fill(oldKeys, null);
+    }
+    
+    /**
+     * Allocate internal buffers for a given capacity.
+     * 
+     * @param capacity
+     *          New capacity (must be a power of two).
+     */
+    private void allocateBuffers(int capacity) {
+      this.keys = new Object[capacity];
+      this.resizeThreshold = (int) (capacity * DEFAULT_LOAD_FACTOR);
+    }
+    
+    /**
+     * Return the next possible capacity, counting from the current buffers' size.
+     */
+    protected int nextCapacity(int current) {
+      assert current > 0 && Long.bitCount(current) == 1 : "Capacity must be a power of two.";
+      assert ((current << 1) > 0) : "Maximum capacity exceeded ("
+          + (0x80000000 >>> 1) + ").";
+      
+      if (current < MIN_CAPACITY / 2) current = MIN_CAPACITY / 2;
+      return current << 1;
+    }
+    
+    /**
+     * Round the capacity to the next allowed value.
+     */
+    protected int roundCapacity(int requestedCapacity) {
+      // Maximum positive integer that is a power of two.
+      if (requestedCapacity > (0x80000000 >>> 1)) return (0x80000000 >>> 1);
+      
+      int capacity = MIN_CAPACITY;
+      while (capacity < requestedCapacity) {
+        capacity <<= 1;
+      }
+
+      return capacity;
+    }
+    
+    public void clear() {
+      assigned = 0;
+      Arrays.fill(keys, null);
+    }
+    
+    public int size() {
+      return assigned;
+    }
+    
+    public boolean isEmpty() {
+      return size() == 0;
+    }
+
+    @Override
+    public Iterator<KType> iterator() {
+      return new Iterator<KType>() {
+        int pos = -1;
+        Object nextElement = fetchNext();
+
+        @Override
+        public boolean hasNext() {
+          return nextElement != null;
+        }
+
+        @SuppressWarnings("unchecked")
+        @Override
+        public KType next() {
+          Object r = this.nextElement;
+          if (r == null) {
+            throw new NoSuchElementException();
+          }
+          this.nextElement = fetchNext();
+          return (KType) r;
+        }
+
+        private Object fetchNext() {
+          pos++;
+          while (pos < keys.length && keys[pos] == null) {
+            pos++;
+          }
+
+          return (pos >= keys.length ? null : keys[pos]);
+        }
+
+        @Override
+        public void remove() {
+          throw new UnsupportedOperationException();
+        }
+      };
+    }
+  }
 }

Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java?rev=1304485&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java (added)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java Fri Mar 23 17:05:58 2012
@@ -0,0 +1,39 @@
+package org.apache.lucene.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.*;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Cross-checks {@link RamUsageEstimator.IdentityHashSet} against a JDK
+ * identity set ({@link Collections#newSetFromMap} over an
+ * {@link IdentityHashMap}): contains/add results must agree on every
+ * operation, and the final contents must be identical.
+ */
+public class TestIdentityHashSet extends LuceneTestCase {
+  @Test
+  public void testCheck() {
+    Random rnd = random;
+    Set<Object> jdk = Collections.newSetFromMap(
+        new IdentityHashMap<Object,Boolean>());
+    RamUsageEstimator.IdentityHashSet<Object> us = new RamUsageEstimator.IdentityHashSet<Object>();
+
+    int max = 100000;
+    int threshold = 256;
+    for (int i = 0; i < max; i++) {
+      // some of these will be interned and some will not so there will be collisions.
+      Integer v = rnd.nextInt(threshold);
+      
+      boolean e1 = jdk.contains(v);
+      boolean e2 = us.contains(v);
+      Assert.assertEquals(e1, e2);
+
+      e1 = jdk.add(v);
+      e2 = us.add(v);
+      Assert.assertEquals(e1, e2);
+    }
+    
+    // Drain our set through its iterator and compare with the JDK set.
+    Set<Object> collected = Collections.newSetFromMap(
+        new IdentityHashMap<Object,Boolean>());
+    for (Object o : us) {
+      collected.add(o);
+    }
+    
+    Assert.assertEquals(collected, jdk);
+  }
+}

Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java?rev=1304485&r1=1304484&r2=1304485&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java Fri Mar 23 17:05:58 2012
@@ -22,86 +22,86 @@ import java.util.Random;
  */
 
 public class TestRamUsageEstimator extends LuceneTestCase {
+  public void testSanity() {
+    assertTrue(sizeOf(new String("test string")) > shallowSizeOfInstance(String.class));
 
-  public void testBasic() {
-    assertTrue(sizeOf(new String("test strin")) > shallowSizeOfInstance(String.class));
-    
     Holder holder = new Holder();
     holder.holder = new Holder("string2", 5000L);
     assertTrue(sizeOf(holder) > shallowSizeOfInstance(Holder.class));
     assertTrue(sizeOf(holder) > sizeOf(holder.holder));
     
-    assertTrue(shallowSizeOfInstance(HolderSubclass.class) >= shallowSizeOfInstance(Holder.class));
-    assertEquals(shallowSizeOfInstance(Holder.class), shallowSizeOfInstance(HolderSubclass2.class));
-
-    String[] strings = new String[]{new String("test strin"), new String("hollow"), new String("catchmaster")};
+    assertTrue(
+        shallowSizeOfInstance(HolderSubclass.class) >= shallowSizeOfInstance(Holder.class));
+    assertTrue(
+        shallowSizeOfInstance(Holder.class)         == shallowSizeOfInstance(HolderSubclass2.class));
+
+    String[] strings = new String[] {
+        new String("test string"),
+        new String("hollow"), 
+        new String("catchmaster")
+    };
     assertTrue(sizeOf(strings) > shallowSizeOf(strings));
   }
 
   public void testStaticOverloads() {
     Random rnd = random;
-
     {
-      byte[] array = new byte [rnd.nextInt(1024)];
+      byte[] array = new byte[rnd.nextInt(1024)];
       assertEquals(sizeOf(array), sizeOf((Object) array));
     }
-
+    
     {
-      boolean[] array = new boolean [rnd.nextInt(1024)];
+      boolean[] array = new boolean[rnd.nextInt(1024)];
       assertEquals(sizeOf(array), sizeOf((Object) array));
     }
-
+    
     {
-      char[] array = new char [rnd.nextInt(1024)];
+      char[] array = new char[rnd.nextInt(1024)];
       assertEquals(sizeOf(array), sizeOf((Object) array));
     }
-
+    
     {
-      short[] array = new short [rnd.nextInt(1024)];
+      short[] array = new short[rnd.nextInt(1024)];
       assertEquals(sizeOf(array), sizeOf((Object) array));
     }
-
+    
     {
-      int[] array = new int [rnd.nextInt(1024)];
+      int[] array = new int[rnd.nextInt(1024)];
       assertEquals(sizeOf(array), sizeOf((Object) array));
     }
-
+    
     {
-      float[] array = new float [rnd.nextInt(1024)];
+      float[] array = new float[rnd.nextInt(1024)];
       assertEquals(sizeOf(array), sizeOf((Object) array));
     }
-
+    
     {
-      long[] array = new long [rnd.nextInt(1024)];
+      long[] array = new long[rnd.nextInt(1024)];
       assertEquals(sizeOf(array), sizeOf((Object) array));
     }
-
+    
     {
-      double[] array = new double [rnd.nextInt(1024)];
+      double[] array = new double[rnd.nextInt(1024)];
       assertEquals(sizeOf(array), sizeOf((Object) array));
     }
   }
-
+  
   public void testReferenceSize() {
     if (!isSupportedJVM()) {
-      System.err.println("WARN: Your JVM does not support the Oracle/Sun extensions (Hotspot diagnostics, sun.misc.Unsafe),");
-      System.err.println("so the memory estimates may be inprecise.");
-      System.err.println("Please report this to the Lucene mailing list, noting your JVM version: " +
-        Constants.JAVA_VENDOR + " " + Constants.JAVA_VERSION);
-    }
-    if (VERBOSE) {
-      System.out.println("This JVM is 64bit: " + Constants.JRE_IS_64BIT);    
-      System.out.println("Reference size in this JVM: " + NUM_BYTES_OBJECT_REF);
-      System.out.println("Object header size in this JVM: " + NUM_BYTES_OBJECT_HEADER);
-      System.out.println("Array header size in this JVM: " + NUM_BYTES_ARRAY_HEADER);
-      System.out.println("Object alignment in this JVM: " + NUM_BYTES_OBJECT_ALIGNMENT);
+      System.err.println("WARN: Your JVM does not support certain Oracle/Sun extensions.");
+      System.err.println("      Memory estimates may be inaccurate.");
+      System.err.println("      Please report this to the Lucene mailing list. JVM version: " + RamUsageEstimator.JVM_INFO_STRING);
+      for (JvmFeature f : RamUsageEstimator.getUnsupportedFeatures()) {
+        System.err.println("      - " + f.toString());
+      }
     }
+
     assertTrue(NUM_BYTES_OBJECT_REF == 4 || NUM_BYTES_OBJECT_REF == 8);
     if (!Constants.JRE_IS_64BIT) {
-      assertEquals("For 32bit JVMs, reference size must always be 4", 4, NUM_BYTES_OBJECT_REF);
+      assertEquals("For 32bit JVMs, reference size must always be 4?", 4, NUM_BYTES_OBJECT_REF);
     }
   }
-  
+
   @SuppressWarnings("unused")
   private static class Holder {
     long field1 = 5000L;
@@ -109,8 +109,7 @@ public class TestRamUsageEstimator exten
     Holder holder;
     long field2, field3, field4;
     
-    Holder() {
-    }
+    Holder() {}
     
     Holder(String name, long field1) {
       this.name = name;
@@ -123,7 +122,7 @@ public class TestRamUsageEstimator exten
     byte foo;
     int bar;
   }
-
+  
   private static class HolderSubclass2 extends Holder {
     // empty, only inherits all fields -> size should be identical to superclass
   }

Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimatorOnWildAnimals.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimatorOnWildAnimals.java?rev=1304485&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimatorOnWildAnimals.java (added)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimatorOnWildAnimals.java Fri Mar 23 17:05:58 2012
@@ -0,0 +1,37 @@
+package org.apache.lucene.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.junit.Assert;
+
+/**
+ * Check large and special graphs. 
+ */
+public class TestRamUsageEstimatorOnWildAnimals extends LuceneTestCase {
+  // Singly-linked node used to build arbitrarily long object chains.
+  public static class ListElement {
+    ListElement next;
+  }
+
+  /**
+   * Binary-searches for the longest linked chain that
+   * {@link RamUsageEstimator#sizeOf} can traverse without a
+   * {@link StackOverflowError}; fails (reporting the maximum chain length)
+   * unless the full UPPERLIMIT-long chain is measured successfully.
+   */
+  public void testOverflowMaxChainLength() {
+    int UPPERLIMIT = 100000;
+    int lower = 0;
+    int upper = UPPERLIMIT;
+    
+    while (lower + 1 < upper) {
+      int mid = (lower + upper) / 2;
+      try {
+        // Build a chain of 'mid' linked objects and try to measure it.
+        ListElement first = new ListElement();
+        ListElement last = first;
+        for (int i = 0; i < mid; i++) {
+          last = (last.next = new ListElement());
+        }
+        RamUsageEstimator.sizeOf(first); // cause SOE or pass.
+        lower = mid;
+      } catch (StackOverflowError e) {
+        upper = mid;
+      }
+    }
+
+    // If the search converged below the limit, some chain length overflowed.
+    if (lower + 1 < UPPERLIMIT) {
+      Assert.fail("Max object chain length till stack overflow: " + lower);
+    }
+  }  
+}



RE: svn commit: r1304485 - in /lucene/dev/trunk/lucene/core/src: java/org/apache/lucene/util/ test/org/apache/lucene/util/

Posted by Uwe Schindler <uw...@thetaphi.de>.
Will do!

-----
Uwe Schindler
H.-H.-Meier-Allee 63, D-28213 Bremen
http://www.thetaphi.de
eMail: uwe@thetaphi.de


> -----Original Message-----
> From: Robert Muir [mailto:rcmuir@gmail.com]
> Sent: Friday, March 23, 2012 6:18 PM
> To: dev@lucene.apache.org
> Subject: Re: svn commit: r1304485 - in /lucene/dev/trunk/lucene/core/src:
> java/org/apache/lucene/util/ test/org/apache/lucene/util/
> 
> Can we add a license to the TestIdentityHashSet and
> TestRamUsageEstimatorOnWildAnimals? :)
> 
> On Fri, Mar 23, 2012 at 1:05 PM,  <us...@apache.org> wrote:
> > Author: uschindler
> > Date: Fri Mar 23 17:05:58 2012
> > New Revision: 1304485
> >
> > URL: http://svn.apache.org/viewvc?rev=1304485&view=rev
> > Log:
> > LUCENE-3867: Fix incorrect short-circuit, improve memory usage.
> >
> > Added:
> >
> > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdent
> > ityHashSet.java   (with props)
> >
> > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUs
> > ageEstimatorOnWildAnimals.java   (with props)
> > Modified:
> >
> > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants
> > .java
> >
> > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageE
> > stimator.java
> >
> > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUs
> > ageEstimator.java
> >
> > Modified:
> > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants
> > .java
> > URL:
> > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org
> >
> /apache/lucene/util/Constants.java?rev=1304485&r1=1304484&r2=1304485&v
> > iew=diff
> >
> ================================================================
> ======
> > ========
> > ---
> > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants
> > .java (original)
> > +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Const
> > +++ ants.java Fri Mar 23 17:05:58 2012
> > @@ -27,6 +27,11 @@ import org.apache.lucene.LucenePackage;
> >  public final class Constants {
> >   private Constants() {}                         // can't construct
> >
> > +  /** JVM vendor info. */
> > +  public static final String JVM_VENDOR =
> > + System.getProperty("java.vm.vendor");
> > +  public static final String JVM_VERSION =
> > + System.getProperty("java.vm.version");
> > +  public static final String JVM_NAME =
> > + System.getProperty("java.vm.name");
> > +
> >   /** The value of <tt>System.getProperty("java.version")<tt>. **/
> >   public static final String JAVA_VERSION =
> > System.getProperty("java.version");
> >
> >
> > Modified:
> > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageE
> > stimator.java
> > URL:
> > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org
> >
> /apache/lucene/util/RamUsageEstimator.java?rev=1304485&r1=1304484&r2=
> 1
> > 304485&view=diff
> >
> ================================================================
> ======
> > ========
> > ---
> > lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageE
> > stimator.java (original)
> > +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUs
> > +++ ageEstimator.java Fri Mar 23 17:05:58 2012
> > @@ -24,14 +24,50 @@ import java.text.DecimalFormatSymbols;
> >  import java.util.*;
> >
> >  /**
> > - * Estimates the size of Java objects using a simple memory model
> > - * for primitive size information.
> > + * Estimates the size (memory representation) of Java objects.
> > + *
> > + * @see #sizeOf(Object)
> > + * @see #shallowSizeOf(Object)
> > + * @see #shallowSizeOfInstance(Class)
> >  *
> >  * @lucene.internal
> >  */
> >  public final class RamUsageEstimator {
> > +  /**
> > +   * JVM diagnostic features.
> > +   */
> > +  public static enum JvmFeature {
> > +    OBJECT_REFERENCE_SIZE("Object reference size estimated using
> > + array index scale."),
> > +    ARRAY_HEADER_SIZE("Array header size estimated using array based
> > + offset."),
> > +    FIELD_OFFSETS("Shallow instance size based on field offsets."),
> > +    OBJECT_ALIGNMENT("Object alignment retrieved from
> > + HotSpotDiagnostic MX bean.");
> > +
> > +    public final String description;
> > +
> > +    private JvmFeature(String description) {
> > +      this.description = description;
> > +    }
> > +
> > +    @Override
> > +    public String toString() {
> > +      return super.name() + " (" + description + ")";
> > +    }
> > +  }
> > +
> > +  /** JVM info string for debugging and reports. */
> > +  public final static String JVM_INFO_STRING;
> > +
> > +  /** One kilobyte bytes. */
> > +  public static final long ONE_KB = 1024;
> >
> > -  private RamUsageEstimator() {} // no instance
> > +  /** One megabyte bytes. */
> > +  public static final long ONE_MB = ONE_KB * ONE_KB;
> > +
> > +  /** One gigabyte bytes.*/
> > +  public static final long ONE_GB = ONE_KB * ONE_MB;
> > +
> > +  /** No instantiation. */
> > +  private RamUsageEstimator() {}
> >
> >   public final static int NUM_BYTES_BOOLEAN = 1;
> >   public final static int NUM_BYTES_BYTE = 1; @@ -42,9 +78,19 @@
> > public final class RamUsageEstimator {
> >   public final static int NUM_BYTES_LONG = 8;
> >   public final static int NUM_BYTES_DOUBLE = 8;
> >
> > +  /**
> > +   * Number of bytes this jvm uses to represent an object reference.
> > +   */
> >   public final static int NUM_BYTES_OBJECT_REF;
> > -
> > +
> > +  /**
> > +   * Number of bytes to represent an object header (no fields, no
> alignments).
> > +   */
> >   public final static int NUM_BYTES_OBJECT_HEADER;
> > +
> > +  /**
> > +   * Number of bytes to represent an array header (no content, but with
> alignments).
> > +   */
> >   public final static int NUM_BYTES_ARRAY_HEADER;
> >
> >   /**
> > @@ -69,9 +115,20 @@ public final class RamUsageEstimator {
> >     primitiveSizes.put(long.class, Integer.valueOf(NUM_BYTES_LONG));
> >   }
> >
> > +  /**
> > +   * A handle to <code>sun.misc.Unsafe</code>.
> > +   */
> >   private final static Object theUnsafe;
> > +
> > +  /**
> > +   * A handle to <code>sun.misc.Unsafe#fieldOffset(Field)</code>.
> > +   */
> >   private final static Method objectFieldOffsetMethod;
> > -  private final static boolean useUnsafe, isSupportedJVM;
> > +
> > +  /**
> > +   * All the supported "internal" JVM features detected at clinit.
> > +   */
> > +  private final static EnumSet<JvmFeature> supportedFeatures;
> >
> >   /**
> >    * Initialize constants and try to collect information about the JVM internals.
> > @@ -85,80 +142,71 @@ public final class RamUsageEstimator {
> >     // so on 64 bit JVMs it'll be align(16 + 4, @8) = 24.
> >     int arrayHeader = Constants.JRE_IS_64BIT ? 24 : 12;
> >
> > -    Object unsafe = null;
> > -    Method objectFieldOffsetM = null;
> > -    boolean supportedJvm = true;
> > +    supportedFeatures = EnumSet.noneOf(JvmFeature.class);
> > +
> > +    Class<?> unsafeClass = null;
> > +    Object tempTheUnsafe = null;
> >     try {
> > -      final Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
> > +      unsafeClass = Class.forName("sun.misc.Unsafe");
> >       final Field unsafeField =
> > unsafeClass.getDeclaredField("theUnsafe");
> >       unsafeField.setAccessible(true);
> > -      unsafe = unsafeField.get(null);
> > -
> > -      // get object reference size by getting scale factor of Object[] arrays:
> > -      try {
> > -        final Method arrayIndexScaleM =
> > unsafeClass.getMethod("arrayIndexScale", Class.class);
> > -        referenceSize = ((Number) arrayIndexScaleM.invoke(unsafe,
> > Object[].class)).intValue();
> > -      } catch (Exception e) {
> > -        // ignore
> > -        supportedJvm = false;
> > -      }
> > -
> > -      // updated best guess based on reference size:
> > -      objectHeader = Constants.JRE_IS_64BIT ? (8 + referenceSize) :
> > 8;
> > -      arrayHeader = Constants.JRE_IS_64BIT ? (8 + 2 * referenceSize)
> > : 12;
> > -
> > -      // get the object header size:
> > -      // - first try out if the field offsets are not scaled (see
> > warning in Unsafe docs)
> > -      // - get the object header size by getting the field offset of
> > the first field of a dummy object
> > -      // If the scaling is byte-wise and unsafe is available, enable
> > dynamic size measurement for
> > -      // estimateRamUsage().
> > -      try {
> > -        objectFieldOffsetM =
> > unsafeClass.getMethod("objectFieldOffset", Field.class);
> > -        final Field dummy1Field =
> > DummyTwoLongObject.class.getDeclaredField("dummy1");
> > -        final int ofs1 = ((Number) objectFieldOffsetM.invoke(unsafe,
> > dummy1Field)).intValue();
> > -        final Field dummy2Field =
> > DummyTwoLongObject.class.getDeclaredField("dummy2");
> > -        final int ofs2 = ((Number) objectFieldOffsetM.invoke(unsafe,
> > dummy2Field)).intValue();
> > -        if (Math.abs(ofs2 - ofs1) == NUM_BYTES_LONG) {
> > -          final Field baseField =
> > DummyOneFieldObject.class.getDeclaredField("base");
> > -          objectHeader = ((Number) objectFieldOffsetM.invoke(unsafe,
> > baseField)).intValue();
> > -        } else {
> > -          // it is not safe to use Unsafe.objectFieldOffset(),
> > -          // as it may be scaled (see "cookie" comment in Unsafe),
> > better use defaults
> > -          // and conventional size estimation:
> > -          objectFieldOffsetM = null;
> > -          supportedJvm = false;
> > -        }
> > -      } catch (Exception e) {
> > -        // on exception ensure useUnsafe will be set to false later:
> > -        objectFieldOffsetM = null;
> > -        supportedJvm = false;
> > -      }
> > +      tempTheUnsafe = unsafeField.get(null);
> > +    } catch (Exception e) {
> > +      // Ignore.
> > +    }
> > +    theUnsafe = tempTheUnsafe;
> >
> > -      // Get the array header size by retrieving the array base
> > offset
> > -      // (offset of the first element of an array).
> > -      try {
> > -        final Method arrayBaseOffsetM =
> > unsafeClass.getMethod("arrayBaseOffset", Class.class);
> > -        // we calculate that only for byte[] arrays, it's actually the same for all
> types:
> > -        arrayHeader = ((Number) arrayBaseOffsetM.invoke(unsafe,
> > byte[].class)).intValue();
> > -      } catch (Exception e) {
> > -        // ignore
> > -        supportedJvm = false;
> > +    // get object reference size by getting scale factor of Object[] arrays:
> > +    try {
> > +      final Method arrayIndexScaleM =
> > + unsafeClass.getMethod("arrayIndexScale", Class.class);
> > +      referenceSize = ((Number) arrayIndexScaleM.invoke(theUnsafe,
> > + Object[].class)).intValue();
> > +      supportedFeatures.add(JvmFeature.OBJECT_REFERENCE_SIZE);
> > +    } catch (Exception e) {
> > +      // ignore.
> > +    }
> > +
> > +    // "best guess" based on reference size. We will attempt to
> > + modify
> > +    // these to exact values if there is supported infrastructure.
> > +    objectHeader = Constants.JRE_IS_64BIT ? (8 + referenceSize) : 8;
> > +    arrayHeader =  Constants.JRE_IS_64BIT ? (8 + 2 * referenceSize) :
> > + 12;
> > +
> > +    // get the object header size:
> > +    // - first try out if the field offsets are not scaled (see
> > + warning in Unsafe docs)
> > +    // - get the object header size by getting the field offset of
> > + the first field of a dummy object
> > +    // If the scaling is byte-wise and unsafe is available, enable
> > + dynamic size measurement for
> > +    // estimateRamUsage().
> > +    Method tempObjectFieldOffsetMethod = null;
> > +    try {
> > +      final Method objectFieldOffsetM =
> > + unsafeClass.getMethod("objectFieldOffset", Field.class);
> > +      final Field dummy1Field =
> > + DummyTwoLongObject.class.getDeclaredField("dummy1");
> > +      final int ofs1 = ((Number) objectFieldOffsetM.invoke(theUnsafe,
> > + dummy1Field)).intValue();
> > +      final Field dummy2Field =
> > + DummyTwoLongObject.class.getDeclaredField("dummy2");
> > +      final int ofs2 = ((Number) objectFieldOffsetM.invoke(theUnsafe,
> > + dummy2Field)).intValue();
> > +      if (Math.abs(ofs2 - ofs1) == NUM_BYTES_LONG) {
> > +        final Field baseField =
> > + DummyOneFieldObject.class.getDeclaredField("base");
> > +        objectHeader = ((Number) objectFieldOffsetM.invoke(theUnsafe,
> > + baseField)).intValue();
> > +        supportedFeatures.add(JvmFeature.FIELD_OFFSETS);
> > +        tempObjectFieldOffsetMethod = objectFieldOffsetM;
> >       }
> >     } catch (Exception e) {
> > -      // ignore
> > -      supportedJvm = false;
> > +      // Ignore.
> > +    }
> > +    objectFieldOffsetMethod = tempObjectFieldOffsetMethod;
> > +
> > +    // Get the array header size by retrieving the array base offset
> > +    // (offset of the first element of an array).
> > +    try {
> > +      final Method arrayBaseOffsetM =
> > + unsafeClass.getMethod("arrayBaseOffset", Class.class);
> > +      // we calculate that only for byte[] arrays, it's actually the same for all
> types:
> > +      arrayHeader = ((Number) arrayBaseOffsetM.invoke(theUnsafe,
> > + byte[].class)).intValue();
> > +      supportedFeatures.add(JvmFeature.ARRAY_HEADER_SIZE);
> > +    } catch (Exception e) {
> > +      // Ignore.
> >     }
> >
> >     NUM_BYTES_OBJECT_REF = referenceSize;
> >     NUM_BYTES_OBJECT_HEADER = objectHeader;
> >     NUM_BYTES_ARRAY_HEADER = arrayHeader;
> > -    useUnsafe = (unsafe != null && objectFieldOffsetM != null);
> > -    if (useUnsafe) {
> > -      theUnsafe = unsafe;
> > -      objectFieldOffsetMethod = objectFieldOffsetM;
> > -    } else {
> > -      theUnsafe = objectFieldOffsetMethod = null;
> > -    }
> >
> >     // Try to get the object alignment (the default seems to be 8 on
> > Hotspot,
> >     // regardless of the architecture).
> > @@ -176,18 +224,34 @@ public final class RamUsageEstimator {
> >         objectAlignment = Integer.parseInt(
> >
> > vmOption.getClass().getMethod("getValue").invoke(vmOption).toString()
> >         );
> > +        supportedFeatures.add(JvmFeature.OBJECT_ALIGNMENT);
> >       } catch (InvocationTargetException ite) {
> >         if (!(ite.getCause() instanceof IllegalArgumentException))
> >           throw ite;
> >         // ignore the error completely and use default of 8 (32 bit JVMs).
> >       }
> >     } catch (Exception e) {
> > -      // ignore
> > -      supportedJvm = false;
> > +      // Ignore.
> >     }
> > +
> >     NUM_BYTES_OBJECT_ALIGNMENT = objectAlignment;
> >
> > -    isSupportedJVM = supportedJvm;
> > +    JVM_INFO_STRING = "[JVM: " +
> > +        Constants.JVM_NAME + ", " + Constants.JVM_VERSION + ", " +
> > + Constants.JVM_VENDOR + ", " +
> > +        Constants.JAVA_VENDOR + ", " + Constants.JAVA_VERSION + "]";
> > +  }
> > +
> > +  /**
> > +   * Cached information about a given class.
> > +   */
> > +  private static final class ClassCache {
> > +    public final long alignedShallowInstanceSize;
> > +    public final Field[] referenceFields;
> > +
> > +    public ClassCache(long alignedShallowInstanceSize, Field[]
> > + referenceFields) {
> > +      this.alignedShallowInstanceSize = alignedShallowInstanceSize;
> > +      this.referenceFields = referenceFields;
> > +    }
> >   }
> >
> >   // Object with just one field to determine the object header size by getting
> the offset of the dummy field:
> > @@ -204,14 +268,14 @@ public final class RamUsageEstimator {
> >   }
> >
> >   /**
> > -   * Returns true, if the current JVM is supported by {@code
> RamUsageEstimator}.
> > +   * Returns true, if the current JVM is fully supported by {@code
> RamUsageEstimator}.
> >    * If this method returns {@code false} you are maybe using a 3rd
> > party Java VM
> >    * that is not supporting Oracle/Sun private APIs. The memory
> > estimates can be
> >    * imprecise then (no way of detecting compressed references, alignments,
> etc.).
> >    * Lucene still tries to use sensible defaults.
> >    */
> >   public static boolean isSupportedJVM() {
> > -    return isSupportedJVM;
> > +    return supportedFeatures.size() == JvmFeature.values().length;
> >   }
> >
> >   /**
> > @@ -272,13 +336,7 @@ public final class RamUsageEstimator {
> >    * should be GCed.</p>
> >    */
> >   public static long sizeOf(Object obj) {
> > -    final Set<Object> seen = Collections.newSetFromMap(new
> > IdentityHashMap<Object,Boolean>(64));
> > -    try {
> > -      return measureObjectSize(obj, seen);
> > -    } finally {
> > -      // Help the GC.
> > -      seen.clear();
> > -    }
> > +    return measureObjectSize(obj);
> >   }
> >
> >   /**
> > @@ -292,7 +350,7 @@ public final class RamUsageEstimator {
> >     if (obj == null) return 0;
> >     final Class<?> clz = obj.getClass();
> >     if (clz.isArray()) {
> > -      return measureArraySize(obj, null);
> > +      return shallowSizeOfArray(obj);
> >     } else {
> >       return shallowSizeOfInstance(clz);
> >     }
> > @@ -302,8 +360,8 @@ public final class RamUsageEstimator {
> >    * Returns the shallow instance size in bytes an instance of the given class
> would occupy.
> >    * This works with all conventional classes and primitive types, but
> > not with arrays
> >    * (the size then depends on the number of elements and varies from object
> to object).
> > -   * Use the array-instance methods instead.
> >    *
> > +   * @see #shallowSizeOf(Object)
> >    * @throws IllegalArgumentException if {@code clazz} is an array class.
> >    */
> >   public static long shallowSizeOfInstance(Class<?> clazz) { @@
> > -313,87 +371,163 @@ public final class RamUsageEstimator {
> >       return primitiveSizes.get(clazz);
> >
> >     long size = NUM_BYTES_OBJECT_HEADER;
> > -
> > +
> >     // Walk type hierarchy
> > -    while (clazz != null) {
> > +    for (;clazz != null; clazz = clazz.getSuperclass()) {
> >       final Field[] fields = clazz.getDeclaredFields();
> > -      boolean fieldFound = false;
> > -      for (final Field f : fields) {
> > -        if (Modifier.isStatic(f.getModifiers())) {
> > -          continue;
> > +      for (Field f : fields) {
> > +        if (!Modifier.isStatic(f.getModifiers())) {
> > +          size = adjustForField(size, f);
> >         }
> > -
> > -        size = reflectFieldSize(size, f);
> > -        fieldFound = true;
> >       }
> > -      if (useUnsafe && fieldFound) {
> > -        // no need to recurse to superclasses, as all fields are
> > -        // added at the end, so we won't find any larger offset
> > -        break;
> > -      }
> > -      clazz = clazz.getSuperclass();
> >     }
> >     return alignObjectSize(size);
> >   }
> >
> >   /**
> > -   * Recursive descend into an object.
> > +   * Return shallow size of any <code>array</code>.
> >    */
> > -  private static long measureObjectSize(Object obj, Set<Object> seen)
> > {
> > -    if (obj == null) {
> > -      return 0;
> > +  private static long shallowSizeOfArray(Object array) {
> > +    long size = NUM_BYTES_ARRAY_HEADER;
> > +    final int len = Array.getLength(array);
> > +    if (len > 0) {
> > +      Class<?> arrayElementClazz =
> > + array.getClass().getComponentType();
> > +      if (arrayElementClazz.isPrimitive()) {
> > +        size += (long) len * primitiveSizes.get(arrayElementClazz);
> > +      } else {
> > +        size += (long) NUM_BYTES_OBJECT_REF * len;
> > +      }
> >     }
> > +    return alignObjectSize(size);
> > +  }
> >
> > -    // skip if we have seen before
> > -    if (seen.contains(obj)) {
> > -      return 0;
> > -    }
> > +  /*
> > +   * Non-recursive version of object descend. This consumes more
> > + memory than recursive in-depth
> > +   * traversal but prevents stack overflows on long chains of objects
> > +   * or complex graphs (a max. recursion depth on my machine was
> > + ~5000 objects linked in a chain
> > +   * so not too much).
> > +   */
> > +  private static long measureObjectSize(Object root) {
> > +    // Objects seen so far.
> > +    final IdentityHashSet<Object> seen = new
> > + IdentityHashSet<Object>();
> > +    // Class cache with reference Field and precalculated shallow size.
> > +    final IdentityHashMap<Class<?>, ClassCache> classCache = new
> > + IdentityHashMap<Class<?>, ClassCache>();
> > +    // Stack of objects pending traversal. Recursion caused stack overflows.
> > +    final ArrayList<Object> stack = new ArrayList<Object>();
> > +    stack.add(root);
> > +
> > +    long totalSize = 0;
> > +    while (!stack.isEmpty()) {
> > +      final Object ob = stack.remove(stack.size() - 1);
> >
> > -    // add to seen
> > -    seen.add(obj);
> > +      if (ob == null || seen.contains(ob)) {
> > +        continue;
> > +      }
> > +      seen.add(ob);
> >
> > -    Class<?> clazz = obj.getClass();
> > -    if (clazz.isArray()) {
> > -      return measureArraySize(obj, seen);
> > +      final Class<?> obClazz = ob.getClass();
> > +      if (obClazz.isArray()) {
> > +        /*
> > +         * Consider an array, possibly of primitive types. Push any
> > + of its references to
> > +         * the processing stack and accumulate this array's shallow size.
> > +         */
> > +        long size = NUM_BYTES_ARRAY_HEADER;
> > +        final int len = Array.getLength(ob);
> > +        if (len > 0) {
> > +          Class<?> componentClazz = obClazz.getComponentType();
> > +          if (componentClazz.isPrimitive()) {
> > +            size += (long) len * primitiveSizes.get(componentClazz);
> > +          } else {
> > +            size += (long) NUM_BYTES_OBJECT_REF * len;
> > +
> > +            // Push refs for traversal later.
> > +            for (int i = len; --i >= 0 ;) {
> > +              final Object o = Array.get(ob, i);
> > +              if (o != null && !seen.contains(o)) {
> > +                stack.add(o);
> > +              }
> > +            }
> > +          }
> > +        }
> > +        totalSize += alignObjectSize(size);
> > +      } else {
> > +        /*
> > +         * Consider an object. Push any references it has to the
> > + processing stack
> > +         * and accumulate this object's shallow size.
> > +         */
> > +        try {
> > +          ClassCache cachedInfo = classCache.get(obClazz);
> > +          if (cachedInfo == null) {
> > +            classCache.put(obClazz, cachedInfo =
> > + createCacheEntry(obClazz));
> > +          }
> > +
> > +          for (Field f : cachedInfo.referenceFields) {
> > +            // Fast path to eliminate redundancies.
> > +            final Object o = f.get(ob);
> > +            if (o != null && !seen.contains(o)) {
> > +              stack.add(o);
> > +            }
> > +          }
> > +
> > +          totalSize += cachedInfo.alignedShallowInstanceSize;
> > +        } catch (IllegalAccessException e) {
> > +          // this should never happen as we enabled setAccessible().
> > +          throw new RuntimeException("Reflective field access
> > + failed?", e);
> > +        }
> > +      }
> >     }
> >
> > -    long size = NUM_BYTES_OBJECT_HEADER;
> > -    long innerSize = 0L;
> > +    // Help the GC (?).
> > +    seen.clear();
> > +    stack.clear();
> > +    classCache.clear();
> >
> > -    // walk type hierarchy
> > -    while (clazz != null) {
> > -      final Field[] fields = clazz.getDeclaredFields();
> > +    return totalSize;
> > +  }
> > +
> > +  /**
> > +   * Create a cached information about shallow size and reference
> > + fields for
> > +   * a given class.
> > +   */
> > +  private static ClassCache createCacheEntry(final Class<?> clazz) {
> > +    ClassCache cachedInfo;
> > +    long shallowInstanceSize = NUM_BYTES_OBJECT_HEADER;
> > +    final ArrayList<Field> referenceFields = new
> > + ArrayList<Field>(32);
> > +    for (Class<?> c = clazz; c != null; c = c.getSuperclass()) {
> > +      final Field[] fields = c.getDeclaredFields();
> >       for (final Field f : fields) {
> > -        if (Modifier.isStatic(f.getModifiers())) {
> > -          continue;
> > -        }
> > +        if (!Modifier.isStatic(f.getModifiers())) {
> > +          shallowInstanceSize = adjustForField(shallowInstanceSize,
> > + f);
> >
> > -        size = reflectFieldSize(size, f);
> > -
> > -        if (!f.getType().isPrimitive()) {
> > -          try {
> > +          if (!f.getType().isPrimitive()) {
> >             f.setAccessible(true);
> > -            innerSize += measureObjectSize(f.get(obj), seen);
> > -          } catch (IllegalAccessException ex) {
> > -            // this should never happen as we enable setAccessible()!
> > -            throw new RuntimeException("Cannot reflect instance
> > field: " +
> > -              f.getDeclaringClass().getName() + "#" + f.getName(),
> > ex);
> > +            referenceFields.add(f);
> >           }
> >         }
> >       }
> > -      clazz = clazz.getSuperclass();
> >     }
> > -    return alignObjectSize(size) + innerSize;
> > +
> > +    cachedInfo = new ClassCache(
> > +        alignObjectSize(shallowInstanceSize),
> > +        referenceFields.toArray(new Field[referenceFields.size()]));
> > +    return cachedInfo;
> >   }
> > -
> > -  private static long reflectFieldSize(long size, final Field f) {
> > +
> > +  /**
> > +   * This method returns the maximum representation size of an
> > + object. <code>sizeSoFar</code>
> > +   * is the object's size measured so far. <code>f</code> is the field being
> probed.
> > +   *
> > +   * <p>The returned offset will be the maximum of whatever was
> > + measured so far and
> > +   * <code>f</code> field's offset and representation size (unaligned).
> > +   */
> > +  private static long adjustForField(long sizeSoFar, final Field f) {
> >     final Class<?> type = f.getType();
> >     final int fsize = type.isPrimitive() ? primitiveSizes.get(type) :
> > NUM_BYTES_OBJECT_REF;
> > -    if (useUnsafe) {
> > +    if (objectFieldOffsetMethod != null) {
> >       try {
> >         final long offsetPlusSize =
> >           ((Number) objectFieldOffsetMethod.invoke(theUnsafe,
> > f)).longValue() + fsize;
> > -        return Math.max(size, offsetPlusSize);
> > +        return Math.max(sizeSoFar, offsetPlusSize);
> >       } catch (IllegalAccessException ex) {
> >         throw new RuntimeException("Access problem with
> > sun.misc.Unsafe", ex);
> >       } catch (InvocationTargetException ite) { @@ -409,40 +543,22 @@
> > public final class RamUsageEstimator {
> >           f.getDeclaringClass().getName() + "#" + f.getName(), cause);
> >       }
> >     } else {
> > -      return size + fsize;
> > +      // TODO: No alignments based on field type/ subclass fields alignments?
> > +      return sizeSoFar + fsize;
> >     }
> >   }
> >
> > -  /**
> > -   * Return the deep size of an <code>array</code>, including
> > -   * sub-objects if there are any.
> > -   *
> > -   * @param seen A set of already seen objects. If <code>null</code>
> > no references
> > -   *      are followed and this method returns shallow size.
> > -   */
> > -  private static long measureArraySize(Object array, Set<Object>
> > seen) {
> > -    long size = NUM_BYTES_ARRAY_HEADER;
> > -    final int len = Array.getLength(array);
> > -    if (len > 0) {
> > -      Class<?> arrayElementClazz =
> > array.getClass().getComponentType();
> > -      if (arrayElementClazz.isPrimitive()) {
> > -        size += (long) len * primitiveSizes.get(arrayElementClazz);
> > -      } else {
> > -        size += (long) NUM_BYTES_OBJECT_REF * len;
> > -        if (seen != null) {
> > -          for (int i = 0; i < len; i++) {
> > -            size += measureObjectSize(Array.get(array, i), seen);
> > -          }
> > -        }
> > -      }
> > -    }
> > -
> > -    return alignObjectSize(size);
> > +  /** Return the set of unsupported JVM features that improve the
> > + estimation. */
> > +  public static EnumSet<JvmFeature> getUnsupportedFeatures() {
> > +    EnumSet<JvmFeature> unsupported =
> > + EnumSet.allOf(JvmFeature.class);
> > +    unsupported.removeAll(supportedFeatures);
> > +    return unsupported;
> >   }
> >
> > -  public static final long ONE_KB = 1024;
> > -  public static final long ONE_MB = ONE_KB * ONE_KB;
> > -  public static final long ONE_GB = ONE_KB * ONE_MB;
> > +  /** Return the set of supported JVM features that improve the
> > + estimation. */
> > +  public static EnumSet<JvmFeature> getSupportedFeatures() {
> > +    return EnumSet.copyOf(supportedFeatures);
> > +  }
> >
> >   /**
> >    * Returns <code>size</code> in human-readable units (GB, MB, KB or
> bytes).
> > @@ -466,4 +582,252 @@ public final class RamUsageEstimator {
> >       return bytes + " bytes";
> >     }
> >   }
> > +
> > +  /**
> > +   * Return a human-readable size of a given object.
> > +   * @see #sizeOf(Object)
> > +   * @see #humanReadableUnits(long)
> > +   */
> > +  public static String humanSizeOf(Object object) {
> > +    return humanReadableUnits(sizeOf(object));
> > +  }
> > +
> > +  /**
> > +   * An identity hash set implemented using open addressing. No null keys
> are allowed.
> > +   *
> > +   * TODO: If this is useful outside this class, make it public -
> > + needs some work
> > +   */
> > +  static final class IdentityHashSet<KType> implements
> > + Iterable<KType> {
> > +    /**
> > +     * Default load factor.
> > +     */
> > +    public final static float DEFAULT_LOAD_FACTOR = 0.75f;
> > +
> > +    /**
> > +     * Minimum capacity for the set.
> > +     */
> > +    public final static int MIN_CAPACITY = 4;
> > +
> > +    /**
> > +     * All of set entries. Always of power of two length.
> > +     */
> > +    public Object[] keys;
> > +
> > +    /**
> > +     * Cached number of assigned slots.
> > +     */
> > +    public int assigned;
> > +
> > +    /**
> > +     * The load factor for this set (fraction of allocated or deleted
> > + slots before
> > +     * the buffers must be rehashed or reallocated).
> > +     */
> > +    public final float loadFactor;
> > +
> > +    /**
> > +     * Cached capacity threshold at which we must resize the buffers.
> > +     */
> > +    private int resizeThreshold;
> > +
> > +    /**
> > +     * Creates a hash set with the default capacity of 16.
> > +     * load factor of {@value #DEFAULT_LOAD_FACTOR}. `
> > +     */
> > +    public IdentityHashSet() {
> > +      this(16, DEFAULT_LOAD_FACTOR);
> > +    }
> > +
> > +    /**
> > +     * Creates a hash set with the given capacity, load factor of
> > +     * {@value #DEFAULT_LOAD_FACTOR}.
> > +     */
> > +    public IdentityHashSet(int initialCapacity) {
> > +      this(initialCapacity, DEFAULT_LOAD_FACTOR);
> > +    }
> > +
> > +    /**
> > +     * Creates a hash set with the given capacity and load factor.
> > +     */
> > +    public IdentityHashSet(int initialCapacity, float loadFactor) {
> > +      initialCapacity = Math.max(MIN_CAPACITY, initialCapacity);
> > +
> > +      assert initialCapacity > 0 : "Initial capacity must be between (0, "
> > +          + Integer.MAX_VALUE + "].";
> > +      assert loadFactor > 0 && loadFactor < 1 : "Load factor must be
> > + between (0, 1).";
> > +      this.loadFactor = loadFactor;
> > +      allocateBuffers(roundCapacity(initialCapacity));
> > +    }
> > +
> > +    /**
> > +     * Adds a reference to the set. Null keys are not allowed.
> > +     */
> > +    public boolean add(KType e) {
> > +      assert e != null : "Null keys not allowed.";
> > +
> > +      if (assigned >= resizeThreshold) expandAndRehash();
> > +
> > +      final int mask = keys.length - 1;
> > +      int slot = rehash(e) & mask;
> > +      Object existing;
> > +      while ((existing = keys[slot]) != null) {
> > +        if (e == existing) {
> > +          return false; // already found.
> > +        }
> > +        slot = (slot + 1) & mask;
> > +      }
> > +      assigned++;
> > +      keys[slot] = e;
> > +      return true;
> > +    }
> > +
> > +    /**
> > +     * Checks if the set contains a given ref.
> > +     */
> > +    public boolean contains(KType e) {
> > +      final int mask = keys.length - 1;
> > +      int slot = rehash(e) & mask;
> > +      Object existing;
> > +      while ((existing = keys[slot]) != null) {
> > +        if (e == existing) {
> > +          return true;
> > +        }
> > +        slot = (slot + 1) & mask;
> > +      }
> > +      return false;
> > +    }
> > +
> > +    /** Rehash via MurmurHash.
> > +     *
> > +     * <p>The implementation is based on the
> > +     * finalization step from Austin Appleby's
> > +     * <code>MurmurHash3</code>.
> > +     *
> > +     * @see "http://sites.google.com/site/murmurhash/"
> > +     */
> > +    private static int rehash(Object o) {
> > +      int k = System.identityHashCode(o);
> > +      k ^= k >>> 16;
> > +      k *= 0x85ebca6b;
> > +      k ^= k >>> 13;
> > +      k *= 0xc2b2ae35;
> > +      k ^= k >>> 16;
> > +      return k;
> > +    }
> > +
> > +    /**
> > +     * Expand the internal storage buffers (capacity) or rehash
> > + current keys and
> > +     * values if there are a lot of deleted slots.
> > +     */
> > +    private void expandAndRehash() {
> > +      final Object[] oldKeys = this.keys;
> > +
> > +      assert assigned >= resizeThreshold;
> > +      allocateBuffers(nextCapacity(keys.length));
> > +
> > +      /*
> > +       * Rehash all assigned slots from the old hash table.
> > +       */
> > +      final int mask = keys.length - 1;
> > +      for (int i = 0; i < oldKeys.length; i++) {
> > +        final Object key = oldKeys[i];
> > +        if (key != null) {
> > +          int slot = rehash(key) & mask;
> > +          while (keys[slot] != null) {
> > +            slot = (slot + 1) & mask;
> > +          }
> > +          keys[slot] = key;
> > +        }
> > +      }
> > +      Arrays.fill(oldKeys, null);
> > +    }
> > +
> > +    /**
> > +     * Allocate internal buffers for a given capacity.
> > +     *
> > +     * @param capacity
> > +     *          New capacity (must be a power of two).
> > +     */
> > +    private void allocateBuffers(int capacity) {
> > +      this.keys = new Object[capacity];
> > +      this.resizeThreshold = (int) (capacity * DEFAULT_LOAD_FACTOR);
> > +    }
> > +
> > +    /**
> > +     * Return the next possible capacity, counting from the current buffers'
> size.
> > +     */
> > +    protected int nextCapacity(int current) {
> > +      assert current > 0 && Long.bitCount(current) == 1 : "Capacity
> > + must be a power of two.";
> > +      assert ((current << 1) > 0) : "Maximum capacity exceeded ("
> > +          + (0x80000000 >>> 1) + ").";
> > +
> > +      if (current < MIN_CAPACITY / 2) current = MIN_CAPACITY / 2;
> > +      return current << 1;
> > +    }
> > +
> > +    /**
> > +     * Round the capacity to the next allowed value.
> > +     */
> > +    protected int roundCapacity(int requestedCapacity) {
> > +      // Maximum positive integer that is a power of two.
> > +      if (requestedCapacity > (0x80000000 >>> 1)) return (0x80000000
> > + >>> 1);
> > +
> > +      int capacity = MIN_CAPACITY;
> > +      while (capacity < requestedCapacity) {
> > +        capacity <<= 1;
> > +      }
> > +
> > +      return capacity;
> > +    }
> > +
> > +    public void clear() {
> > +      assigned = 0;
> > +      Arrays.fill(keys, null);
> > +    }
> > +
> > +    public int size() {
> > +      return assigned;
> > +    }
> > +
> > +    public boolean isEmpty() {
> > +      return size() == 0;
> > +    }
> > +
> > +    @Override
> > +    public Iterator<KType> iterator() {
> > +      return new Iterator<KType>() {
> > +        int pos = -1;
> > +        Object nextElement = fetchNext();
> > +
> > +        @Override
> > +        public boolean hasNext() {
> > +          return nextElement != null;
> > +        }
> > +
> > +        @SuppressWarnings("unchecked")
> > +        @Override
> > +        public KType next() {
> > +          Object r = this.nextElement;
> > +          if (r == null) {
> > +            throw new NoSuchElementException();
> > +          }
> > +          this.nextElement = fetchNext();
> > +          return (KType) r;
> > +        }
> > +
> > +        private Object fetchNext() {
> > +          pos++;
> > +          while (pos < keys.length && keys[pos] == null) {
> > +            pos++;
> > +          }
> > +
> > +          return (pos >= keys.length ? null : keys[pos]);
> > +        }
> > +
> > +        @Override
> > +        public void remove() {
> > +          throw new UnsupportedOperationException();
> > +        }
> > +      };
> > +    }
> > +  }
> >  }
> >
> > Added:
> > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdent
> > ityHashSet.java
> > URL:
> > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org
> > /apache/lucene/util/TestIdentityHashSet.java?rev=1304485&view=auto
> >
> ================================================================
> ======
> > ========
> > ---
> > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdent
> > ityHashSet.java (added)
> > +++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestI
> > +++ dentityHashSet.java Fri Mar 23 17:05:58 2012
> > @@ -0,0 +1,39 @@
> > +package org.apache.lucene.util;
> > +
> > +import java.util.*;
> > +
> > +import org.junit.Assert;
> > +import org.junit.Test;
> > +
> > +public class TestIdentityHashSet extends LuceneTestCase {
> > +  @Test
> > +  public void testCheck() {
> > +    Random rnd = random;
> > +    Set<Object> jdk = Collections.newSetFromMap(
> > +        new IdentityHashMap<Object,Boolean>());
> > +    RamUsageEstimator.IdentityHashSet<Object> us = new
> > +RamUsageEstimator.IdentityHashSet<Object>();
> > +
> > +    int max = 100000;
> > +    int threshold = 256;
> > +    for (int i = 0; i < max; i++) {
> > +      // some of these will be interned and some will not so there will be
> collisions.
> > +      Integer v = rnd.nextInt(threshold);
> > +
> > +      boolean e1 = jdk.contains(v);
> > +      boolean e2 = us.contains(v);
> > +      Assert.assertEquals(e1, e2);
> > +
> > +      e1 = jdk.add(v);
> > +      e2 = us.add(v);
> > +      Assert.assertEquals(e1, e2);
> > +    }
> > +
> > +    Set<Object> collected = Collections.newSetFromMap(
> > +        new IdentityHashMap<Object,Boolean>());
> > +    for (Object o : us) {
> > +      collected.add(o);
> > +    }
> > +
> > +    Assert.assertEquals(collected, jdk);
> > +  }
> > +}
> >
> > Modified:
> > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUs
> > ageEstimator.java
> > URL:
> > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org
> >
> /apache/lucene/util/TestRamUsageEstimator.java?rev=1304485&r1=1304484&
> > r2=1304485&view=diff
> >
> ================================================================
> ======
> > ========
> > ---
> > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUs
> > ageEstimator.java (original)
> > +++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestR
> > +++ amUsageEstimator.java Fri Mar 23 17:05:58 2012
> > @@ -22,86 +22,86 @@ import java.util.Random;
> >  */
> >
> >  public class TestRamUsageEstimator extends LuceneTestCase {
> > +  public void testSanity() {
> > +    assertTrue(sizeOf(new String("test string")) >
> > + shallowSizeOfInstance(String.class));
> >
> > -  public void testBasic() {
> > -    assertTrue(sizeOf(new String("test strin")) >
> > shallowSizeOfInstance(String.class));
> > -
> >     Holder holder = new Holder();
> >     holder.holder = new Holder("string2", 5000L);
> >     assertTrue(sizeOf(holder) > shallowSizeOfInstance(Holder.class));
> >     assertTrue(sizeOf(holder) > sizeOf(holder.holder));
> >
> > -    assertTrue(shallowSizeOfInstance(HolderSubclass.class) >=
> > shallowSizeOfInstance(Holder.class));
> > -    assertEquals(shallowSizeOfInstance(Holder.class),
> > shallowSizeOfInstance(HolderSubclass2.class));
> > -
> > -    String[] strings = new String[]{new String("test strin"), new
> > String("hollow"), new String("catchmaster")};
> > +    assertTrue(
> > +        shallowSizeOfInstance(HolderSubclass.class) >=
> > + shallowSizeOfInstance(Holder.class));
> > +    assertTrue(
> > +        shallowSizeOfInstance(Holder.class)         ==
> > + shallowSizeOfInstance(HolderSubclass2.class));
> > +
> > +    String[] strings = new String[] {
> > +        new String("test string"),
> > +        new String("hollow"),
> > +        new String("catchmaster")
> > +    };
> >     assertTrue(sizeOf(strings) > shallowSizeOf(strings));
> >   }
> >
> >   public void testStaticOverloads() {
> >     Random rnd = random;
> > -
> >     {
> > -      byte[] array = new byte [rnd.nextInt(1024)];
> > +      byte[] array = new byte[rnd.nextInt(1024)];
> >       assertEquals(sizeOf(array), sizeOf((Object) array));
> >     }
> > -
> > +
> >     {
> > -      boolean[] array = new boolean [rnd.nextInt(1024)];
> > +      boolean[] array = new boolean[rnd.nextInt(1024)];
> >       assertEquals(sizeOf(array), sizeOf((Object) array));
> >     }
> > -
> > +
> >     {
> > -      char[] array = new char [rnd.nextInt(1024)];
> > +      char[] array = new char[rnd.nextInt(1024)];
> >       assertEquals(sizeOf(array), sizeOf((Object) array));
> >     }
> > -
> > +
> >     {
> > -      short[] array = new short [rnd.nextInt(1024)];
> > +      short[] array = new short[rnd.nextInt(1024)];
> >       assertEquals(sizeOf(array), sizeOf((Object) array));
> >     }
> > -
> > +
> >     {
> > -      int[] array = new int [rnd.nextInt(1024)];
> > +      int[] array = new int[rnd.nextInt(1024)];
> >       assertEquals(sizeOf(array), sizeOf((Object) array));
> >     }
> > -
> > +
> >     {
> > -      float[] array = new float [rnd.nextInt(1024)];
> > +      float[] array = new float[rnd.nextInt(1024)];
> >       assertEquals(sizeOf(array), sizeOf((Object) array));
> >     }
> > -
> > +
> >     {
> > -      long[] array = new long [rnd.nextInt(1024)];
> > +      long[] array = new long[rnd.nextInt(1024)];
> >       assertEquals(sizeOf(array), sizeOf((Object) array));
> >     }
> > -
> > +
> >     {
> > -      double[] array = new double [rnd.nextInt(1024)];
> > +      double[] array = new double[rnd.nextInt(1024)];
> >       assertEquals(sizeOf(array), sizeOf((Object) array));
> >     }
> >   }
> > -
> > +
> >   public void testReferenceSize() {
> >     if (!isSupportedJVM()) {
> > -      System.err.println("WARN: Your JVM does not support the
> > Oracle/Sun extensions (Hotspot diagnostics, sun.misc.Unsafe),");
> > -      System.err.println("so the memory estimates may be
> > inprecise.");
> > -      System.err.println("Please report this to the Lucene mailing
> > list, noting your JVM version: " +
> > -        Constants.JAVA_VENDOR + " " + Constants.JAVA_VERSION);
> > -    }
> > -    if (VERBOSE) {
> > -      System.out.println("This JVM is 64bit: " +
> > Constants.JRE_IS_64BIT);
> > -      System.out.println("Reference size in this JVM: " +
> > NUM_BYTES_OBJECT_REF);
> > -      System.out.println("Object header size in this JVM: " +
> > NUM_BYTES_OBJECT_HEADER);
> > -      System.out.println("Array header size in this JVM: " +
> > NUM_BYTES_ARRAY_HEADER);
> > -      System.out.println("Object alignment in this JVM: " +
> > NUM_BYTES_OBJECT_ALIGNMENT);
> > +      System.err.println("WARN: Your JVM does not support certain
> > + Oracle/Sun extensions.");
> > +      System.err.println("      Memory estimates may be
> > + inaccurate.");
> > +      System.err.println("      Please report this to the Lucene
> > + mailing list. JVM version: " + RamUsageEstimator.JVM_INFO_STRING);
> > +      for (JvmFeature f : RamUsageEstimator.getUnsupportedFeatures())
> > + {
> > +        System.err.println("      - " + f.toString());
> > +      }
> >     }
> > +
> >     assertTrue(NUM_BYTES_OBJECT_REF == 4 || NUM_BYTES_OBJECT_REF
> ==
> > 8);
> >     if (!Constants.JRE_IS_64BIT) {
> > -      assertEquals("For 32bit JVMs, reference size must always be 4",
> > 4, NUM_BYTES_OBJECT_REF);
> > +      assertEquals("For 32bit JVMs, reference size must always be
> > + 4?", 4, NUM_BYTES_OBJECT_REF);
> >     }
> >   }
> > -
> > +
> >   @SuppressWarnings("unused")
> >   private static class Holder {
> >     long field1 = 5000L;
> > @@ -109,8 +109,7 @@ public class TestRamUsageEstimator exten
> >     Holder holder;
> >     long field2, field3, field4;
> >
> > -    Holder() {
> > -    }
> > +    Holder() {}
> >
> >     Holder(String name, long field1) {
> >       this.name = name;
> > @@ -123,7 +122,7 @@ public class TestRamUsageEstimator exten
> >     byte foo;
> >     int bar;
> >   }
> > -
> > +
> >   private static class HolderSubclass2 extends Holder {
> >     // empty, only inherits all fields -> size should be identical to
> > superclass
> >   }
> >
> > Added:
> > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUs
> > ageEstimatorOnWildAnimals.java
> > URL:
> > http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org
> >
> /apache/lucene/util/TestRamUsageEstimatorOnWildAnimals.java?rev=130448
> > 5&view=auto
> >
> ================================================================
> ======
> > ========
> > ---
> > lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUs
> > ageEstimatorOnWildAnimals.java (added)
> > +++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestR
> > +++ amUsageEstimatorOnWildAnimals.java Fri Mar 23 17:05:58 2012
> > @@ -0,0 +1,37 @@
> > +package org.apache.lucene.util;
> > +
> > +import org.junit.Assert;
> > +
> > +/**
> > + * Check large and special graphs.
> > + */
> > +public class TestRamUsageEstimatorOnWildAnimals extends
> > +LuceneTestCase {
> > +  public static class ListElement {
> > +    ListElement next;
> > +  }
> > +
> > +  public void testOverflowMaxChainLength() {
> > +    int UPPERLIMIT = 100000;
> > +    int lower = 0;
> > +    int upper = UPPERLIMIT;
> > +
> > +    while (lower + 1 < upper) {
> > +      int mid = (lower + upper) / 2;
> > +      try {
> > +        ListElement first = new ListElement();
> > +        ListElement last = first;
> > +        for (int i = 0; i < mid; i++) {
> > +          last = (last.next = new ListElement());
> > +        }
> > +        RamUsageEstimator.sizeOf(first); // cause SOE or pass.
> > +        lower = mid;
> > +      } catch (StackOverflowError e) {
> > +        upper = mid;
> > +      }
> > +    }
> > +
> > +    if (lower + 1 < UPPERLIMIT) {
> > +      Assert.fail("Max object chain length till stack overflow: " +
> > +lower);
> > +    }
> > +  }
> > +}
> >
> >
> 
> 
> 
> --
> lucidimagination.com
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org For additional
> commands, e-mail: dev-help@lucene.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org


Re: svn commit: r1304485 - in /lucene/dev/trunk/lucene/core/src: java/org/apache/lucene/util/ test/org/apache/lucene/util/

Posted by Robert Muir <rc...@gmail.com>.
Can we add a license to the TestIdentityHashSet and
TestRamUsageEstimatorOnWildAnimals? :)

On Fri, Mar 23, 2012 at 1:05 PM,  <us...@apache.org> wrote:
> Author: uschindler
> Date: Fri Mar 23 17:05:58 2012
> New Revision: 1304485
>
> URL: http://svn.apache.org/viewvc?rev=1304485&view=rev
> Log:
> LUCENE-3867: Fix incorrect short-circuit, improve memory usage.
>
> Added:
>    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java   (with props)
>    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimatorOnWildAnimals.java   (with props)
> Modified:
>    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants.java
>    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
>    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java
>
> Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants.java?rev=1304485&r1=1304484&r2=1304485&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants.java (original)
> +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/Constants.java Fri Mar 23 17:05:58 2012
> @@ -27,6 +27,11 @@ import org.apache.lucene.LucenePackage;
>  public final class Constants {
>   private Constants() {}                         // can't construct
>
> +  /** JVM vendor info. */
> +  public static final String JVM_VENDOR = System.getProperty("java.vm.vendor");
> +  public static final String JVM_VERSION = System.getProperty("java.vm.version");
> +  public static final String JVM_NAME = System.getProperty("java.vm.name");
> +
>   /** The value of <tt>System.getProperty("java.version")<tt>. **/
>   public static final String JAVA_VERSION = System.getProperty("java.version");
>
>
> Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java?rev=1304485&r1=1304484&r2=1304485&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java (original)
> +++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java Fri Mar 23 17:05:58 2012
> @@ -24,14 +24,50 @@ import java.text.DecimalFormatSymbols;
>  import java.util.*;
>
>  /**
> - * Estimates the size of Java objects using a simple memory model
> - * for primitive size information.
> + * Estimates the size (memory representation) of Java objects.
> + *
> + * @see #sizeOf(Object)
> + * @see #shallowSizeOf(Object)
> + * @see #shallowSizeOfInstance(Class)
>  *
>  * @lucene.internal
>  */
>  public final class RamUsageEstimator {
> +  /**
> +   * JVM diagnostic features.
> +   */
> +  public static enum JvmFeature {
> +    OBJECT_REFERENCE_SIZE("Object reference size estimated using array index scale."),
> +    ARRAY_HEADER_SIZE("Array header size estimated using array based offset."),
> +    FIELD_OFFSETS("Shallow instance size based on field offsets."),
> +    OBJECT_ALIGNMENT("Object alignment retrieved from HotSpotDiagnostic MX bean.");
> +
> +    public final String description;
> +
> +    private JvmFeature(String description) {
> +      this.description = description;
> +    }
> +
> +    @Override
> +    public String toString() {
> +      return super.name() + " (" + description + ")";
> +    }
> +  }
> +
> +  /** JVM info string for debugging and reports. */
> +  public final static String JVM_INFO_STRING;
> +
> +  /** One kilobyte bytes. */
> +  public static final long ONE_KB = 1024;
>
> -  private RamUsageEstimator() {} // no instance
> +  /** One megabyte bytes. */
> +  public static final long ONE_MB = ONE_KB * ONE_KB;
> +
> +  /** One gigabyte bytes.*/
> +  public static final long ONE_GB = ONE_KB * ONE_MB;
> +
> +  /** No instantiation. */
> +  private RamUsageEstimator() {}
>
>   public final static int NUM_BYTES_BOOLEAN = 1;
>   public final static int NUM_BYTES_BYTE = 1;
> @@ -42,9 +78,19 @@ public final class RamUsageEstimator {
>   public final static int NUM_BYTES_LONG = 8;
>   public final static int NUM_BYTES_DOUBLE = 8;
>
> +  /**
> +   * Number of bytes this jvm uses to represent an object reference.
> +   */
>   public final static int NUM_BYTES_OBJECT_REF;
> -
> +
> +  /**
> +   * Number of bytes to represent an object header (no fields, no alignments).
> +   */
>   public final static int NUM_BYTES_OBJECT_HEADER;
> +
> +  /**
> +   * Number of bytes to represent an array header (no content, but with alignments).
> +   */
>   public final static int NUM_BYTES_ARRAY_HEADER;
>
>   /**
> @@ -69,9 +115,20 @@ public final class RamUsageEstimator {
>     primitiveSizes.put(long.class, Integer.valueOf(NUM_BYTES_LONG));
>   }
>
> +  /**
> +   * A handle to <code>sun.misc.Unsafe</code>.
> +   */
>   private final static Object theUnsafe;
> +
> +  /**
> +   * A handle to <code>sun.misc.Unsafe#fieldOffset(Field)</code>.
> +   */
>   private final static Method objectFieldOffsetMethod;
> -  private final static boolean useUnsafe, isSupportedJVM;
> +
> +  /**
> +   * All the supported "internal" JVM features detected at clinit.
> +   */
> +  private final static EnumSet<JvmFeature> supportedFeatures;
>
>   /**
>    * Initialize constants and try to collect information about the JVM internals.
> @@ -85,80 +142,71 @@ public final class RamUsageEstimator {
>     // so on 64 bit JVMs it'll be align(16 + 4, @8) = 24.
>     int arrayHeader = Constants.JRE_IS_64BIT ? 24 : 12;
>
> -    Object unsafe = null;
> -    Method objectFieldOffsetM = null;
> -    boolean supportedJvm = true;
> +    supportedFeatures = EnumSet.noneOf(JvmFeature.class);
> +
> +    Class<?> unsafeClass = null;
> +    Object tempTheUnsafe = null;
>     try {
> -      final Class<?> unsafeClass = Class.forName("sun.misc.Unsafe");
> +      unsafeClass = Class.forName("sun.misc.Unsafe");
>       final Field unsafeField = unsafeClass.getDeclaredField("theUnsafe");
>       unsafeField.setAccessible(true);
> -      unsafe = unsafeField.get(null);
> -
> -      // get object reference size by getting scale factor of Object[] arrays:
> -      try {
> -        final Method arrayIndexScaleM = unsafeClass.getMethod("arrayIndexScale", Class.class);
> -        referenceSize = ((Number) arrayIndexScaleM.invoke(unsafe, Object[].class)).intValue();
> -      } catch (Exception e) {
> -        // ignore
> -        supportedJvm = false;
> -      }
> -
> -      // updated best guess based on reference size:
> -      objectHeader = Constants.JRE_IS_64BIT ? (8 + referenceSize) : 8;
> -      arrayHeader = Constants.JRE_IS_64BIT ? (8 + 2 * referenceSize) : 12;
> -
> -      // get the object header size:
> -      // - first try out if the field offsets are not scaled (see warning in Unsafe docs)
> -      // - get the object header size by getting the field offset of the first field of a dummy object
> -      // If the scaling is byte-wise and unsafe is available, enable dynamic size measurement for
> -      // estimateRamUsage().
> -      try {
> -        objectFieldOffsetM = unsafeClass.getMethod("objectFieldOffset", Field.class);
> -        final Field dummy1Field = DummyTwoLongObject.class.getDeclaredField("dummy1");
> -        final int ofs1 = ((Number) objectFieldOffsetM.invoke(unsafe, dummy1Field)).intValue();
> -        final Field dummy2Field = DummyTwoLongObject.class.getDeclaredField("dummy2");
> -        final int ofs2 = ((Number) objectFieldOffsetM.invoke(unsafe, dummy2Field)).intValue();
> -        if (Math.abs(ofs2 - ofs1) == NUM_BYTES_LONG) {
> -          final Field baseField = DummyOneFieldObject.class.getDeclaredField("base");
> -          objectHeader = ((Number) objectFieldOffsetM.invoke(unsafe, baseField)).intValue();
> -        } else {
> -          // it is not safe to use Unsafe.objectFieldOffset(),
> -          // as it may be scaled (see "cookie" comment in Unsafe), better use defaults
> -          // and conventional size estimation:
> -          objectFieldOffsetM = null;
> -          supportedJvm = false;
> -        }
> -      } catch (Exception e) {
> -        // on exception ensure useUnsafe will be set to false later:
> -        objectFieldOffsetM = null;
> -        supportedJvm = false;
> -      }
> +      tempTheUnsafe = unsafeField.get(null);
> +    } catch (Exception e) {
> +      // Ignore.
> +    }
> +    theUnsafe = tempTheUnsafe;
>
> -      // Get the array header size by retrieving the array base offset
> -      // (offset of the first element of an array).
> -      try {
> -        final Method arrayBaseOffsetM = unsafeClass.getMethod("arrayBaseOffset", Class.class);
> -        // we calculate that only for byte[] arrays, it's actually the same for all types:
> -        arrayHeader = ((Number) arrayBaseOffsetM.invoke(unsafe, byte[].class)).intValue();
> -      } catch (Exception e) {
> -        // ignore
> -        supportedJvm = false;
> +    // get object reference size by getting scale factor of Object[] arrays:
> +    try {
> +      final Method arrayIndexScaleM = unsafeClass.getMethod("arrayIndexScale", Class.class);
> +      referenceSize = ((Number) arrayIndexScaleM.invoke(theUnsafe, Object[].class)).intValue();
> +      supportedFeatures.add(JvmFeature.OBJECT_REFERENCE_SIZE);
> +    } catch (Exception e) {
> +      // ignore.
> +    }
> +
> +    // "best guess" based on reference size. We will attempt to modify
> +    // these to exact values if there is supported infrastructure.
> +    objectHeader = Constants.JRE_IS_64BIT ? (8 + referenceSize) : 8;
> +    arrayHeader =  Constants.JRE_IS_64BIT ? (8 + 2 * referenceSize) : 12;
> +
> +    // get the object header size:
> +    // - first try out if the field offsets are not scaled (see warning in Unsafe docs)
> +    // - get the object header size by getting the field offset of the first field of a dummy object
> +    // If the scaling is byte-wise and unsafe is available, enable dynamic size measurement for
> +    // estimateRamUsage().
> +    Method tempObjectFieldOffsetMethod = null;
> +    try {
> +      final Method objectFieldOffsetM = unsafeClass.getMethod("objectFieldOffset", Field.class);
> +      final Field dummy1Field = DummyTwoLongObject.class.getDeclaredField("dummy1");
> +      final int ofs1 = ((Number) objectFieldOffsetM.invoke(theUnsafe, dummy1Field)).intValue();
> +      final Field dummy2Field = DummyTwoLongObject.class.getDeclaredField("dummy2");
> +      final int ofs2 = ((Number) objectFieldOffsetM.invoke(theUnsafe, dummy2Field)).intValue();
> +      if (Math.abs(ofs2 - ofs1) == NUM_BYTES_LONG) {
> +        final Field baseField = DummyOneFieldObject.class.getDeclaredField("base");
> +        objectHeader = ((Number) objectFieldOffsetM.invoke(theUnsafe, baseField)).intValue();
> +        supportedFeatures.add(JvmFeature.FIELD_OFFSETS);
> +        tempObjectFieldOffsetMethod = objectFieldOffsetM;
>       }
>     } catch (Exception e) {
> -      // ignore
> -      supportedJvm = false;
> +      // Ignore.
> +    }
> +    objectFieldOffsetMethod = tempObjectFieldOffsetMethod;
> +
> +    // Get the array header size by retrieving the array base offset
> +    // (offset of the first element of an array).
> +    try {
> +      final Method arrayBaseOffsetM = unsafeClass.getMethod("arrayBaseOffset", Class.class);
> +      // we calculate that only for byte[] arrays, it's actually the same for all types:
> +      arrayHeader = ((Number) arrayBaseOffsetM.invoke(theUnsafe, byte[].class)).intValue();
> +      supportedFeatures.add(JvmFeature.ARRAY_HEADER_SIZE);
> +    } catch (Exception e) {
> +      // Ignore.
>     }
>
>     NUM_BYTES_OBJECT_REF = referenceSize;
>     NUM_BYTES_OBJECT_HEADER = objectHeader;
>     NUM_BYTES_ARRAY_HEADER = arrayHeader;
> -    useUnsafe = (unsafe != null && objectFieldOffsetM != null);
> -    if (useUnsafe) {
> -      theUnsafe = unsafe;
> -      objectFieldOffsetMethod = objectFieldOffsetM;
> -    } else {
> -      theUnsafe = objectFieldOffsetMethod = null;
> -    }
>
>     // Try to get the object alignment (the default seems to be 8 on Hotspot,
>     // regardless of the architecture).
> @@ -176,18 +224,34 @@ public final class RamUsageEstimator {
>         objectAlignment = Integer.parseInt(
>             vmOption.getClass().getMethod("getValue").invoke(vmOption).toString()
>         );
> +        supportedFeatures.add(JvmFeature.OBJECT_ALIGNMENT);
>       } catch (InvocationTargetException ite) {
>         if (!(ite.getCause() instanceof IllegalArgumentException))
>           throw ite;
>         // ignore the error completely and use default of 8 (32 bit JVMs).
>       }
>     } catch (Exception e) {
> -      // ignore
> -      supportedJvm = false;
> +      // Ignore.
>     }
> +
>     NUM_BYTES_OBJECT_ALIGNMENT = objectAlignment;
>
> -    isSupportedJVM = supportedJvm;
> +    JVM_INFO_STRING = "[JVM: " +
> +        Constants.JVM_NAME + ", " + Constants.JVM_VERSION + ", " + Constants.JVM_VENDOR + ", " +
> +        Constants.JAVA_VENDOR + ", " + Constants.JAVA_VERSION + "]";
> +  }
> +
> +  /**
> +   * Cached information about a given class.
> +   */
> +  private static final class ClassCache {
> +    public final long alignedShallowInstanceSize;
> +    public final Field[] referenceFields;
> +
> +    public ClassCache(long alignedShallowInstanceSize, Field[] referenceFields) {
> +      this.alignedShallowInstanceSize = alignedShallowInstanceSize;
> +      this.referenceFields = referenceFields;
> +    }
>   }
>
>   // Object with just one field to determine the object header size by getting the offset of the dummy field:
> @@ -204,14 +268,14 @@ public final class RamUsageEstimator {
>   }
>
>   /**
> -   * Returns true, if the current JVM is supported by {@code RamUsageEstimator}.
> +   * Returns true, if the current JVM is fully supported by {@code RamUsageEstimator}.
>    * If this method returns {@code false} you are maybe using a 3rd party Java VM
>    * that is not supporting Oracle/Sun private APIs. The memory estimates can be
>    * imprecise then (no way of detecting compressed references, alignments, etc.).
>    * Lucene still tries to use sensible defaults.
>    */
>   public static boolean isSupportedJVM() {
> -    return isSupportedJVM;
> +    return supportedFeatures.size() == JvmFeature.values().length;
>   }
>
>   /**
> @@ -272,13 +336,7 @@ public final class RamUsageEstimator {
>    * should be GCed.</p>
>    */
>   public static long sizeOf(Object obj) {
> -    final Set<Object> seen = Collections.newSetFromMap(new IdentityHashMap<Object,Boolean>(64));
> -    try {
> -      return measureObjectSize(obj, seen);
> -    } finally {
> -      // Help the GC.
> -      seen.clear();
> -    }
> +    return measureObjectSize(obj);
>   }
>
>   /**
> @@ -292,7 +350,7 @@ public final class RamUsageEstimator {
>     if (obj == null) return 0;
>     final Class<?> clz = obj.getClass();
>     if (clz.isArray()) {
> -      return measureArraySize(obj, null);
> +      return shallowSizeOfArray(obj);
>     } else {
>       return shallowSizeOfInstance(clz);
>     }
> @@ -302,8 +360,8 @@ public final class RamUsageEstimator {
>    * Returns the shallow instance size in bytes an instance of the given class would occupy.
>    * This works with all conventional classes and primitive types, but not with arrays
>    * (the size then depends on the number of elements and varies from object to object).
> -   * Use the array-instance methods instead.
>    *
> +   * @see #shallowSizeOf(Object)
>    * @throws IllegalArgumentException if {@code clazz} is an array class.
>    */
>   public static long shallowSizeOfInstance(Class<?> clazz) {
> @@ -313,87 +371,163 @@ public final class RamUsageEstimator {
>       return primitiveSizes.get(clazz);
>
>     long size = NUM_BYTES_OBJECT_HEADER;
> -
> +
>     // Walk type hierarchy
> -    while (clazz != null) {
> +    for (;clazz != null; clazz = clazz.getSuperclass()) {
>       final Field[] fields = clazz.getDeclaredFields();
> -      boolean fieldFound = false;
> -      for (final Field f : fields) {
> -        if (Modifier.isStatic(f.getModifiers())) {
> -          continue;
> +      for (Field f : fields) {
> +        if (!Modifier.isStatic(f.getModifiers())) {
> +          size = adjustForField(size, f);
>         }
> -
> -        size = reflectFieldSize(size, f);
> -        fieldFound = true;
>       }
> -      if (useUnsafe && fieldFound) {
> -        // no need to recurse to superclasses, as all fields are
> -        // added at the end, so we won't find any larger offset
> -        break;
> -      }
> -      clazz = clazz.getSuperclass();
>     }
>     return alignObjectSize(size);
>   }
>
>   /**
> -   * Recursive descend into an object.
> +   * Return shallow size of any <code>array</code>.
>    */
> -  private static long measureObjectSize(Object obj, Set<Object> seen) {
> -    if (obj == null) {
> -      return 0;
> +  private static long shallowSizeOfArray(Object array) {
> +    long size = NUM_BYTES_ARRAY_HEADER;
> +    final int len = Array.getLength(array);
> +    if (len > 0) {
> +      Class<?> arrayElementClazz = array.getClass().getComponentType();
> +      if (arrayElementClazz.isPrimitive()) {
> +        size += (long) len * primitiveSizes.get(arrayElementClazz);
> +      } else {
> +        size += (long) NUM_BYTES_OBJECT_REF * len;
> +      }
>     }
> +    return alignObjectSize(size);
> +  }
>
> -    // skip if we have seen before
> -    if (seen.contains(obj)) {
> -      return 0;
> -    }
> +  /*
> +   * Non-recursive version of object descend. This consumes more memory than recursive in-depth
> +   * traversal but prevents stack overflows on long chains of objects
> +   * or complex graphs (a max. recursion depth on my machine was ~5000 objects linked in a chain
> +   * so not too much).
> +   */
> +  private static long measureObjectSize(Object root) {
> +    // Objects seen so far.
> +    final IdentityHashSet<Object> seen = new IdentityHashSet<Object>();
> +    // Class cache with reference Field and precalculated shallow size.
> +    final IdentityHashMap<Class<?>, ClassCache> classCache = new IdentityHashMap<Class<?>, ClassCache>();
> +    // Stack of objects pending traversal. Recursion caused stack overflows.
> +    final ArrayList<Object> stack = new ArrayList<Object>();
> +    stack.add(root);
> +
> +    long totalSize = 0;
> +    while (!stack.isEmpty()) {
> +      final Object ob = stack.remove(stack.size() - 1);
>
> -    // add to seen
> -    seen.add(obj);
> +      if (ob == null || seen.contains(ob)) {
> +        continue;
> +      }
> +      seen.add(ob);
>
> -    Class<?> clazz = obj.getClass();
> -    if (clazz.isArray()) {
> -      return measureArraySize(obj, seen);
> +      final Class<?> obClazz = ob.getClass();
> +      if (obClazz.isArray()) {
> +        /*
> +         * Consider an array, possibly of primitive types. Push any of its references to
> +         * the processing stack and accumulate this array's shallow size.
> +         */
> +        long size = NUM_BYTES_ARRAY_HEADER;
> +        final int len = Array.getLength(ob);
> +        if (len > 0) {
> +          Class<?> componentClazz = obClazz.getComponentType();
> +          if (componentClazz.isPrimitive()) {
> +            size += (long) len * primitiveSizes.get(componentClazz);
> +          } else {
> +            size += (long) NUM_BYTES_OBJECT_REF * len;
> +
> +            // Push refs for traversal later.
> +            for (int i = len; --i >= 0 ;) {
> +              final Object o = Array.get(ob, i);
> +              if (o != null && !seen.contains(o)) {
> +                stack.add(o);
> +              }
> +            }
> +          }
> +        }
> +        totalSize += alignObjectSize(size);
> +      } else {
> +        /*
> +         * Consider an object. Push any references it has to the processing stack
> +         * and accumulate this object's shallow size.
> +         */
> +        try {
> +          ClassCache cachedInfo = classCache.get(obClazz);
> +          if (cachedInfo == null) {
> +            classCache.put(obClazz, cachedInfo = createCacheEntry(obClazz));
> +          }
> +
> +          for (Field f : cachedInfo.referenceFields) {
> +            // Fast path to eliminate redundancies.
> +            final Object o = f.get(ob);
> +            if (o != null && !seen.contains(o)) {
> +              stack.add(o);
> +            }
> +          }
> +
> +          totalSize += cachedInfo.alignedShallowInstanceSize;
> +        } catch (IllegalAccessException e) {
> +          // this should never happen as we enabled setAccessible().
> +          throw new RuntimeException("Reflective field access failed?", e);
> +        }
> +      }
>     }
>
> -    long size = NUM_BYTES_OBJECT_HEADER;
> -    long innerSize = 0L;
> +    // Help the GC (?).
> +    seen.clear();
> +    stack.clear();
> +    classCache.clear();
>
> -    // walk type hierarchy
> -    while (clazz != null) {
> -      final Field[] fields = clazz.getDeclaredFields();
> +    return totalSize;
> +  }
> +
> +  /**
> +   * Create a cached information about shallow size and reference fields for
> +   * a given class.
> +   */
> +  private static ClassCache createCacheEntry(final Class<?> clazz) {
> +    ClassCache cachedInfo;
> +    long shallowInstanceSize = NUM_BYTES_OBJECT_HEADER;
> +    final ArrayList<Field> referenceFields = new ArrayList<Field>(32);
> +    for (Class<?> c = clazz; c != null; c = c.getSuperclass()) {
> +      final Field[] fields = c.getDeclaredFields();
>       for (final Field f : fields) {
> -        if (Modifier.isStatic(f.getModifiers())) {
> -          continue;
> -        }
> +        if (!Modifier.isStatic(f.getModifiers())) {
> +          shallowInstanceSize = adjustForField(shallowInstanceSize, f);
>
> -        size = reflectFieldSize(size, f);
> -
> -        if (!f.getType().isPrimitive()) {
> -          try {
> +          if (!f.getType().isPrimitive()) {
>             f.setAccessible(true);
> -            innerSize += measureObjectSize(f.get(obj), seen);
> -          } catch (IllegalAccessException ex) {
> -            // this should never happen as we enable setAccessible()!
> -            throw new RuntimeException("Cannot reflect instance field: " +
> -              f.getDeclaringClass().getName() + "#" + f.getName(), ex);
> +            referenceFields.add(f);
>           }
>         }
>       }
> -      clazz = clazz.getSuperclass();
>     }
> -    return alignObjectSize(size) + innerSize;
> +
> +    cachedInfo = new ClassCache(
> +        alignObjectSize(shallowInstanceSize),
> +        referenceFields.toArray(new Field[referenceFields.size()]));
> +    return cachedInfo;
>   }
> -
> -  private static long reflectFieldSize(long size, final Field f) {
> +
> +  /**
> +   * This method returns the maximum representation size of an object. <code>sizeSoFar</code>
> +   * is the object's size measured so far. <code>f</code> is the field being probed.
> +   *
> +   * <p>The returned offset will be the maximum of whatever was measured so far and
> +   * <code>f</code> field's offset and representation size (unaligned).
> +   */
> +  private static long adjustForField(long sizeSoFar, final Field f) {
>     final Class<?> type = f.getType();
>     final int fsize = type.isPrimitive() ? primitiveSizes.get(type) : NUM_BYTES_OBJECT_REF;
> -    if (useUnsafe) {
> +    if (objectFieldOffsetMethod != null) {
>       try {
>         final long offsetPlusSize =
>           ((Number) objectFieldOffsetMethod.invoke(theUnsafe, f)).longValue() + fsize;
> -        return Math.max(size, offsetPlusSize);
> +        return Math.max(sizeSoFar, offsetPlusSize);
>       } catch (IllegalAccessException ex) {
>         throw new RuntimeException("Access problem with sun.misc.Unsafe", ex);
>       } catch (InvocationTargetException ite) {
> @@ -409,40 +543,22 @@ public final class RamUsageEstimator {
>           f.getDeclaringClass().getName() + "#" + f.getName(), cause);
>       }
>     } else {
> -      return size + fsize;
> +      // TODO: this fallback does not model alignment/padding of field types or subclass field layouts.
> +      return sizeSoFar + fsize;
>     }
>   }
>
> -  /**
> -   * Return the deep size of an <code>array</code>, including
> -   * sub-objects if there are any.
> -   *
> -   * @param seen A set of already seen objects. If <code>null</code> no references
> -   *      are followed and this method returns shallow size.
> -   */
> -  private static long measureArraySize(Object array, Set<Object> seen) {
> -    long size = NUM_BYTES_ARRAY_HEADER;
> -    final int len = Array.getLength(array);
> -    if (len > 0) {
> -      Class<?> arrayElementClazz = array.getClass().getComponentType();
> -      if (arrayElementClazz.isPrimitive()) {
> -        size += (long) len * primitiveSizes.get(arrayElementClazz);
> -      } else {
> -        size += (long) NUM_BYTES_OBJECT_REF * len;
> -        if (seen != null) {
> -          for (int i = 0; i < len; i++) {
> -            size += measureObjectSize(Array.get(array, i), seen);
> -          }
> -        }
> -      }
> -    }
> -
> -    return alignObjectSize(size);
> +  /** Return the set of unsupported JVM features that improve the estimation. */
> +  public static EnumSet<JvmFeature> getUnsupportedFeatures() {
> +    EnumSet<JvmFeature> unsupported = EnumSet.allOf(JvmFeature.class);
> +    unsupported.removeAll(supportedFeatures);
> +    return unsupported;
>   }
>
> -  public static final long ONE_KB = 1024;
> -  public static final long ONE_MB = ONE_KB * ONE_KB;
> -  public static final long ONE_GB = ONE_KB * ONE_MB;
> +  /** Return the set of supported JVM features that improve the estimation. */
> +  public static EnumSet<JvmFeature> getSupportedFeatures() {
> +    return EnumSet.copyOf(supportedFeatures);
> +  }
>
>   /**
>    * Returns <code>size</code> in human-readable units (GB, MB, KB or bytes).
> @@ -466,4 +582,252 @@ public final class RamUsageEstimator {
>       return bytes + " bytes";
>     }
>   }
> +
> +  /**
> +   * Return a human-readable size of a given object.
> +   * @see #sizeOf(Object)
> +   * @see #humanReadableUnits(long)
> +   */
> +  public static String humanSizeOf(Object object) {
> +    return humanReadableUnits(sizeOf(object));
> +  }
> +
> +  /**
> +   * An identity hash set implemented using open addressing. No null keys are allowed.
> +   *
> +   * TODO: If this is useful outside this class, make it public - needs some work
> +   */
> +  static final class IdentityHashSet<KType> implements Iterable<KType> {
> +    /**
> +     * Default load factor.
> +     */
> +    public final static float DEFAULT_LOAD_FACTOR = 0.75f;
> +
> +    /**
> +     * Minimum capacity for the set.
> +     */
> +    public final static int MIN_CAPACITY = 4;
> +
> +    /**
> +     * All of set entries. Always of power of two length.
> +     */
> +    public Object[] keys;
> +
> +    /**
> +     * Cached number of assigned slots.
> +     */
> +    public int assigned;
> +
> +    /**
> +     * The load factor for this set (fraction of allocated or deleted slots before
> +     * the buffers must be rehashed or reallocated).
> +     */
> +    public final float loadFactor;
> +
> +    /**
> +     * Cached capacity threshold at which we must resize the buffers.
> +     */
> +    private int resizeThreshold;
> +
> +    /**
> +     * Creates a hash set with the default capacity of 16 and a
> +     * load factor of {@value #DEFAULT_LOAD_FACTOR}.
> +     */
> +    public IdentityHashSet() {
> +      this(16, DEFAULT_LOAD_FACTOR);
> +    }
> +
> +    /**
> +     * Creates a hash set with the given capacity, load factor of
> +     * {@value #DEFAULT_LOAD_FACTOR}.
> +     */
> +    public IdentityHashSet(int initialCapacity) {
> +      this(initialCapacity, DEFAULT_LOAD_FACTOR);
> +    }
> +
> +    /**
> +     * Creates a hash set with the given capacity and load factor.
> +     */
> +    public IdentityHashSet(int initialCapacity, float loadFactor) {
> +      initialCapacity = Math.max(MIN_CAPACITY, initialCapacity);
> +
> +      assert initialCapacity > 0 : "Initial capacity must be between (0, "
> +          + Integer.MAX_VALUE + "].";
> +      assert loadFactor > 0 && loadFactor < 1 : "Load factor must be between (0, 1).";
> +      this.loadFactor = loadFactor;
> +      allocateBuffers(roundCapacity(initialCapacity));
> +    }
> +
> +    /**
> +     * Adds a reference to the set. Null keys are not allowed.
> +     */
> +    public boolean add(KType e) {
> +      assert e != null : "Null keys not allowed.";
> +
> +      if (assigned >= resizeThreshold) expandAndRehash();
> +
> +      final int mask = keys.length - 1;
> +      int slot = rehash(e) & mask;
> +      Object existing;
> +      while ((existing = keys[slot]) != null) {
> +        if (e == existing) {
> +          return false; // already found.
> +        }
> +        slot = (slot + 1) & mask;
> +      }
> +      assigned++;
> +      keys[slot] = e;
> +      return true;
> +    }
> +
> +    /**
> +     * Checks if the set contains a given ref.
> +     */
> +    public boolean contains(KType e) {
> +      final int mask = keys.length - 1;
> +      int slot = rehash(e) & mask;
> +      Object existing;
> +      while ((existing = keys[slot]) != null) {
> +        if (e == existing) {
> +          return true;
> +        }
> +        slot = (slot + 1) & mask;
> +      }
> +      return false;
> +    }
> +
> +    /** Rehash via MurmurHash.
> +     *
> +     * <p>The implementation is based on the
> +     * finalization step from Austin Appleby's
> +     * <code>MurmurHash3</code>.
> +     *
> +     * @see "http://sites.google.com/site/murmurhash/"
> +     */
> +    private static int rehash(Object o) {
> +      int k = System.identityHashCode(o);
> +      k ^= k >>> 16;
> +      k *= 0x85ebca6b;
> +      k ^= k >>> 13;
> +      k *= 0xc2b2ae35;
> +      k ^= k >>> 16;
> +      return k;
> +    }
> +
> +    /**
> +     * Expand the internal storage buffers (capacity) and rehash all
> +     * previously assigned keys into the new, larger buffer.
> +     */
> +    private void expandAndRehash() {
> +      final Object[] oldKeys = this.keys;
> +
> +      assert assigned >= resizeThreshold;
> +      allocateBuffers(nextCapacity(keys.length));
> +
> +      /*
> +       * Rehash all assigned slots from the old hash table.
> +       */
> +      final int mask = keys.length - 1;
> +      for (int i = 0; i < oldKeys.length; i++) {
> +        final Object key = oldKeys[i];
> +        if (key != null) {
> +          int slot = rehash(key) & mask;
> +          while (keys[slot] != null) {
> +            slot = (slot + 1) & mask;
> +          }
> +          keys[slot] = key;
> +        }
> +      }
> +      Arrays.fill(oldKeys, null);
> +    }
> +
> +    /**
> +     * Allocate internal buffers for a given capacity.
> +     *
> +     * @param capacity
> +     *          New capacity (must be a power of two).
> +     */
> +    private void allocateBuffers(int capacity) {
> +      this.keys = new Object[capacity];
> +      this.resizeThreshold = (int) (capacity * DEFAULT_LOAD_FACTOR);
> +    }
> +
> +    /**
> +     * Return the next possible capacity, counting from the current buffers' size.
> +     */
> +    protected int nextCapacity(int current) {
> +      assert current > 0 && Long.bitCount(current) == 1 : "Capacity must be a power of two.";
> +      assert ((current << 1) > 0) : "Maximum capacity exceeded ("
> +          + (0x80000000 >>> 1) + ").";
> +
> +      if (current < MIN_CAPACITY / 2) current = MIN_CAPACITY / 2;
> +      return current << 1;
> +    }
> +
> +    /**
> +     * Round the capacity to the next allowed value.
> +     */
> +    protected int roundCapacity(int requestedCapacity) {
> +      // Maximum positive integer that is a power of two.
> +      if (requestedCapacity > (0x80000000 >>> 1)) return (0x80000000 >>> 1);
> +
> +      int capacity = MIN_CAPACITY;
> +      while (capacity < requestedCapacity) {
> +        capacity <<= 1;
> +      }
> +
> +      return capacity;
> +    }
> +
> +    public void clear() {
> +      assigned = 0;
> +      Arrays.fill(keys, null);
> +    }
> +
> +    public int size() {
> +      return assigned;
> +    }
> +
> +    public boolean isEmpty() {
> +      return size() == 0;
> +    }
> +
> +    @Override
> +    public Iterator<KType> iterator() {
> +      return new Iterator<KType>() {
> +        int pos = -1;
> +        Object nextElement = fetchNext();
> +
> +        @Override
> +        public boolean hasNext() {
> +          return nextElement != null;
> +        }
> +
> +        @SuppressWarnings("unchecked")
> +        @Override
> +        public KType next() {
> +          Object r = this.nextElement;
> +          if (r == null) {
> +            throw new NoSuchElementException();
> +          }
> +          this.nextElement = fetchNext();
> +          return (KType) r;
> +        }
> +
> +        private Object fetchNext() {
> +          pos++;
> +          while (pos < keys.length && keys[pos] == null) {
> +            pos++;
> +          }
> +
> +          return (pos >= keys.length ? null : keys[pos]);
> +        }
> +
> +        @Override
> +        public void remove() {
> +          throw new UnsupportedOperationException();
> +        }
> +      };
> +    }
> +  }
>  }
>
> Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java?rev=1304485&view=auto
> ==============================================================================
> --- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java (added)
> +++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestIdentityHashSet.java Fri Mar 23 17:05:58 2012
> @@ -0,0 +1,39 @@
> +package org.apache.lucene.util;
> +
> +import java.util.*;
> +
> +import org.junit.Assert;
> +import org.junit.Test;
> +
> +public class TestIdentityHashSet extends LuceneTestCase {
> +  @Test
> +  public void testCheck() {
> +    Random rnd = random;
> +    Set<Object> jdk = Collections.newSetFromMap(
> +        new IdentityHashMap<Object,Boolean>());
> +    RamUsageEstimator.IdentityHashSet<Object> us = new RamUsageEstimator.IdentityHashSet<Object>();
> +
> +    int max = 100000;
> +    int threshold = 256;
> +    for (int i = 0; i < max; i++) {
> +      // some of these will be interned and some will not so there will be collisions.
> +      Integer v = rnd.nextInt(threshold);
> +
> +      boolean e1 = jdk.contains(v);
> +      boolean e2 = us.contains(v);
> +      Assert.assertEquals(e1, e2);
> +
> +      e1 = jdk.add(v);
> +      e2 = us.add(v);
> +      Assert.assertEquals(e1, e2);
> +    }
> +
> +    Set<Object> collected = Collections.newSetFromMap(
> +        new IdentityHashMap<Object,Boolean>());
> +    for (Object o : us) {
> +      collected.add(o);
> +    }
> +
> +    Assert.assertEquals(collected, jdk);
> +  }
> +}
>
> Modified: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java?rev=1304485&r1=1304484&r2=1304485&view=diff
> ==============================================================================
> --- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java (original)
> +++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java Fri Mar 23 17:05:58 2012
> @@ -22,86 +22,86 @@ import java.util.Random;
>  */
>
>  public class TestRamUsageEstimator extends LuceneTestCase {
> +  public void testSanity() {
> +    assertTrue(sizeOf(new String("test string")) > shallowSizeOfInstance(String.class));
>
> -  public void testBasic() {
> -    assertTrue(sizeOf(new String("test strin")) > shallowSizeOfInstance(String.class));
> -
>     Holder holder = new Holder();
>     holder.holder = new Holder("string2", 5000L);
>     assertTrue(sizeOf(holder) > shallowSizeOfInstance(Holder.class));
>     assertTrue(sizeOf(holder) > sizeOf(holder.holder));
>
> -    assertTrue(shallowSizeOfInstance(HolderSubclass.class) >= shallowSizeOfInstance(Holder.class));
> -    assertEquals(shallowSizeOfInstance(Holder.class), shallowSizeOfInstance(HolderSubclass2.class));
> -
> -    String[] strings = new String[]{new String("test strin"), new String("hollow"), new String("catchmaster")};
> +    assertTrue(
> +        shallowSizeOfInstance(HolderSubclass.class) >= shallowSizeOfInstance(Holder.class));
> +    assertTrue(
> +        shallowSizeOfInstance(Holder.class)         == shallowSizeOfInstance(HolderSubclass2.class));
> +
> +    String[] strings = new String[] {
> +        new String("test string"),
> +        new String("hollow"),
> +        new String("catchmaster")
> +    };
>     assertTrue(sizeOf(strings) > shallowSizeOf(strings));
>   }
>
>   public void testStaticOverloads() {
>     Random rnd = random;
> -
>     {
> -      byte[] array = new byte [rnd.nextInt(1024)];
> +      byte[] array = new byte[rnd.nextInt(1024)];
>       assertEquals(sizeOf(array), sizeOf((Object) array));
>     }
> -
> +
>     {
> -      boolean[] array = new boolean [rnd.nextInt(1024)];
> +      boolean[] array = new boolean[rnd.nextInt(1024)];
>       assertEquals(sizeOf(array), sizeOf((Object) array));
>     }
> -
> +
>     {
> -      char[] array = new char [rnd.nextInt(1024)];
> +      char[] array = new char[rnd.nextInt(1024)];
>       assertEquals(sizeOf(array), sizeOf((Object) array));
>     }
> -
> +
>     {
> -      short[] array = new short [rnd.nextInt(1024)];
> +      short[] array = new short[rnd.nextInt(1024)];
>       assertEquals(sizeOf(array), sizeOf((Object) array));
>     }
> -
> +
>     {
> -      int[] array = new int [rnd.nextInt(1024)];
> +      int[] array = new int[rnd.nextInt(1024)];
>       assertEquals(sizeOf(array), sizeOf((Object) array));
>     }
> -
> +
>     {
> -      float[] array = new float [rnd.nextInt(1024)];
> +      float[] array = new float[rnd.nextInt(1024)];
>       assertEquals(sizeOf(array), sizeOf((Object) array));
>     }
> -
> +
>     {
> -      long[] array = new long [rnd.nextInt(1024)];
> +      long[] array = new long[rnd.nextInt(1024)];
>       assertEquals(sizeOf(array), sizeOf((Object) array));
>     }
> -
> +
>     {
> -      double[] array = new double [rnd.nextInt(1024)];
> +      double[] array = new double[rnd.nextInt(1024)];
>       assertEquals(sizeOf(array), sizeOf((Object) array));
>     }
>   }
> -
> +
>   public void testReferenceSize() {
>     if (!isSupportedJVM()) {
> -      System.err.println("WARN: Your JVM does not support the Oracle/Sun extensions (Hotspot diagnostics, sun.misc.Unsafe),");
> -      System.err.println("so the memory estimates may be inprecise.");
> -      System.err.println("Please report this to the Lucene mailing list, noting your JVM version: " +
> -        Constants.JAVA_VENDOR + " " + Constants.JAVA_VERSION);
> -    }
> -    if (VERBOSE) {
> -      System.out.println("This JVM is 64bit: " + Constants.JRE_IS_64BIT);
> -      System.out.println("Reference size in this JVM: " + NUM_BYTES_OBJECT_REF);
> -      System.out.println("Object header size in this JVM: " + NUM_BYTES_OBJECT_HEADER);
> -      System.out.println("Array header size in this JVM: " + NUM_BYTES_ARRAY_HEADER);
> -      System.out.println("Object alignment in this JVM: " + NUM_BYTES_OBJECT_ALIGNMENT);
> +      System.err.println("WARN: Your JVM does not support certain Oracle/Sun extensions.");
> +      System.err.println("      Memory estimates may be inaccurate.");
> +      System.err.println("      Please report this to the Lucene mailing list. JVM version: " + RamUsageEstimator.JVM_INFO_STRING);
> +      for (JvmFeature f : RamUsageEstimator.getUnsupportedFeatures()) {
> +        System.err.println("      - " + f.toString());
> +      }
>     }
> +
>     assertTrue(NUM_BYTES_OBJECT_REF == 4 || NUM_BYTES_OBJECT_REF == 8);
>     if (!Constants.JRE_IS_64BIT) {
> -      assertEquals("For 32bit JVMs, reference size must always be 4", 4, NUM_BYTES_OBJECT_REF);
> +      assertEquals("For 32bit JVMs, reference size must always be 4?", 4, NUM_BYTES_OBJECT_REF);
>     }
>   }
> -
> +
>   @SuppressWarnings("unused")
>   private static class Holder {
>     long field1 = 5000L;
> @@ -109,8 +109,7 @@ public class TestRamUsageEstimator exten
>     Holder holder;
>     long field2, field3, field4;
>
> -    Holder() {
> -    }
> +    Holder() {}
>
>     Holder(String name, long field1) {
>       this.name = name;
> @@ -123,7 +122,7 @@ public class TestRamUsageEstimator exten
>     byte foo;
>     int bar;
>   }
> -
> +
>   private static class HolderSubclass2 extends Holder {
>     // empty, only inherits all fields -> size should be identical to superclass
>   }
>
> Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimatorOnWildAnimals.java
> URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimatorOnWildAnimals.java?rev=1304485&view=auto
> ==============================================================================
> --- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimatorOnWildAnimals.java (added)
> +++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimatorOnWildAnimals.java Fri Mar 23 17:05:58 2012
> @@ -0,0 +1,37 @@
> +package org.apache.lucene.util;
> +
> +import org.junit.Assert;
> +
> +/**
> + * Check large and special graphs.
> + */
> +public class TestRamUsageEstimatorOnWildAnimals extends LuceneTestCase {
> +  public static class ListElement {
> +    ListElement next;
> +  }
> +
> +  public void testOverflowMaxChainLength() {
> +    int UPPERLIMIT = 100000;
> +    int lower = 0;
> +    int upper = UPPERLIMIT;
> +
> +    while (lower + 1 < upper) {
> +      int mid = (lower + upper) / 2;
> +      try {
> +        ListElement first = new ListElement();
> +        ListElement last = first;
> +        for (int i = 0; i < mid; i++) {
> +          last = (last.next = new ListElement());
> +        }
> +        RamUsageEstimator.sizeOf(first); // cause SOE or pass.
> +        lower = mid;
> +      } catch (StackOverflowError e) {
> +        upper = mid;
> +      }
> +    }
> +
> +    if (lower + 1 < UPPERLIMIT) {
> +      Assert.fail("Max object chain length till stack overflow: " + lower);
> +    }
> +  }
> +}
>
>



-- 
lucidimagination.com

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org