You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/06/19 02:05:33 UTC
svn commit: r1603682 [2/2] - in /lucene/dev/branches/lucene_solr_4_9: ./ lucene/ lucene/codecs/ lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/ lucene/codecs/src/java/org/apache/lucene/codecs/bloom/ lucene/codecs/src/java/org/apache/lucene/...

Modified: lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java?rev=1603682&r1=1603681&r2=1603682&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java (original)
+++ lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java Thu Jun 19 00:05:32 2014
@@ -18,24 +18,111 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
+import java.lang.reflect.Field;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.IdentityHashMap;
+import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
+import org.apache.lucene.codecs.sep.IntIndexInput;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.CloseableThreadLocal;
+import org.apache.lucene.util.DoubleBarrelLRUCache;
+import org.apache.lucene.util.InfoStream;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.RamUsageTester;
 
 /**
  * Common tests to all index formats.
  */
 abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
 
+  // Classes (and their subclasses) whose instances are not counted by the RAM usage test, eg. metadata or Directory-level objects
+  private static final Set<Class<?>> EXCLUDED_CLASSES = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
+
+  static {
+    // Directory objects, don't take into account eg. the NIO buffers
+    EXCLUDED_CLASSES.add(Directory.class);
+    EXCLUDED_CLASSES.add(IndexInput.class);
+    EXCLUDED_CLASSES.add(IntIndexInput.class);
+
+    // used for thread management, not by the index
+    EXCLUDED_CLASSES.add(CloseableThreadLocal.class);
+    EXCLUDED_CLASSES.add(ThreadLocal.class);
+
+    // don't follow references to the top-level reader
+    EXCLUDED_CLASSES.add(IndexReader.class);
+    EXCLUDED_CLASSES.add(IndexReaderContext.class);
+
+    // usually small but can bump memory usage for
+    // memory-efficient things like stored fields
+    EXCLUDED_CLASSES.add(FieldInfos.class);
+    EXCLUDED_CLASSES.add(SegmentInfo.class);
+    EXCLUDED_CLASSES.add(SegmentCommitInfo.class);
+    EXCLUDED_CLASSES.add(FieldInfo.class);
+
+    // used by lucene3x to maintain a cache. Doesn't depend on the number of docs
+    EXCLUDED_CLASSES.add(DoubleBarrelLRUCache.class);
+
+    // constant overhead is typically due to strings
+    // TODO: can we remove this and still pass the test consistently?
+    EXCLUDED_CLASSES.add(String.class);
+  }
+
+  static class Accumulator extends RamUsageTester.Accumulator {
+
+    private final Object root;
+
+    Accumulator(Object root) {
+      this.root = root;
+    }
+
+    public long accumulateObject(Object o, long shallowSize, java.util.Map<Field, Object> fieldValues, java.util.Collection<Object> queue) {
+      for (Class<?> clazz = o.getClass(); clazz != null; clazz = clazz.getSuperclass()) {
+        if (EXCLUDED_CLASSES.contains(clazz) && o != root) {
+          return 0;
+        }
+      }
+      // We have no way to estimate the internal overhead of the collections used
+      // by codecs, although something like Collections.newSetFromMap(new HashMap<>())
+      // uses quite some memory. So for now the test only counts one reference per
+      // element (two per map entry) and ignores the rest. TODO: can we do better?
+      if (o instanceof Collection) {
+        Collection<?> coll = (Collection<?>) o;
+        queue.addAll((Collection<?>) o);
+        return (long) coll.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+      } else if (o instanceof Map) {
+        final Map<?, ?> map = (Map<?,?>) o;
+        queue.addAll(map.keySet());
+        queue.addAll(map.values());
+        return 2L * map.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+      }
+      long v = super.accumulateObject(o, shallowSize, fieldValues, queue);
+      // System.out.println(o.getClass() + "=" + v);
+      return v;
+    }
+
+    @Override
+    public long accumulateArray(Object array, long shallowSize,
+        List<Object> values, Collection<Object> queue) {
+      long v = super.accumulateArray(array, shallowSize, values, queue);
+      // System.out.println(array.getClass() + "=" + v);
+      return v;
+    }
+
+  };
+
   /** Returns the codec to run tests against */
   protected abstract Codec getCodec();
 
@@ -73,11 +160,11 @@ abstract class BaseIndexFileFormatTestCa
    * comparing indices that store the same content.
    */
   protected Collection<String> excludedExtensionsFromByteCounts() {
-    return new HashSet<String>(Arrays.asList(new String[] { 
+    return new HashSet<String>(Arrays.asList(new String[] {
     // segment infos store various pieces of information that don't solely depend
     // on the content of the index in the diagnostics (such as a timestamp) so we
     // exclude this file from the bytes counts
-                        "si", 
+                        "si",
     // lock files are 0 bytes (one directory in the test could be RAMDir, the other FSDir)
                         "lock" }));
   }
@@ -118,4 +205,51 @@ abstract class BaseIndexFileFormatTestCa
     dir2.close();
   }
 
+  /** Test the accuracy of the ramBytesUsed estimations. */
+  public void testRamBytesUsed() throws IOException {
+    if (Codec.getDefault() instanceof RandomCodec) {
+      // this test relies on the fact that two segments will be written with
+      // the same codec so we need to disable MockRandomPF
+      final Set<String> avoidCodecs = new HashSet<>(((RandomCodec) Codec.getDefault()).avoidCodecs);
+      avoidCodecs.add(new MockRandomPostingsFormat().getName());
+      Codec.setDefault(new RandomCodec(random(), avoidCodecs));
+    }
+    Directory dir = newDirectory();
+    IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    IndexWriter w = new IndexWriter(dir, cfg);
+    // we need to index enough documents so that constant overhead doesn't dominate
+    final int numDocs = atLeast(10000);
+    AtomicReader reader1 = null;
+    for (int i = 0; i < numDocs; ++i) {
+      Document d = new Document();
+      addRandomFields(d);
+      w.addDocument(d);
+      if (i == 100) {
+        w.forceMerge(1);
+        w.commit();
+        reader1 = getOnlySegmentReader(DirectoryReader.open(dir));
+      }
+    }
+    w.forceMerge(1);
+    w.commit();
+    w.close();
+
+    AtomicReader reader2 = getOnlySegmentReader(DirectoryReader.open(dir));
+
+    for (AtomicReader reader : Arrays.asList(reader1, reader2)) {
+      new SimpleMergedSegmentWarmer(InfoStream.NO_OUTPUT).warm(reader);
+    }
+
+    final long actualBytes = RamUsageTester.sizeOf(reader2, new Accumulator(reader2)) - RamUsageTester.sizeOf(reader1, new Accumulator(reader1));
+    final long expectedBytes = ((SegmentReader) reader2).ramBytesUsed() - ((SegmentReader) reader1).ramBytesUsed();
+    final long absoluteError = actualBytes - expectedBytes;
+    final double relativeError = (double) absoluteError / actualBytes;
+    final String message = "Actual RAM usage " + actualBytes + ", but got " + expectedBytes + ", " + 100*relativeError + "% error";
+    assertTrue(message, Math.abs(relativeError) < 0.20d || Math.abs(absoluteError) < 1000);
+
+    reader1.close();
+    reader2.close();
+    dir.close();
+  }
+
 }

Modified: lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java?rev=1603682&r1=1603681&r2=1603682&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java (original)
+++ lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java Thu Jun 19 00:05:32 2014
@@ -238,7 +238,8 @@ public abstract class BaseNormsFormatTes
   @Override
   protected void addRandomFields(Document doc) {
     // TODO: improve
-    doc.add(new TextField("foobar", "boo", Field.Store.NO));
+    doc.add(new TextField("foobar", TestUtil.randomSimpleString(random()), Field.Store.NO));
+    
   }
 
   @Override

Modified: lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java?rev=1603682&r1=1603681&r2=1603682&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java (original)
+++ lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java Thu Jun 19 00:05:32 2014
@@ -76,6 +76,8 @@ public class RandomCodec extends Lucene4
   /** unique set of docvalues format names this codec knows about */
   public Set<String> dvFormatNames = new HashSet<>();
 
+  public final Set<String> avoidCodecs;
+
   /** memorized field->postingsformat mappings */
   // note: we have to sync this map even though its just for debugging/toString, 
   // otherwise DWPT's .toString() calls that iterate over the map can 
@@ -118,6 +120,7 @@ public class RandomCodec extends Lucene4
 
   public RandomCodec(Random random, Set<String> avoidCodecs) {
     this.perFieldSeed = random.nextInt();
+    this.avoidCodecs = avoidCodecs;
     // TODO: make it possible to specify min/max iterms per
     // block via CL:
     int minItemsPerBlock = TestUtil.nextInt(random, 2, 100);

Modified: lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java?rev=1603682&r1=1603681&r2=1603682&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java (original)
+++ lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java Thu Jun 19 00:05:32 2014
@@ -20,39 +20,39 @@ package org.apache.lucene.util;
 import java.lang.reflect.Array;
 import java.lang.reflect.Field;
 import java.lang.reflect.Modifier;
+import java.util.AbstractList;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.IdentityHashMap;
 import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
 import java.util.NoSuchElementException;
 
 /** Crawls object graph to collect RAM usage for testing */
 public final class RamUsageTester {
-  
-  /**
-   * A {@link Filter} that accepts all fields.
-   */
-  private static final Filter DEFAULT_FILTER = new Filter() {
 
-    @Override
-    public boolean accept(Field field) {
-      return true;
-    }
+  /** An accumulator of object references. This class allows for customizing RAM usage estimation. */
+  public static class Accumulator {
 
-    public boolean accept(Object o) {
-      return true;
+    /** Accumulate transitive references for the provided fields of the given
+     *  object into <code>queue</code> and return the shallow size of this object. */
+    public long accumulateObject(Object o, long shallowSize, Map<Field, Object> fieldValues, Collection<Object> queue) {
+      for (Object value : fieldValues.values()) {
+        queue.add(value);
+      }
+      return shallowSize;
     }
 
-  };
-
-  /** A filter that allows to decide on what to take into account when measuring RAM usage. */
-  public static interface Filter {
-
-    /** Whether the provided field should be taken into account when measuring RAM usage. */
-    boolean accept(Field field);
-
-    /** Whether the provided field value should be taken into account when measuring RAM usage. */
-    boolean accept(Object o);
+    /** Accumulate transitive references for the provided values of the given
+     *  array into <code>queue</code> and return the shallow size of this array. */
+    public long accumulateArray(Object array, long shallowSize, List<Object> values, Collection<Object> queue) {
+      queue.addAll(values);
+      return shallowSize;
+    }
 
   }
 
@@ -65,13 +65,13 @@ public final class RamUsageTester {
    * (it isn't side-effect free). After the method exits, this memory
    * should be GCed.</p>
    */
-  public static long sizeOf(Object obj, Filter filter) {
-    return measureObjectSize(obj, filter);
+  public static long sizeOf(Object obj, Accumulator accumulator) {
+    return measureObjectSize(obj, accumulator);
   }
 
-  /** Same as calling <code>sizeOf(obj, DEFAULT_FILTER)</code>. */
+  /** Same as calling <code>sizeOf(obj, new Accumulator())</code>. */
   public static long sizeOf(Object obj) {
-    return sizeOf(obj, DEFAULT_FILTER);
+    return sizeOf(obj, new Accumulator());
   }
 
   /**
@@ -89,7 +89,7 @@ public final class RamUsageTester {
    * or complex graphs (a max. recursion depth on my machine was ~5000 objects linked in a chain
    * so not too much).
    */
-  private static long measureObjectSize(Object root, Filter filter) {
+  private static long measureObjectSize(Object root, Accumulator accumulator) {
     // Objects seen so far.
     final IdentityHashSet<Object> seen = new IdentityHashSet<>();
     // Class cache with reference Field and precalculated shallow size. 
@@ -114,25 +114,28 @@ public final class RamUsageTester {
          * Consider an array, possibly of primitive types. Push any of its references to
          * the processing stack and accumulate this array's shallow size. 
          */
-        long size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;
+        final long shallowSize = RamUsageEstimator.shallowSizeOf(ob);
         final int len = Array.getLength(ob);
-        if (len > 0) {
-          Class<?> componentClazz = obClazz.getComponentType();
-          if (componentClazz.isPrimitive()) {
-            size += (long) len * RamUsageEstimator.shallowSizeOfInstance(componentClazz);
-          } else {
-            size += (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * len;
-
-            // Push refs for traversal later.
-            for (int i = len; --i >= 0 ;) {
-              final Object o = Array.get(ob, i);
-              if (o != null && !seen.contains(o) && filter.accept(o)) {
-                stack.add(o);
+        final List<Object> values;
+        Class<?> componentClazz = obClazz.getComponentType();
+        if (componentClazz.isPrimitive()) {
+          values = Collections.emptyList();
+        } else {
+          values = new AbstractList<Object>() {
+
+            @Override
+            public Object get(int index) {
+              return Array.get(ob, index);
+            }
+
+            @Override
+            public int size() {
+              return len;
               }
-            }            
+              
+            };         
           }
-        }
-        totalSize += RamUsageEstimator.alignObjectSize(size);
+        totalSize += accumulator.accumulateArray(ob, shallowSize, values, stack);
       } else {
         /*
          * Consider an object. Push any references it has to the processing stack
@@ -144,17 +147,12 @@ public final class RamUsageTester {
             classCache.put(obClazz, cachedInfo = createCacheEntry(obClazz));
           }
 
+          Map<Field, Object> fieldValues = new HashMap<>();
           for (Field f : cachedInfo.referenceFields) {
-            if (filter.accept(f)) {
-              // Fast path to eliminate redundancies.
-              final Object o = f.get(ob);
-              if (o != null && !seen.contains(o) && filter.accept(o)) {
-                stack.add(o);
-              }
-            }
+            fieldValues.put(f, f.get(ob));
           }
 
-          totalSize += cachedInfo.alignedShallowInstanceSize;
+          totalSize += accumulator.accumulateObject(ob, cachedInfo.alignedShallowInstanceSize, fieldValues, stack);
         } catch (IllegalAccessException e) {
           // this should never happen as we enabled setAccessible().
           throw new RuntimeException("Reflective field access failed?", e);