You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2014/06/19 02:05:33 UTC
svn commit: r1603682 [2/2] - in /lucene/dev/branches/lucene_solr_4_9: ./
lucene/ lucene/codecs/
lucene/codecs/src/java/org/apache/lucene/codecs/blockterms/
lucene/codecs/src/java/org/apache/lucene/codecs/bloom/
lucene/codecs/src/java/org/apache/lucene/...
Modified: lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java?rev=1603682&r1=1603681&r2=1603682&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java (original)
+++ lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/BaseIndexFileFormatTestCase.java Thu Jun 19 00:05:32 2014
@@ -18,24 +18,111 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
+import java.lang.reflect.Field;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.IdentityHashMap;
+import java.util.List;
import java.util.Map;
+import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
+import org.apache.lucene.codecs.sep.IntIndexInput;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.util.CloseableThreadLocal;
+import org.apache.lucene.util.DoubleBarrelLRUCache;
+import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.RamUsageTester;
/**
* Common tests to all index formats.
*/
abstract class BaseIndexFileFormatTestCase extends LuceneTestCase {
+ // metadata or Directory-level objects
+ private static final Set<Class<?>> EXCLUDED_CLASSES = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
+
+ static {
+ // Directory objects, don't take into account eg. the NIO buffers
+ EXCLUDED_CLASSES.add(Directory.class);
+ EXCLUDED_CLASSES.add(IndexInput.class);
+ EXCLUDED_CLASSES.add(IntIndexInput.class);
+
+ // used for thread management, not by the index
+ EXCLUDED_CLASSES.add(CloseableThreadLocal.class);
+ EXCLUDED_CLASSES.add(ThreadLocal.class);
+
+ // don't follow references to the top-level reader
+ EXCLUDED_CLASSES.add(IndexReader.class);
+ EXCLUDED_CLASSES.add(IndexReaderContext.class);
+
+ // usually small but can bump memory usage for
+ // memory-efficient things like stored fields
+ EXCLUDED_CLASSES.add(FieldInfos.class);
+ EXCLUDED_CLASSES.add(SegmentInfo.class);
+ EXCLUDED_CLASSES.add(SegmentCommitInfo.class);
+ EXCLUDED_CLASSES.add(FieldInfo.class);
+
+ // used by lucene3x to maintain a cache. Doesn't depend on the number of docs
+ EXCLUDED_CLASSES.add(DoubleBarrelLRUCache.class);
+
+ // constant overhead is typically due to strings
+ // TODO: can we remove this and still pass the test consistently?
+ EXCLUDED_CLASSES.add(String.class);
+ }
+
+ static class Accumulator extends RamUsageTester.Accumulator {
+
+ private final Object root;
+
+ Accumulator(Object root) {
+ this.root = root;
+ }
+
+ public long accumulateObject(Object o, long shallowSize, java.util.Map<Field, Object> fieldValues, java.util.Collection<Object> queue) {
+ for (Class<?> clazz = o.getClass(); clazz != null; clazz = clazz.getSuperclass()) {
+ if (EXCLUDED_CLASSES.contains(clazz) && o != root) {
+ return 0;
+ }
+ }
+ // we have no way to estimate the size of these things in codecs although
+ // something like a Collections.newSetFromMap(new HashMap<>()) uses quite
+ // some memory... So for now the test ignores the overhead of such
+ // collections but can we do better?
+ if (o instanceof Collection) {
+ Collection<?> coll = (Collection<?>) o;
+ queue.addAll((Collection<?>) o);
+ return (long) coll.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+ } else if (o instanceof Map) {
+ final Map<?, ?> map = (Map<?,?>) o;
+ queue.addAll(map.keySet());
+ queue.addAll(map.values());
+ return 2L * map.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF;
+ }
+ long v = super.accumulateObject(o, shallowSize, fieldValues, queue);
+ // System.out.println(o.getClass() + "=" + v);
+ return v;
+ }
+
+ @Override
+ public long accumulateArray(Object array, long shallowSize,
+ List<Object> values, Collection<Object> queue) {
+ long v = super.accumulateArray(array, shallowSize, values, queue);
+ // System.out.println(array.getClass() + "=" + v);
+ return v;
+ }
+
+ };
+
/** Returns the codec to run tests against */
protected abstract Codec getCodec();
@@ -73,11 +160,11 @@ abstract class BaseIndexFileFormatTestCa
* comparing indices that store the same content.
*/
protected Collection<String> excludedExtensionsFromByteCounts() {
- return new HashSet<String>(Arrays.asList(new String[] {
+ return new HashSet<String>(Arrays.asList(new String[] {
// segment infos store various pieces of information that don't solely depend
// on the content of the index in the diagnostics (such as a timestamp) so we
// exclude this file from the bytes counts
- "si",
+ "si",
// lock files are 0 bytes (one directory in the test could be RAMDir, the other FSDir)
"lock" }));
}
@@ -118,4 +205,51 @@ abstract class BaseIndexFileFormatTestCa
dir2.close();
}
+ /** Test the accuracy of the ramBytesUsed estimations. */
+ public void testRamBytesUsed() throws IOException {
+ if (Codec.getDefault() instanceof RandomCodec) {
+ // this test relies on the fact that two segments will be written with
+ // the same codec so we need to disable MockRandomPF
+ final Set<String> avoidCodecs = new HashSet<>(((RandomCodec) Codec.getDefault()).avoidCodecs);
+ avoidCodecs.add(new MockRandomPostingsFormat().getName());
+ Codec.setDefault(new RandomCodec(random(), avoidCodecs));
+ }
+ Directory dir = newDirectory();
+ IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+ IndexWriter w = new IndexWriter(dir, cfg);
+ // we need to index enough documents so that constant overhead doesn't dominate
+ final int numDocs = atLeast(10000);
+ AtomicReader reader1 = null;
+ for (int i = 0; i < numDocs; ++i) {
+ Document d = new Document();
+ addRandomFields(d);
+ w.addDocument(d);
+ if (i == 100) {
+ w.forceMerge(1);
+ w.commit();
+ reader1 = getOnlySegmentReader(DirectoryReader.open(dir));
+ }
+ }
+ w.forceMerge(1);
+ w.commit();
+ w.close();
+
+ AtomicReader reader2 = getOnlySegmentReader(DirectoryReader.open(dir));
+
+ for (AtomicReader reader : Arrays.asList(reader1, reader2)) {
+ new SimpleMergedSegmentWarmer(InfoStream.NO_OUTPUT).warm(reader);
+ }
+
+ final long actualBytes = RamUsageTester.sizeOf(reader2, new Accumulator(reader2)) - RamUsageTester.sizeOf(reader1, new Accumulator(reader1));
+ final long expectedBytes = ((SegmentReader) reader2).ramBytesUsed() - ((SegmentReader) reader1).ramBytesUsed();
+ final long absoluteError = actualBytes - expectedBytes;
+ final double relativeError = (double) absoluteError / actualBytes;
+ final String message = "Actual RAM usage " + actualBytes + ", but got " + expectedBytes + ", " + 100*relativeError + "% error";
+ assertTrue(message, Math.abs(relativeError) < 0.20d || Math.abs(absoluteError) < 1000);
+
+ reader1.close();
+ reader2.close();
+ dir.close();
+ }
+
}
Modified: lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java?rev=1603682&r1=1603681&r2=1603682&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java (original)
+++ lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/BaseNormsFormatTestCase.java Thu Jun 19 00:05:32 2014
@@ -238,7 +238,8 @@ public abstract class BaseNormsFormatTes
@Override
protected void addRandomFields(Document doc) {
// TODO: improve
- doc.add(new TextField("foobar", "boo", Field.Store.NO));
+ doc.add(new TextField("foobar", TestUtil.randomSimpleString(random()), Field.Store.NO));
+
}
@Override
Modified: lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java?rev=1603682&r1=1603681&r2=1603682&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java (original)
+++ lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java Thu Jun 19 00:05:32 2014
@@ -76,6 +76,8 @@ public class RandomCodec extends Lucene4
/** unique set of docvalues format names this codec knows about */
public Set<String> dvFormatNames = new HashSet<>();
+ public final Set<String> avoidCodecs;
+
/** memorized field->postingsformat mappings */
// note: we have to sync this map even though it's just for debugging/toString,
// otherwise DWPT's .toString() calls that iterate over the map can
@@ -118,6 +120,7 @@ public class RandomCodec extends Lucene4
public RandomCodec(Random random, Set<String> avoidCodecs) {
this.perFieldSeed = random.nextInt();
+ this.avoidCodecs = avoidCodecs;
// TODO: make it possible to specify min/max items per
// block via CL:
int minItemsPerBlock = TestUtil.nextInt(random, 2, 100);
Modified: lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java?rev=1603682&r1=1603681&r2=1603682&view=diff
==============================================================================
--- lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java (original)
+++ lucene/dev/branches/lucene_solr_4_9/lucene/test-framework/src/java/org/apache/lucene/util/RamUsageTester.java Thu Jun 19 00:05:32 2014
@@ -20,39 +20,39 @@ package org.apache.lucene.util;
import java.lang.reflect.Array;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
+import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
import java.util.NoSuchElementException;
/** Crawls object graph to collect RAM usage for testing */
public final class RamUsageTester {
-
- /**
- * A {@link Filter} that accepts all fields.
- */
- private static final Filter DEFAULT_FILTER = new Filter() {
- @Override
- public boolean accept(Field field) {
- return true;
- }
+ /** An accumulator of object references. This class allows for customizing RAM usage estimation. */
+ public static class Accumulator {
- public boolean accept(Object o) {
- return true;
+ /** Accumulate transitive references for the provided fields of the given
+ * object into <code>queue</code> and return the shallow size of this object. */
+ public long accumulateObject(Object o, long shallowSize, Map<Field, Object> fieldValues, Collection<Object> queue) {
+ for (Object value : fieldValues.values()) {
+ queue.add(value);
+ }
+ return shallowSize;
}
- };
-
- /** A filter that allows to decide on what to take into account when measuring RAM usage. */
- public static interface Filter {
-
- /** Whether the provided field should be taken into account when measuring RAM usage. */
- boolean accept(Field field);
-
- /** Whether the provided field value should be taken into account when measuring RAM usage. */
- boolean accept(Object o);
+ /** Accumulate transitive references for the provided values of the given
+ * array into <code>queue</code> and return the shallow size of this array. */
+ public long accumulateArray(Object array, long shallowSize, List<Object> values, Collection<Object> queue) {
+ queue.addAll(values);
+ return shallowSize;
+ }
}
@@ -65,13 +65,13 @@ public final class RamUsageTester {
* (it isn't side-effect free). After the method exits, this memory
* should be GCed.</p>
*/
- public static long sizeOf(Object obj, Filter filter) {
- return measureObjectSize(obj, filter);
+ public static long sizeOf(Object obj, Accumulator accumulator) {
+ return measureObjectSize(obj, accumulator);
}
/** Same as calling <code>sizeOf(obj, DEFAULT_FILTER)</code>. */
public static long sizeOf(Object obj) {
- return sizeOf(obj, DEFAULT_FILTER);
+ return sizeOf(obj, new Accumulator());
}
/**
@@ -89,7 +89,7 @@ public final class RamUsageTester {
* or complex graphs (a max. recursion depth on my machine was ~5000 objects linked in a chain
* so not too much).
*/
- private static long measureObjectSize(Object root, Filter filter) {
+ private static long measureObjectSize(Object root, Accumulator accumulator) {
// Objects seen so far.
final IdentityHashSet<Object> seen = new IdentityHashSet<>();
// Class cache with reference Field and precalculated shallow size.
@@ -114,25 +114,28 @@ public final class RamUsageTester {
* Consider an array, possibly of primitive types. Push any of its references to
* the processing stack and accumulate this array's shallow size.
*/
- long size = RamUsageEstimator.NUM_BYTES_ARRAY_HEADER;
+ final long shallowSize = RamUsageEstimator.shallowSizeOf(ob);
final int len = Array.getLength(ob);
- if (len > 0) {
- Class<?> componentClazz = obClazz.getComponentType();
- if (componentClazz.isPrimitive()) {
- size += (long) len * RamUsageEstimator.shallowSizeOfInstance(componentClazz);
- } else {
- size += (long) RamUsageEstimator.NUM_BYTES_OBJECT_REF * len;
-
- // Push refs for traversal later.
- for (int i = len; --i >= 0 ;) {
- final Object o = Array.get(ob, i);
- if (o != null && !seen.contains(o) && filter.accept(o)) {
- stack.add(o);
+ final List<Object> values;
+ Class<?> componentClazz = obClazz.getComponentType();
+ if (componentClazz.isPrimitive()) {
+ values = Collections.emptyList();
+ } else {
+ values = new AbstractList<Object>() {
+
+ @Override
+ public Object get(int index) {
+ return Array.get(ob, index);
+ }
+
+ @Override
+ public int size() {
+ return len;
}
- }
+
+ };
}
- }
- totalSize += RamUsageEstimator.alignObjectSize(size);
+ totalSize += accumulator.accumulateArray(ob, shallowSize, values, stack);
} else {
/*
* Consider an object. Push any references it has to the processing stack
@@ -144,17 +147,12 @@ public final class RamUsageTester {
classCache.put(obClazz, cachedInfo = createCacheEntry(obClazz));
}
+ Map<Field, Object> fieldValues = new HashMap<>();
for (Field f : cachedInfo.referenceFields) {
- if (filter.accept(f)) {
- // Fast path to eliminate redundancies.
- final Object o = f.get(ob);
- if (o != null && !seen.contains(o) && filter.accept(o)) {
- stack.add(o);
- }
- }
+ fieldValues.put(f, f.get(ob));
}
- totalSize += cachedInfo.alignedShallowInstanceSize;
+ totalSize += accumulator.accumulateObject(ob, cachedInfo.alignedShallowInstanceSize, fieldValues, stack);
} catch (IllegalAccessException e) {
// this should never happen as we enabled setAccessible().
throw new RuntimeException("Reflective field access failed?", e);