You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ab...@apache.org on 2019/06/28 08:46:00 UTC

[lucene-solr] branch branch_8x updated: LUCENE-8855: Fix some size estimates and relax test assertions to work under different JVMs.

This is an automated email from the ASF dual-hosted git repository.

ab pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 84db2b2  LUCENE-8855: Fix some size estimates and relax test assertions to work under different JVMs.
84db2b2 is described below

commit 84db2b2742868b01c3f1c419b0e31e5953c06bb4
Author: Andrzej Bialecki <ab...@apache.org>
AuthorDate: Thu Jun 27 21:59:21 2019 +0200

    LUCENE-8855: Fix some size estimates and relax test assertions to work under different JVMs.
---
 .../src/java/org/apache/lucene/index/Term.java     |  2 +-
 .../java/org/apache/lucene/util/ByteBlockPool.java |  9 ++++--
 .../java/org/apache/lucene/util/BytesRefHash.java  |  4 +--
 .../org/apache/lucene/util/RamUsageEstimator.java  | 34 ++++++++++++++--------
 .../apache/lucene/util/TestRamUsageEstimator.java  | 33 +++++++++++----------
 5 files changed, 49 insertions(+), 33 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/index/Term.java b/lucene/core/src/java/org/apache/lucene/index/Term.java
index 4ee8b42..4c1bdb4 100644
--- a/lucene/core/src/java/org/apache/lucene/index/Term.java
+++ b/lucene/core/src/java/org/apache/lucene/index/Term.java
@@ -172,6 +172,6 @@ public final class Term implements Comparable<Term>, Accountable {
   public long ramBytesUsed() {
     return BASE_RAM_BYTES +
         RamUsageEstimator.sizeOfObject(field) +
-        (bytes != null ? bytes.bytes.length + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER : 0L);
+        (bytes != null ? RamUsageEstimator.alignObjectSize(bytes.bytes.length + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER) : 0L);
   }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java
index b543886..7649c2c 100644
--- a/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java
+++ b/lucene/core/src/java/org/apache/lucene/util/ByteBlockPool.java
@@ -398,8 +398,13 @@ public final class ByteBlockPool implements Accountable {
   @Override
   public long ramBytesUsed() {
     long size = BASE_RAM_BYTES;
-    for (byte[] buffer : buffers) {
-      size += RamUsageEstimator.sizeOfObject(buffer);
+    size += RamUsageEstimator.sizeOfObject(buffer);
+    size += RamUsageEstimator.shallowSizeOf(buffers);
+    for (byte[] buf : buffers) {
+      if (buf == buffer) {
+        continue;
+      }
+      size += RamUsageEstimator.sizeOfObject(buf);
     }
     return size;
   }
diff --git a/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java b/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java
index f017ccb..da3178e 100644
--- a/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java
+++ b/lucene/core/src/java/org/apache/lucene/util/BytesRefHash.java
@@ -45,9 +45,9 @@ import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
 public final class BytesRefHash implements Accountable {
   private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(BytesRefHash.class) +
       // size of scratch1
-      RamUsageEstimator.shallowSizeOf(BytesRef.class) +
+      RamUsageEstimator.shallowSizeOfInstance(BytesRef.class) +
       // size of Counter
-      RamUsageEstimator.shallowSizeOf(Counter.class);
+      RamUsageEstimator.primitiveSizes.get(long.class);
 
   public static final int DEFAULT_CAPACITY = 16;
 
diff --git a/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java b/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
index d2a043a..a338ca2 100644
--- a/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
+++ b/lucene/core/src/java/org/apache/lucene/util/RamUsageEstimator.java
@@ -342,7 +342,7 @@ public final class RamUsageEstimator {
       return size;
     }
     // assume array-backed collection and add per-object references
-    size += NUM_BYTES_ARRAY_HEADER * 2;
+    size += NUM_BYTES_ARRAY_HEADER + collection.size() * NUM_BYTES_OBJECT_REF;
     for (Object o : collection) {
       size += sizeOfObject(o, depth, defSize);
     }
@@ -350,29 +350,39 @@ public final class RamUsageEstimator {
   }
 
   private static final class RamUsageQueryVisitor extends QueryVisitor {
-    long total = 0;
+    long total;
     long defSize;
+    Query root;
 
-    RamUsageQueryVisitor(long defSize) {
+    RamUsageQueryVisitor(Query root, long defSize) {
+      this.root = root;
       this.defSize = defSize;
+      if (defSize > 0) {
+        total = defSize;
+      } else {
+        total = shallowSizeOf(root);
+      }
     }
 
     @Override
     public void consumeTerms(Query query, Term... terms) {
-      if (defSize > 0) {
-        total += defSize;
-      } else {
-        total += shallowSizeOf(query);
+      if (query != root) {
+        if (defSize > 0) {
+          total += defSize;
+        } else {
+          total += shallowSizeOf(query);
+        }
       }
       if (terms != null) {
-        for (Term t : terms) {
-          total += sizeOf(t);
-        }
+        total += sizeOf(terms);
       }
     }
 
     @Override
     public void visitLeaf(Query query) {
+      if (query == root) {
+        return;
+      }
       if (query instanceof Accountable) {
         total += ((Accountable)query).ramBytesUsed();
       } else {
@@ -407,9 +417,9 @@ public final class RamUsageEstimator {
     if (q instanceof Accountable) {
       return ((Accountable)q).ramBytesUsed();
     } else {
-      RamUsageQueryVisitor visitor = new RamUsageQueryVisitor(defSize);
+      RamUsageQueryVisitor visitor = new RamUsageQueryVisitor(q, defSize);
       q.visit(visitor);
-      return visitor.total;
+      return alignObjectSize(visitor.total);
     }
   }
 
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java b/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java
index df9e7a6..e813c1d 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestRamUsageEstimator.java
@@ -22,7 +22,6 @@ import static org.apache.lucene.util.RamUsageTester.sizeOf;
 
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -116,56 +115,58 @@ public class TestRamUsageEstimator extends LuceneTestCase {
     }
     long actual = sizeOf(bytes);
     long estimated = RamUsageEstimator.sizeOf(bytes);
-    assertEquals(actual, estimated);
+    assertEquals((double)actual, (double)estimated, (double)actual * 0.1);
   }
 
   public void testMap() {
     Map<String, Object> map = new HashMap<>();
     map.put("primitive", 1234L);
     map.put("string", "string");
+    for (int i = 0; i < 100; i++) {
+      map.put("complex " + i, new Term("foo " + i, "bar " + i));
+    }
     long actual = sizeOf(map);
     long estimated = RamUsageEstimator.sizeOfObject(map);
-    assertTrue(estimated > actual); // RamUsageTester under-estimates the size of map
+    assertEquals((double)actual, (double)estimated, (double)actual * 0.2);
 
     // test recursion
-    map.clear();
-    map.put("string[]", new String[]{"foo", "bar"});
-    map.put("map", Collections.singletonMap("foo", "bar"));
     map.put("self", map);
     actual = sizeOf(map);
     estimated = RamUsageEstimator.sizeOfObject(map);
-    assertTrue(estimated > actual);
+    assertEquals((double)actual, (double)estimated, (double)actual * 0.2);
   }
 
   public void testCollection() {
     List<Object> list = new ArrayList<>();
     list.add(1234L);
     list.add("string");
-    list.add(new Term("foo", "bar"));
+    for (int i = 0; i < 100; i++) {
+      list.add(new Term("foo " + i, "term " + i));
+    }
     long actual = sizeOf(list);
     long estimated = RamUsageEstimator.sizeOfObject(list);
-    assertEquals(actual, estimated);
+    assertEquals((double)actual, (double)estimated, (double)actual * 0.1);
 
     // test recursion
-    list.clear();
-    list.add(1234L);
     list.add(list);
     actual = sizeOf(list);
     estimated = RamUsageEstimator.sizeOfObject(list);
-    assertEquals(actual + RamUsageEstimator.shallowSizeOf(list), estimated);
+    assertEquals((double)actual, (double)estimated, (double)actual * 0.1);
   }
 
   public void testQuery() {
     DisjunctionMaxQuery dismax = new DisjunctionMaxQuery(
-        Arrays.asList(new TermQuery(new Term("foo", "bar")), new TermQuery(new Term("baz", "bam"))), 1.0f);
+        Arrays.asList(new TermQuery(new Term("foo1", "bar1")), new TermQuery(new Term("baz1", "bam1"))), 1.0f);
     BooleanQuery bq = new BooleanQuery.Builder()
-        .add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD)
-        .add(new FuzzyQuery(new Term("foo", "baz")), BooleanClause.Occur.MUST_NOT)
+        .add(new TermQuery(new Term("foo2", "bar2")), BooleanClause.Occur.SHOULD)
+        .add(new FuzzyQuery(new Term("foo3", "baz3")), BooleanClause.Occur.MUST_NOT)
         .add(dismax, BooleanClause.Occur.MUST)
         .build();
     long actual = sizeOf(bq);
     long estimated = RamUsageEstimator.sizeOfObject(bq);
-    assertTrue(actual < estimated);
+    // sizeOfObject uses much lower default size estimate than we normally use
+    // but the query-specific default is so large that the comparison becomes meaningless.
+    assertEquals((double)actual, (double)estimated, (double)actual * 0.5);
   }
 
   public void testReferenceSize() {