You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/11/05 21:11:00 UTC

svn commit: r1712865 - in /lucene/dev/trunk: ./ lucene/ lucene/analysis/ lucene/analysis/common/ lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/ lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/

Author: mikemccand
Date: Thu Nov  5 20:10:59 2015
New Revision: 1712865

URL: http://svn.apache.org/viewvc?rev=1712865&view=rev
Log:
LUCENE-6814: release heap in PatternTokenizer.close

Modified:
    lucene/dev/trunk/   (props changed)
    lucene/dev/trunk/lucene/   (props changed)
    lucene/dev/trunk/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/trunk/lucene/analysis/   (props changed)
    lucene/dev/trunk/lucene/analysis/common/   (props changed)
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
    lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1712865&r1=1712864&r2=1712865&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Nov  5 20:10:59 2015
@@ -248,6 +248,11 @@ Bug Fixes
 * LUCENE-6872: IndexWriter handles any VirtualMachineError, not just OOM,
   as tragic. (Robert Muir)
 
+* LUCENE-6814: PatternTokenizer no longer hangs onto heap sized to the
+  maximum input string it's ever seen, which can be a large memory
+  "leak" if you tokenize large strings with many threads across many
+  indices (Alex Chow via Mike McCandless)
+
 Other
 
 * LUCENE-6478: Test execution can hang with java.security.debug. (Dawid Weiss)

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java?rev=1712865&r1=1712864&r2=1712865&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java Thu Nov  5 20:10:59 2015
@@ -137,9 +137,19 @@ public final class PatternTokenizer exte
   }
 
   @Override
+  public void close() throws IOException {
+    try {
+      super.close();
+    } finally {
+      str.setLength(0);
+      str.trimToSize();
+    }
+  }
+
+  @Override
   public void reset() throws IOException {
     super.reset();
-    fillBuffer(str, input);
+    fillBuffer(input);
     matcher.reset(str);
     index = 0;
   }
@@ -147,11 +157,11 @@ public final class PatternTokenizer exte
   // TODO: we should see if we can make this tokenizer work without reading
   // the entire document into RAM, perhaps with Matcher.hitEnd/requireEnd ?
   final char[] buffer = new char[8192];
-  private void fillBuffer(StringBuilder sb, Reader input) throws IOException {
+  private void fillBuffer(Reader input) throws IOException {
     int len;
-    sb.setLength(0);
+    str.setLength(0);
     while ((len = input.read(buffer)) > 0) {
-      sb.append(buffer, 0, len);
+      str.append(buffer, 0, len);
     }
   }
 }

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java?rev=1712865&r1=1712864&r2=1712865&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java Thu Nov  5 20:10:59 2015
@@ -146,4 +146,37 @@ public class TestPatternTokenizer extend
     checkRandomData(random(), b, 1000*RANDOM_MULTIPLIER);
     b.close();
   }
+
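+  // LUCENE-6814: keeps all 512 closed tokenizers reachable in a list; presumably the test runs out of heap if close() still retains each ~1M-char buffer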
+  public void testHeapFreedAfterClose() throws Exception {
+    // TODO: can we move this to BaseTokenStreamTestCase to catch other "hangs onto heap"ers?
+
+    // Build a string of 1M (1024 * 1024) chars:
+    StringBuilder b = new StringBuilder();
+    for(int i=0;i<1024;i++) {
+      // 1023 spaces, then an x
+      for(int j=0;j<1023;j++) {
+        b.append(' ');
+      }
+      b.append('x');
+    }
+
+    String big = b.toString();
+
+    Pattern x = Pattern.compile("x");
+
+    List<Tokenizer> tokenizers = new ArrayList<>();
+    for(int i=0;i<512;i++) {
+      Tokenizer stream = new PatternTokenizer(x, -1);
+      tokenizers.add(stream);
+      stream.setReader(new StringReader(big));
+      stream.reset();
+      for(int j=0;j<1024;j++) {
+        assertTrue(stream.incrementToken());
+      }
+      assertFalse(stream.incrementToken());
+      stream.end();
+      stream.close();
+    }
+  }
 }