You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/11/05 21:11:00 UTC
svn commit: r1712865 - in /lucene/dev/trunk: ./ lucene/ lucene/analysis/
lucene/analysis/common/
lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/
lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/
Author: mikemccand
Date: Thu Nov 5 20:10:59 2015
New Revision: 1712865
URL: http://svn.apache.org/viewvc?rev=1712865&view=rev
Log:
LUCENE-6814: release heap in PatternTokenizer.close
Modified:
lucene/dev/trunk/ (props changed)
lucene/dev/trunk/lucene/ (props changed)
lucene/dev/trunk/lucene/CHANGES.txt (contents, props changed)
lucene/dev/trunk/lucene/analysis/ (props changed)
lucene/dev/trunk/lucene/analysis/common/ (props changed)
lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1712865&r1=1712864&r2=1712865&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Nov 5 20:10:59 2015
@@ -248,6 +248,11 @@ Bug Fixes
* LUCENE-6872: IndexWriter handles any VirtualMachineError, not just OOM,
as tragic. (Robert Muir)
+* LUCENE-6814: PatternTokenizer no longer hangs onto heap sized to the
+ maximum input string it's ever seen, which can be a large memory
+ "leak" if you tokenize large strings with many threads across many
+ indices (Alex Chow via Mike McCandless)
+
Other
* LUCENE-6478: Test execution can hang with java.security.debug. (Dawid Weiss)
Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java?rev=1712865&r1=1712864&r2=1712865&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java Thu Nov 5 20:10:59 2015
@@ -137,9 +137,19 @@ public final class PatternTokenizer exte
}
@Override
+ public void close() throws IOException {
+ try {
+ super.close();
+ } finally {
+ str.setLength(0);
+ str.trimToSize();
+ }
+ }
+
+ @Override
public void reset() throws IOException {
super.reset();
- fillBuffer(str, input);
+ fillBuffer(input);
matcher.reset(str);
index = 0;
}
@@ -147,11 +157,11 @@ public final class PatternTokenizer exte
// TODO: we should see if we can make this tokenizer work without reading
// the entire document into RAM, perhaps with Matcher.hitEnd/requireEnd ?
final char[] buffer = new char[8192];
- private void fillBuffer(StringBuilder sb, Reader input) throws IOException {
+ private void fillBuffer(Reader input) throws IOException {
int len;
- sb.setLength(0);
+ str.setLength(0);
while ((len = input.read(buffer)) > 0) {
- sb.append(buffer, 0, len);
+ str.append(buffer, 0, len);
}
}
}
Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java?rev=1712865&r1=1712864&r2=1712865&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternTokenizer.java Thu Nov 5 20:10:59 2015
@@ -146,4 +146,37 @@ public class TestPatternTokenizer extend
checkRandomData(random(), b, 1000*RANDOM_MULTIPLIER);
b.close();
}
+
+ // LUCENE-6814
+ public void testHeapFreedAfterClose() throws Exception {
+ // TODO: can we move this to BaseTokenStreamTestCase to catch other tokenizers that hang onto heap after close?
+
+ // Build a string of 1024 * 1024 (about 1M) chars:
+ StringBuilder b = new StringBuilder();
+ for(int i=0;i<1024;i++) {
+ // 1023 spaces, then an x
+ for(int j=0;j<1023;j++) {
+ b.append(' ');
+ }
+ b.append('x');
+ }
+
+ String big = b.toString();
+
+ Pattern x = Pattern.compile("x");
+
+ List<Tokenizer> tokenizers = new ArrayList<>();
+ for(int i=0;i<512;i++) {
+ Tokenizer stream = new PatternTokenizer(x, -1);
+ tokenizers.add(stream);
+ stream.setReader(new StringReader(big));
+ stream.reset();
+ for(int j=0;j<1024;j++) {
+ assertTrue(stream.incrementToken());
+ }
+ assertFalse(stream.incrementToken());
+ stream.end();
+ stream.close();
+ }
+ }
}