You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2012/02/28 20:26:05 UTC
svn commit: r1294797 -
/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java
Author: dweiss
Date: Tue Feb 28 19:26:05 2012
New Revision: 1294797
URL: http://svn.apache.org/viewvc?rev=1294797&view=rev
Log:
LUCENE-3820: limit the length of the input used for pattern matching so that the test gets past patterns with exponential matching time, even if such patterns happen to be generated. A nice catch from Mike, too -- un-ignore testNastyPattern and watch the processing time go wild with each additional input character...
Modified:
lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java
Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java?rev=1294797&r1=1294796&r2=1294797&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java Tue Feb 28 19:26:05 2012
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.patte
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
+import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
@@ -31,6 +32,7 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util._TestUtil;
+import org.junit.Ignore;
/**
* Tests {@link PatternReplaceCharFilter}
@@ -275,7 +277,27 @@ public class TestPatternReplaceCharFilte
private Pattern pattern( String p ){
return Pattern.compile( p );
}
-
+
+ /**
+ * A demonstration of how backtracking regular expressions can lead to relatively
+ * easy DoS attacks.
+ *
+ * @see "http://swtch.com/~rsc/regexp/regexp1.html"
+ */
+ @Ignore
+ public void testNastyPattern() throws Exception {
+ Pattern p = Pattern.compile("(c.+)*xy");
+ String input = "[;<!--aecbbaa-->< febcfdc fbb = \"fbeeebff\" fc = dd >\\';<eefceceaa e= babae\" eacbaff =\"fcfaccacd\" = bcced>>>< bccaafe edb = ecfccdff\" <?</script>< edbd ebbcd=\"faacfcc\" aeca= bedbc ceeaac =adeafde aadccdaf = \"afcc ffda=aafbe &#x16921ed5\"1843785582']";
+ for (int i = 0; i < input.length(); i++) {
+ Matcher matcher = p.matcher(input.substring(0, i));
+ long t = System.currentTimeMillis();
+ if (matcher.find()) {
+ System.out.println(matcher.group());
+ }
+ System.out.println(i + " > " + (System.currentTimeMillis() - t) / 1000.0);
+ }
+ }
+
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
int numPatterns = atLeast(100);
@@ -296,9 +318,9 @@ public class TestPatternReplaceCharFilte
return new PatternReplaceCharFilter(p, replacement, CharReader.get(reader));
}
};
- long s = System.currentTimeMillis();
- checkRandomData(random, a, 1000 * RANDOM_MULTIPLIER, true); // only ascii
- System.out.println((System.currentTimeMillis() - s) / 1000.0 + " > " + p);
+ checkRandomData(random, a, 1000 * RANDOM_MULTIPLIER,
+ /* max input length. don't make it longer -- exponential processing
+ * time for certain patterns. */ 40, true); // only ascii
}
}