You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2012/02/28 20:26:05 UTC

svn commit: r1294797 - /lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java

Author: dweiss
Date: Tue Feb 28 19:26:05 2012
New Revision: 1294797

URL: http://svn.apache.org/viewvc?rev=1294797&view=rev
Log:
LUCENE-3820: limit the amount of input used for pattern matching so that the random test gets past exponential-time patterns, even if they occur. A nice catch from Mike too -- un-ignore testNastyPattern and watch the processing time go wild with each additional input character...

Modified:
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java?rev=1294797&r1=1294796&r2=1294797&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilter.java Tue Feb 28 19:26:05 2012
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.patte
 import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
+import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 
@@ -31,6 +32,7 @@ import org.apache.lucene.analysis.MockTo
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.util._TestUtil;
+import org.junit.Ignore;
 
 /**
  * Tests {@link PatternReplaceCharFilter}
@@ -275,7 +277,27 @@ public class TestPatternReplaceCharFilte
   private Pattern pattern( String p ){ // test helper: shorthand for Pattern.compile(p)
     return Pattern.compile( p ); // compiled with default flags
   }
-  
+
+  /**
+   * Demonstrates how a backtracking regular expression can lead to a relatively easy DoS attack:
+   * runs {@code (c.+)*xy} over ever-longer prefixes of one input and prints the time of each attempt.
+   * Carries {@code @Ignore}; it contains no assertions and only prints to standard output.
+   * @see "http://swtch.com/~rsc/regexp/regexp1.html"
+   */
+  @Ignore
+  public void testNastyPattern() throws Exception {
+    Pattern p = Pattern.compile("(c.+)*xy"); // a quantified group that itself contains a quantifier
+    String input = "[;<!--aecbbaa--><    febcfdc fbb = \"fbeeebff\" fc = dd   >\\';<eefceceaa e= babae\" eacbaff =\"fcfaccacd\" = bcced>>><  bccaafe edb = ecfccdff\"   <?</script><    edbd ebbcd=\"faacfcc\" aeca= bedbc ceeaac =adeafde aadccdaf = \"afcc ffda=aafbe &#x16921ed5\"1843785582']";
+    for (int i = 0; i < input.length(); i++) { // prefix lengths 0 .. length-1; the complete input is never tried
+      Matcher matcher = p.matcher(input.substring(0, i)); // match against the first i characters only
+      long t = System.currentTimeMillis(); // wall-clock start of this attempt
+      if (matcher.find()) {
+        System.out.println(matcher.group()); // show the matched text, if any
+      }
+      System.out.println(i + " > " + (System.currentTimeMillis() - t) / 1000.0); // "<prefix length> > <elapsed seconds>"
+    }
+  }
+
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {
     int numPatterns = atLeast(100);
@@ -296,9 +318,9 @@ public class TestPatternReplaceCharFilte
           return new PatternReplaceCharFilter(p, replacement, CharReader.get(reader));
         }
       };
-      long s = System.currentTimeMillis();
-      checkRandomData(random, a, 1000 * RANDOM_MULTIPLIER, true); // only ascii
-      System.out.println((System.currentTimeMillis() - s) / 1000.0 + " > " + p);
+      checkRandomData(random, a, 1000 * RANDOM_MULTIPLIER, 
+          /* max input length. don't make it longer -- exponential processing
+           * time for certain patterns. */ 40, true); // only ascii
     }
   }