You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/07/31 19:41:05 UTC

svn commit: r799667 - in /lucene/java/trunk/contrib: ./ regex/src/java/org/apache/lucene/search/regex/ regex/src/test/org/apache/lucene/search/regex/

Author: mikemccand
Date: Fri Jul 31 17:41:04 2009
New Revision: 799667

URL: http://svn.apache.org/viewvc?rev=799667&view=rev
Log:
LUCENE-1745: allow passing matching flags to the underlying regexp engine

Modified:
    lucene/java/trunk/contrib/CHANGES.txt
    lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java
    lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java
    lucene/java/trunk/contrib/regex/src/test/org/apache/lucene/search/regex/TestRegexQuery.java

Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=799667&r1=799666&r2=799667&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Fri Jul 31 17:41:04 2009
@@ -87,6 +87,14 @@
 10. LUCENE-1272: Add get/setBoost to MoreLikeThis. (Jonathan
     Leibiusky via Mike McCandless)
  
+11. LUCENE-1745: Added constructors to JakartaRegexpCapabilities and
+    JavaUtilRegexCapabilities as well as static flags to support
+    configuring a RegexCapabilities implementation with the
+    implementation-specific modifier flags. Allows for callers to
+    customize the RegexQuery using the implementation-specific options
+    and fine tune how regular expressions are compiled and
+    matched. (Marc Zampetti zampettim@aim.com via Mike McCandless)
+ 
 Optimizations
 
   1. LUCENE-1643: Re-use the collation key (RawCollationKey) for

Modified: lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java?rev=799667&r1=799666&r2=799667&view=diff
==============================================================================
--- lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java (original)
+++ lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java Fri Jul 31 17:41:04 2009
@@ -27,9 +27,40 @@
  */
 public class JakartaRegexpCapabilities implements RegexCapabilities {
   private RE regexp;
-
+  
+  // Define the flags that are possible. Redefine them here
+  // to avoid exposing the RE class to the caller.
+  
+  private int flags = RE.MATCH_NORMAL;
+
+  /**
+   * Flag to specify normal, case-sensitive matching behaviour. This is the default.
+   */
+  public static final int FLAG_MATCH_NORMAL = RE.MATCH_NORMAL;
+  
+  /**
+   * Flag to specify that matching should be case-independent (folded)
+   */
+  public static final int FLAG_MATCH_CASEINDEPENDENT = RE.MATCH_CASEINDEPENDENT;
+ 
+  /**
+   * Constructs a RegexCapabilities with the default MATCH_NORMAL match style.
+   */
+  public JakartaRegexpCapabilities() {}
+  
+  /**
+   * Constructs a RegexCapabilities with the provided match flags.
+   * Multiple flags should be ORed together.
+   * 
+   * @param flags The matching style
+   */
+  public JakartaRegexpCapabilities(int flags)
+  {
+    this.flags = flags;
+  }
+  
   public void compile(String pattern) {
-    regexp = new RE(pattern);
+    regexp = new RE(pattern, this.flags);
   }
 
   public boolean match(String string) {

Modified: lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java?rev=799667&r1=799666&r2=799667&view=diff
==============================================================================
--- lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java (original)
+++ lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java Fri Jul 31 17:41:04 2009
@@ -28,9 +28,46 @@
  */
 public class JavaUtilRegexCapabilities implements RegexCapabilities {
   private Pattern pattern;
-
+  private int flags = 0;
+  
+  // Define the optional flags from Pattern that can be used.
+  // Do this here to keep Pattern contained within this class.
+  
+  public static final int FLAG_CANON_EQ = Pattern.CANON_EQ;
+  public static final int FLAG_CASE_INSENSITIVE = Pattern.CASE_INSENSITIVE;
+  public static final int FLAG_COMMENTS = Pattern.COMMENTS;
+  public static final int FLAG_DOTALL = Pattern.DOTALL;
+  public static final int FLAG_LITERAL = Pattern.LITERAL;
+  public static final int FLAG_MULTILINE = Pattern.MULTILINE;
+  public static final int FLAG_UNICODE_CASE = Pattern.UNICODE_CASE;
+  public static final int FLAG_UNIX_LINES = Pattern.UNIX_LINES;
+  
+  /**
+   * Default constructor that uses java.util.regex.Pattern 
+   * with its default flags.
+   */
+  public JavaUtilRegexCapabilities()  {
+    this.flags = 0;
+  }
+  
+  /**
+   * Constructor that allows for the modification of the flags that
+   * the java.util.regex.Pattern will use to compile the regular expression.
+   * This gives the user the ability to fine-tune how the regular expression
+   * is compiled and matched, to suit the functionality that they need.
+   * The {@link java.util.regex.Pattern Pattern} class supports specifying 
+   * these fields via the regular expression text itself, but this gives the caller
+   * another option to modify the behavior. Useful in cases where the regular expression text
+   * cannot be modified, or if doing so is undesired.
+   * 
+   * @param flags The flags that are ORed together.
+   */
+  public JavaUtilRegexCapabilities(int flags) {
+    this.flags = flags;
+  }
+  
   public void compile(String pattern) {
-    this.pattern = Pattern.compile(pattern);
+    this.pattern = Pattern.compile(pattern, this.flags);
   }
 
   public boolean match(String string) {

Modified: lucene/java/trunk/contrib/regex/src/test/org/apache/lucene/search/regex/TestRegexQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/regex/src/test/org/apache/lucene/search/regex/TestRegexQuery.java?rev=799667&r1=799666&r2=799667&view=diff
==============================================================================
--- lucene/java/trunk/contrib/regex/src/test/org/apache/lucene/search/regex/TestRegexQuery.java (original)
+++ lucene/java/trunk/contrib/regex/src/test/org/apache/lucene/search/regex/TestRegexQuery.java Fri Jul 31 17:41:04 2009
@@ -33,6 +33,7 @@
   private IndexSearcher searcher;
   private final String FN = "field";
 
+
   public void setUp() {
     RAMDirectory directory = new RAMDirectory();
     try {
@@ -59,8 +60,12 @@
 
   private Term newTerm(String value) { return new Term(FN, value); }
 
-  private int  regexQueryNrHits(String regex) throws Exception {
+  private int  regexQueryNrHits(String regex, RegexCapabilities capability) throws Exception {
     RegexQuery query = new RegexQuery( newTerm(regex));
+    
+    if ( capability != null )
+      query.setRegexImplementation(capability);
+    
     return searcher.search(query).length();
   }
 
@@ -68,19 +73,20 @@
     SpanRegexQuery srq1 = new SpanRegexQuery( newTerm(regex1));
     SpanRegexQuery srq2 = new SpanRegexQuery( newTerm(regex2));
     SpanNearQuery query = new SpanNearQuery( new SpanQuery[]{srq1, srq2}, slop, ordered);
+    
     return searcher.search(query).length();
   }
 
   public void testRegex1() throws Exception {
-    assertEquals(1, regexQueryNrHits("^q.[aeiou]c.*$"));
+    assertEquals(1, regexQueryNrHits("^q.[aeiou]c.*$", null));
   }
 
   public void testRegex2() throws Exception {
-    assertEquals(0, regexQueryNrHits("^.[aeiou]c.*$"));
+    assertEquals(0, regexQueryNrHits("^.[aeiou]c.*$", null));
   }
 
   public void testRegex3() throws Exception {
-    assertEquals(0, regexQueryNrHits("^q.[aeiou]c$"));
+    assertEquals(0, regexQueryNrHits("^q.[aeiou]c$", null));
   }
 
   public void testSpanRegex1() throws Exception {
@@ -98,6 +104,22 @@
     RegexQuery query2 = new RegexQuery( newTerm("foo.*"));
     assertFalse(query1.equals(query2));
   }
+  
+  public void testJakartaCaseSensativeFail() throws Exception {
+    assertEquals(0, regexQueryNrHits("^.*DOG.*$", null));
+  }
+
+  public void testJavaUtilCaseSensativeFail() throws Exception {
+    assertEquals(0, regexQueryNrHits("^.*DOG.*$", null));
+  }
+  
+  public void testJakartaCaseInsensative() throws Exception {
+    assertEquals(1, regexQueryNrHits("^.*DOG.*$", new JakartaRegexpCapabilities(JakartaRegexpCapabilities.FLAG_MATCH_CASEINDEPENDENT)));
+  }
+  
+  public void testJavaUtilCaseInsensative() throws Exception {
+    assertEquals(1, regexQueryNrHits("^.*DOG.*$", new JavaUtilRegexCapabilities(JavaUtilRegexCapabilities.FLAG_CASE_INSENSITIVE)));
+  }
 
 }