You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by mi...@apache.org on 2009/07/31 19:41:05 UTC
svn commit: r799667 - in /lucene/java/trunk/contrib: ./
regex/src/java/org/apache/lucene/search/regex/
regex/src/test/org/apache/lucene/search/regex/
Author: mikemccand
Date: Fri Jul 31 17:41:04 2009
New Revision: 799667
URL: http://svn.apache.org/viewvc?rev=799667&view=rev
Log:
LUCENE-1745: allow passing matching flags to the underlying regexp engine
Modified:
lucene/java/trunk/contrib/CHANGES.txt
lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java
lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java
lucene/java/trunk/contrib/regex/src/test/org/apache/lucene/search/regex/TestRegexQuery.java
Modified: lucene/java/trunk/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/CHANGES.txt?rev=799667&r1=799666&r2=799667&view=diff
==============================================================================
--- lucene/java/trunk/contrib/CHANGES.txt (original)
+++ lucene/java/trunk/contrib/CHANGES.txt Fri Jul 31 17:41:04 2009
@@ -87,6 +87,14 @@
10. LUCENE-1272: Add get/setBoost to MoreLikeThis. (Jonathan
Leibiusky via Mike McCandless)
+11. LUCENE-1745: Added constructors to JakartaRegexpCapabilities and
+ JavaUtilRegexCapabilities as well as static flags to support
+ configuring a RegexCapabilities implementation with the
+ implementation-specific modifier flags. Allows for callers to
+ customize the RegexQuery using the implementation-specific options
+ and fine tune how regular expressions are compiled and
+ matched. (Marc Zampetti zampettim@aim.com via Mike McCandless)
+
Optimizations
1. LUCENE-1643: Re-use the collation key (RawCollationKey) for
Modified: lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java?rev=799667&r1=799666&r2=799667&view=diff
==============================================================================
--- lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java (original)
+++ lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JakartaRegexpCapabilities.java Fri Jul 31 17:41:04 2009
@@ -27,9 +27,40 @@
*/
public class JakartaRegexpCapabilities implements RegexCapabilities {
private RE regexp;
-
+
+ // Define the flags that are possible. Redefine them here
+ // to avoid exposing the RE class to the caller.
+
+ private int flags = RE.MATCH_NORMAL;
+
+ /**
+ * Flag to specify normal, case-sensitive matching behaviour. This is the default.
+ */
+ public static final int FLAG_MATCH_NORMAL = RE.MATCH_NORMAL;
+
+ /**
+ * Flag to specify that matching should be case-independent (folded)
+ */
+ public static final int FLAG_MATCH_CASEINDEPENDENT = RE.MATCH_CASEINDEPENDENT;
+
+ /**
+ * Constructs a RegexCapabilities with the default MATCH_NORMAL match style.
+ */
+ public JakartaRegexpCapabilities() {}
+
+ /**
+ * Constructs a RegexCapabilities with the provided match flags.
+ * Multiple flags should be ORed together.
+ *
+ * @param flags The matching style
+ */
+ public JakartaRegexpCapabilities(int flags)
+ {
+ this.flags = flags;
+ }
+
public void compile(String pattern) {
- regexp = new RE(pattern);
+ regexp = new RE(pattern, this.flags);
}
public boolean match(String string) {
Modified: lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java?rev=799667&r1=799666&r2=799667&view=diff
==============================================================================
--- lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java (original)
+++ lucene/java/trunk/contrib/regex/src/java/org/apache/lucene/search/regex/JavaUtilRegexCapabilities.java Fri Jul 31 17:41:04 2009
@@ -28,9 +28,46 @@
*/
public class JavaUtilRegexCapabilities implements RegexCapabilities {
private Pattern pattern;
-
+ private int flags = 0;
+
+ // Define the optional flags from Pattern that can be used.
+ // Do this here to keep Pattern contained within this class.
+
+ public static final int FLAG_CANON_EQ = Pattern.CANON_EQ;
+ public static final int FLAG_CASE_INSENSITIVE = Pattern.CASE_INSENSITIVE;
+ public static final int FLAG_COMMENTS = Pattern.COMMENTS;
+ public static final int FLAG_DOTALL = Pattern.DOTALL;
+ public static final int FLAG_LITERAL = Pattern.LITERAL;
+ public static final int FLAG_MULTILINE = Pattern.MULTILINE;
+ public static final int FLAG_UNICODE_CASE = Pattern.UNICODE_CASE;
+ public static final int FLAG_UNIX_LINES = Pattern.UNIX_LINES;
+
+ /**
+ * Default constructor that uses java.util.regex.Pattern
+ * with its default flags.
+ */
+ public JavaUtilRegexCapabilities() {
+ this.flags = 0;
+ }
+
+ /**
+ * Constructor that allows for the modification of the flags that
+ * the java.util.regex.Pattern will use to compile the regular expression.
+ * This gives the user the ability to fine-tune how the regular expression
+ * is compiled and matched, to get the functionality that they need.
+ * The {@link java.util.regex.Pattern Pattern} class supports specifying
+ * these fields via the regular expression text itself, but this gives the caller
+ * another option to modify the behavior. Useful in cases where the regular expression text
+ * cannot be modified, or if doing so is undesired.
+ *
+ * @param flags The flags that are ORed together.
+ */
+ public JavaUtilRegexCapabilities(int flags) {
+ this.flags = flags;
+ }
+
public void compile(String pattern) {
- this.pattern = Pattern.compile(pattern);
+ this.pattern = Pattern.compile(pattern, this.flags);
}
public boolean match(String string) {
Modified: lucene/java/trunk/contrib/regex/src/test/org/apache/lucene/search/regex/TestRegexQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/contrib/regex/src/test/org/apache/lucene/search/regex/TestRegexQuery.java?rev=799667&r1=799666&r2=799667&view=diff
==============================================================================
--- lucene/java/trunk/contrib/regex/src/test/org/apache/lucene/search/regex/TestRegexQuery.java (original)
+++ lucene/java/trunk/contrib/regex/src/test/org/apache/lucene/search/regex/TestRegexQuery.java Fri Jul 31 17:41:04 2009
@@ -33,6 +33,7 @@
private IndexSearcher searcher;
private final String FN = "field";
+
public void setUp() {
RAMDirectory directory = new RAMDirectory();
try {
@@ -59,8 +60,12 @@
private Term newTerm(String value) { return new Term(FN, value); }
- private int regexQueryNrHits(String regex) throws Exception {
+ private int regexQueryNrHits(String regex, RegexCapabilities capability) throws Exception {
RegexQuery query = new RegexQuery( newTerm(regex));
+
+ if ( capability != null )
+ query.setRegexImplementation(capability);
+
return searcher.search(query).length();
}
@@ -68,19 +73,20 @@
SpanRegexQuery srq1 = new SpanRegexQuery( newTerm(regex1));
SpanRegexQuery srq2 = new SpanRegexQuery( newTerm(regex2));
SpanNearQuery query = new SpanNearQuery( new SpanQuery[]{srq1, srq2}, slop, ordered);
+
return searcher.search(query).length();
}
public void testRegex1() throws Exception {
- assertEquals(1, regexQueryNrHits("^q.[aeiou]c.*$"));
+ assertEquals(1, regexQueryNrHits("^q.[aeiou]c.*$", null));
}
public void testRegex2() throws Exception {
- assertEquals(0, regexQueryNrHits("^.[aeiou]c.*$"));
+ assertEquals(0, regexQueryNrHits("^.[aeiou]c.*$", null));
}
public void testRegex3() throws Exception {
- assertEquals(0, regexQueryNrHits("^q.[aeiou]c$"));
+ assertEquals(0, regexQueryNrHits("^q.[aeiou]c$", null));
}
public void testSpanRegex1() throws Exception {
@@ -98,6 +104,22 @@
RegexQuery query2 = new RegexQuery( newTerm("foo.*"));
assertFalse(query1.equals(query2));
}
+
+ public void testJakartaCaseSensativeFail() throws Exception {
+ assertEquals(0, regexQueryNrHits("^.*DOG.*$", null));
+ }
+
+ public void testJavaUtilCaseSensativeFail() throws Exception {
+ assertEquals(0, regexQueryNrHits("^.*DOG.*$", null));
+ }
+
+ public void testJakartaCaseInsensative() throws Exception {
+ assertEquals(1, regexQueryNrHits("^.*DOG.*$", new JakartaRegexpCapabilities(JakartaRegexpCapabilities.FLAG_MATCH_CASEINDEPENDENT)));
+ }
+
+ public void testJavaUtilCaseInsensative() throws Exception {
+ assertEquals(1, regexQueryNrHits("^.*DOG.*$", new JavaUtilRegexCapabilities(JavaUtilRegexCapabilities.FLAG_CASE_INSENSITIVE)));
+ }
}