You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by pr...@apache.org on 2017/09/13 15:56:45 UTC
[1/2] drill git commit: DRILL-5697: Improve performance of filter
operator for pattern matching
Repository: drill
Updated Branches:
refs/heads/master c7c8ffd6f -> aaff1b35b
http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
index 4723d20..39b03dc 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
@@ -157,6 +157,967 @@ public class TestStringFunctions extends BaseTestQuery {
}
@Test
+ public void testLikeStartsWith() throws Exception {
+
+ // all ASCII.
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'), ('ABD'), ('ABCD'),('ABCDE')," +
+ "('AABCD'),('ABABCD'),('ABC$XYZ'), (''),('abcd')," +
+ "('x'), ('xyz'), ('%')) tbl(id) " +
+ "where id like 'ABC%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABC")
+ .baselineValues("ABCD")
+ .baselineValues("ABCDE")
+ .baselineValues("ABC$XYZ")
+ .build()
+ .run();
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'), ('ABD')," +
+ "('ABCD'),('ABCDE'),('AABCD'),('ABAB CD'),('ABC$XYZ')," +
+ "(''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id)" +
+ "where id like 'AB%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("AB")
+ .baselineValues("ABC")
+ .baselineValues("ABD")
+ .baselineValues("ABCD")
+ .baselineValues("ABCDE")
+ .baselineValues("ABAB CD")
+ .baselineValues("ABC$XYZ")
+ .build()
+ .run();
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'), ('ABC'), ('ABD'), ('ABCD')," +
+ "('ABCDE'),('AABCD'),('ABAB CD'),('ABC$XYZ'), ('')," +
+ "('abcd'), ('x'), ('xyz'), ('%')) tbl(id)" +
+ "where id like 'A%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("A")
+ .baselineValues("AB")
+ .baselineValues("ABC")
+ .baselineValues("ABD")
+ .baselineValues("ABCD")
+ .baselineValues("ABCDE")
+ .baselineValues("AABCD")
+ .baselineValues("ABAB CD")
+ .baselineValues("ABC$XYZ")
+ .build()
+ .run();
+
+ // no match
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'),('ABCD'),('ABCDE')," +
+ "('AABCD'), ('ABABCD'),('ABC$XYZ'), (''),('abcd')," +
+ "('x'), ('xyz'), ('%')) tbl(id)" +
+ " where id like 'z%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+
+ // patternLength > txtLength
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'),('ABCD'),('ABCDE'),('AABCD')," +
+ "('ABABCD'),('ABC$XYZ'), (''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id)" +
+ "where id like 'ABCDEXYZRST%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+
+ // non ASCII
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~')," +
+ " ('xyz'), ('%')) tbl(id)" +
+ " where id like '¤%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~")
+ .build()
+ .run();
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('ABC¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~'), " +
+ "('xyz'), ('%')) tbl(id)" +
+ " where id like 'ABC¤%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABC¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~")
+ .build()
+ .run();
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('ABC¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~'), ('xyz'), ('%')) tbl(id)" +
+ " where id like 'A%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABC")
+ .baselineValues("ABC¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~")
+ .build()
+ .run();
+
+ // no match
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~'), ('xyz'), ('%')) tbl(id) " +
+ "where id like 'Z%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+ }
+
+ @Test
+ public void testLikeEndsWith() throws Exception {
+
+ // all ASCII. End with multiple characters
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'), ('ABD'), ('ABCD'),('ABCDE')," +
+ "('AABCD'),('ABABCD'),('ABC$XYZ'), (''),('abcd'), " +
+ "('x'), ('xyz'), ('%')) tbl(id) " +
+ "where id like '%BCD'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABCD")
+ .baselineValues("AABCD")
+ .baselineValues("ABABCD")
+ .build()
+ .run();
+
+ // all ASCII. End with single character.
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'), ('ABD'), ('ABCD'),('ABCDE')," +
+ "('AABCD'),('ABABCD'),('ABC$XYZ'), (''),('abcd'), " +
+ "('x'), ('xyz'), ('%')) tbl(id)" +
+ "where id like '%D'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABD")
+ .baselineValues("ABCD")
+ .baselineValues("AABCD")
+ .baselineValues("ABABCD")
+ .build()
+ .run();
+
+ // all ASCII. End with nothing. Should match all.
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'), ('ABD'), ('ABCD'),('ABCDE')," +
+ "('AABCD'),('ABABCD'),('ABC$XYZ'), (''),('abcd'), " +
+ "('x'), ('xyz'), ('%')) tbl(id)" +
+ "where id like '%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("A")
+ .baselineValues("AB")
+ .baselineValues("ABC")
+ .baselineValues("ABD")
+ .baselineValues("ABCD")
+ .baselineValues("ABCDE")
+ .baselineValues("AABCD")
+ .baselineValues("ABABCD")
+ .baselineValues("ABC$XYZ")
+ .baselineValues("")
+ .baselineValues("abcd")
+ .baselineValues("x")
+ .baselineValues("xyz")
+ .baselineValues("%")
+ .build()
+ .run();
+
+ // no match
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'),('ABCD'),('ABCDE'),('AABCD')," +
+ "('ABABCD'),('ABC$XYZ'), (''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id)" +
+ "where id like '%F'")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+
+ // patternLength > txtLength
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'),('ABCD'),('ABCDE'),('AABCD'),('ABABCD')," +
+ "('ABC$XYZ'), (''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id) " +
+ "where id like '%ABCDEXYZRST'")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+
+ // patternLength == txtLength
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'),('ABCD'),('ABCDE'),('AABCD'),('ABABCD')," +
+ "('ABC$XYZ'), (''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id) " +
+ "where id like '%ABC'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABC")
+ .build()
+ .run();
+
+ // non ASCII text. Ends with a short ASCII suffix (~~)
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~'), ('')," +
+ "('¶TÆU2~~'), ('xyz'), ('%')) tbl(id) " +
+ "where id like '%~~'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~")
+ .baselineValues("¶TÆU2~~")
+ .build()
+ .run();
+
+ // non ASCII. End with multiple characters
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('ABC¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~'), " +
+ "(''), ('¶TÆU2~~'), ('xyz'), ('%')) tbl(id)" +
+ "where id like '%¶TÆU2~~'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABC¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~")
+ .baselineValues("¶TÆU2~~")
+ .build()
+ .run();
+
+ // non ASCII, no match
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('ABC¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~'), ('')," +
+ "('xyz'), ('%')) tbl(id)" +
+ "where id like '%E'")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+ }
+
+ @Test
+ public void testLikeContains() throws Exception {
+
+ // all ASCII. match at the beginning, middle and end.
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'), ('ABD'), ('ABCD'),('DEABC')," +
+ "('AABCD'), ('ABABCDEF'),('AABC$XYZ'), (''),('abcd'), ('x'), " +
+ "('xyz'), ('%')) tbl(id) " +
+ "where id like '%ABC%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABC")
+ .baselineValues("ABCD")
+ .baselineValues("DEABC")
+ .baselineValues("AABCD")
+ .baselineValues("ABABCDEF")
+ .baselineValues("AABC$XYZ")
+ .build()
+ .run();
+
+ // all ASCII. match at the beginning, middle and end, single character.
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'), ('ABD'), ('ABCD'),('ABCDE')," +
+ "('AABCD'),('ABABCD'),('CAB$XYZ'), (''),('abcd'), ('x'), " +
+ "('xyz'), ('%')) tbl(id)" +
+ "where id like '%C%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABC")
+ .baselineValues("ABCD")
+ .baselineValues("ABCDE")
+ .baselineValues("AABCD")
+ .baselineValues("ABABCD")
+ .baselineValues("CAB$XYZ")
+ .build()
+ .run();
+
+ // no match
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'), ('ABD'), ('ABCD'),('ABCDE')," +
+ "('AABCD'),('ABABCD'),('CAB$XYZ'), (''),('abcd'), ('x')," +
+ "('xyz'), ('%')) tbl(id)" +
+ "where id like '%FGH%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+
+ // patternLength > txtLength
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'),('ABCD'),('ABCDE'),('AABCD'),('ABABCD')," +
+ "('ABC$XYZ'), (''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id)" +
+ "where id like '%ABCDEXYZRST%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+
+ // all match
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'),('ABCD'),('ABCDE'),('AABCD'),('ABABCD')," +
+ "('ABC$XYZ'), (''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id)" +
+ "where id like '%%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("A")
+ .baselineValues("AB")
+ .baselineValues("ABC")
+ .baselineValues("ABCD")
+ .baselineValues("ABCDE")
+ .baselineValues("AABCD")
+ .baselineValues("ABABCD")
+ .baselineValues("ABC$XYZ")
+ .baselineValues("")
+ .baselineValues("abcd")
+ .baselineValues("x")
+ .baselineValues("xyz")
+ .baselineValues("%")
+ .build()
+ .run();
+
+ // non ASCII
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~'), " +
+ "(''), ('¶TÆU2~~'), ('xyz'), ('%')) tbl(id)" +
+ "where id like '%ÆU2%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~")
+ .baselineValues("¶TÆU2~~")
+ .build()
+ .run();
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('ABC¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~'), " +
+ "(''), ('¶TÆU2~~'), ('xyz'), ('%')) tbl(id)" +
+ "where id like '%EÀsÆW%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABC¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~")
+ .build()
+ .run();
+
+ // no match
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('ABC¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~'), ('')," +
+ "('xyz'), ('%')) tbl(id) where id like '%¶T¶T%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+ }
+
+ @Test
+ public void testLikeConstant() throws Exception {
+
+ // all ASCII
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'), ('ABD'), " +
+ "('ABCD'),('ABCDE'),('AABCD'),('ABABCD'),('ABC$XYZ'), ('')," +
+ "('abcd'), ('x'), ('xyz'), ('%')) tbl(id) " +
+ "where id like 'ABC'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABC")
+ .build()
+ .run();
+
+
+ // Multiple same values
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'), ('ABC')," +
+ "('ABD'), ('ABCD'),('ABCDE'),('AABCD'),('ABABCD'),('ABC$XYZ')," +
+ "(''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id)" +
+ "where id like 'ABC'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABC")
+ .baselineValues("ABC")
+ .build()
+ .run();
+
+ // match empty string
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'), ('ABD'), ('ABCD'),('ABCDE')," +
+ "('AABCD'),('ABABCD'),('ABC$XYZ'), (''),('abcd'), ('x')," +
+ " ('xyz'), ('%')) tbl(id)" +
+ "where id like ''")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("")
+ .build()
+ .run();
+
+ // no match
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'),('ABCD'),('ABCDE'),('AABCD')," +
+ "('ABABCD'),('ABC$XYZ'), (''),('abcd'), ('x'), ('xyz')," +
+ "('%')) tbl(id) where id like 'EFGH'")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+
+ // patternLength > txtLength
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'),('ABCD'),('ABCDE'),('AABCD'),('ABABCD')," +
+ "('ABC$XYZ'), (''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id)" +
+ " where id like 'ABCDEXYZRST'")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+
+
+ // non ASCII
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~'), (''), " +
+ "('¶TÆU2~~'), ('xyz'), ('%')) tbl(id)" +
+ " where id like '¶TÆU2~~'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("¶TÆU2~~")
+ .build()
+ .run();
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('ABC¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~'), ('')," +
+ "('¶TÆU2~~'), ('xyz'), ('%')) tbl(id)" +
+ "where id like 'ABC¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABC¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~")
+ .build()
+ .run();
+
+ // no match
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('ABC¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~'), (''), " +
+ "('xyz'), ('%')) tbl(id) where id like '¶T¶T'")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+ }
+
+ @Test
+ public void testLikeWithEscapeStartsWith() throws Exception {
+
+ // all ASCII
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC%'), ('ABD'), ('ABCD'),('ABCDE')," +
+ "('AABCD'),('ABABCD'),('ABC$XYZ'), (''),('abcd'), ('x')," +
+ "('xyz'), ('%')) tbl(id) " +
+ "where id like 'ABC#%' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABC%")
+ .build()
+ .run();
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('A%B'),('A%B%C%'), ('ABD'), ('ABCD')," +
+ "('ABCDE'),('AABCD'),('A%BABCD'),('ABC$XYZ'), ('')," +
+ "('abcd'), ('x'), ('xyz'), ('%')) tbl (id)" +
+ "where id like 'A#%B%' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("A%B")
+ .baselineValues("A%B%C%")
+ .baselineValues("A%BABCD")
+ .build()
+ .run();
+
+ // Multiple escape characters
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A_'),('AB'),('ABC'), ('ABD'), ('A_BC%D_')," +
+ "('ABCDE'), ('A_BC%D_XYZ'),('ABABCD'),('A_BC%D_$%XYZ')," +
+ " (''),('abcd'), ('x'), ('%')) tbl(id)" +
+ "where id like 'A#_BC#%D#_%' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("A_BC%D_")
+ .baselineValues("A_BC%D_XYZ")
+ .baselineValues("A_BC%D_$%XYZ")
+ .build()
+ .run();
+
+ // Escape character followed by escape character
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A_'),('AB'),('ABC'), ('ABD'), ('ABC%D_')," +
+ "('A#BC%D_E'),('A_BC%D_XYZ'),('ABABCD'),('A#BC%D_$%XYZ')," +
+ " (''),('abcd'), ('x'), ('%')) tbl(id)" +
+ "where id like 'A##BC#%D#_%' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("A#BC%D_E")
+ .baselineValues("A#BC%D_$%XYZ")
+ .build()
+ .run();
+
+ // no match
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'),('ABCD'),('ABCDE'),('AABCD')," +
+ "('ABABCD'),('ABC$XYZ'), (''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id)" +
+ " where id like 'z#%' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+
+ // patternLength > txtLength
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'),('ABCD'),('ABCDE'),('AABCD')," +
+ "('ABABCD'),('ABC$XYZ'), (''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id)" +
+ " where id like 'ABCDEXYZRST#_%' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+
+ // non ASCII
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('¤E_ÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~')," +
+ " ('xyz'), ('%')) tbl(id)" +
+ " where id like '¤E#_%' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("¤E_ÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~")
+ .build()
+ .run();
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('ABC¤EÀ%sÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~'), " +
+ "('xyz'), ('%')) tbl(id)" +
+ " where id like 'ABC¤EÀ#%sÆW%' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABC¤EÀ%sÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~")
+ .build()
+ .run();
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('AB%C'), " +
+ "('AB%C¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~'), ('xyz'), ('%')) tbl(id)" +
+ " where id like 'AB#%C%' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("AB%C")
+ .baselineValues("AB%C¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~")
+ .build()
+ .run();
+
+ // no match
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~')," +
+ " ('xyz'), ('%')) tbl(id)" +
+ "where id like 'Z$%%' escape '$'")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+ }
+
+ @Test
+ public void testLikeWithEscapeEndsWith() throws Exception {
+
+ // all ASCII
+ testBuilder().sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'),('AB%C'),('ABCDE'),('AABCD')," +
+ "('ABAB%C'),('ABC$XYZAB%C'), (''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id) " +
+ "where id like '%AB$%C' escape '$'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("AB%C")
+ .baselineValues("ABAB%C")
+ .baselineValues("ABC$XYZAB%C")
+ .build()
+ .run();
+
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB_'),('AB%C%AB_'), ('ABD'), ('ABCD')," +
+ "('ABCDE'), ('AABCD'),('AB%ABCD'),('ABC$XYZAB_'), ('')," +
+ "('abcd'), ('x'), ('xyz'), ('%')) tbl(id)" +
+ "where id like '%AB#_' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("AB_")
+ .baselineValues("AB%C%AB_")
+ .baselineValues("ABC$XYZAB_")
+ .build()
+ .run();
+
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A_'),('AB'),('ABC'), ('ABD'), ('A_BCD'),('ABCDEA_')," +
+ "('A_ABCD'),('ABABCDA_'),('A_BC$XYZA_'), (''),('abcd')," +
+ " ('x'), ('xyz'), ('%')) tbl(id)" +
+ "where id like '%A#_' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues(("A_"))
+ .baselineValues("ABCDEA_")
+ .baselineValues("ABABCDA_")
+ .baselineValues("A_BC$XYZA_")
+ .build()
+ .run();
+
+ // Multiple escape characters
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A_'),('AB'),('ABC'), ('ABD'), ('A_BC%D_'),('ABCDE')," +
+ "('XYZA_BC%D_'),('ABABCD'),('$%XYZA_BC%D_'), ('')," +
+ "('abcd'), ('x'), ('%')) tbl(id)" +
+ " where id like '%A#_BC#%D#_' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("A_BC%D_")
+ .baselineValues("XYZA_BC%D_")
+ .baselineValues("$%XYZA_BC%D_")
+ .build()
+ .run();
+
+
+ // Escape character followed by escape character
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A_'),('AB'),('ABC'), ('ABD'), ('A#BC%D_'),('A#BC%D_E')," +
+ "('A_BC%D_XYZ'),('ABABCD'),('$%XYZA#BC%D_'), ('')," +
+ "('abcd'), ('x'), ('%')) tbl(id)" +
+ " where id like '%A##BC#%D#_' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("A#BC%D_")
+ .baselineValues("$%XYZA#BC%D_")
+ .build()
+ .run();
+
+ // non ASCII
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('¤E_ÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2_~~')," +
+ " ('xyz'), ('%')) tbl(id)" +
+ " where id like '%2#_~~' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("¤E_ÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2_~~")
+ .build()
+ .run();
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('ABC¤EÀ%sÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~ABC¤EÀ%sÆW'), " +
+ "('xyz'), ('%')) tbl(id)" +
+ " where id like '%ABC¤EÀ#%sÆW' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABC¤EÀ%sÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~ABC¤EÀ%sÆW")
+ .build()
+ .run();
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('AB%C'), " +
+ "('AB%C¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~AB%C'), ('xyz'), ('%')) tbl(id)" +
+ " where id like '%AB#%C' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("AB%C")
+ .baselineValues("AB%C¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~AB%C")
+ .build()
+ .run();
+
+ // escaped % at the end of the pattern: only the literal '%' value matches
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('ABC'), ('¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~')," +
+ " ('xyz'), ('%')) tbl(id)" +
+ "where id like '%$%' escape '$'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("%")
+ .build()
+ .run();
+
+ }
+
+ @Test
+ public void testLikeWithEscapeContains() throws Exception {
+
+ // test contains with an escaped % inside the constant part (AB%C)
+ testBuilder().sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC'),('AB%C'),('ABCDE'),('AB%AB%CDED')," +
+ "('ABAB%CDE'),('ABC$XYZAB%C'), (''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id)" +
+ "where id like '%AB$%C%' escape '$'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABAB%CDE")
+ .baselineValues("AB%AB%CDED")
+ .baselineValues("AB%C")
+ .baselineValues("ABC$XYZAB%C")
+ .build()
+ .run();
+
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB_'),('%AB%C%AB_'), ('%AB%D'), ('ABCD')," +
+ "('AB%AC%AB%DE'), ('AABCD'),('AB%AB%CD'),('ABC$XYZAB_')," +
+ "(''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id)" +
+ "where id like '%#%AB#%%' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("%AB%C%AB_")
+ .baselineValues("%AB%D")
+ .baselineValues("AB%AB%CD")
+ .baselineValues("AB%AC%AB%DE")
+ .build()
+ .run();
+
+ // no match
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB_'),('%AB%C%AB_'), ('%AB%D'), ('ABCD')," +
+ "('AB%AC%AB%DE'), ('AABCD'),('AB%AB%CD'),('ABC$XYZAB_')," +
+ "(''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id)" +
+ "where id like '%#%A#_B#%%' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A_'),('AB'),('ABC'), ('ABD'), ('A_BCD'),('ABA_CDEA_')," +
+ "('A_ABCD'),('ABABCDA_'),('A_BC$XYZA_'), ('')," +
+ "('abcd'), ('x'), ('xyz'), ('%')) tbl(id)" +
+ "where id like '%A#_%' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues(("A_"))
+ .baselineValues("A_BCD")
+ .baselineValues("ABA_CDEA_")
+ .baselineValues("A_ABCD")
+ .baselineValues("A_BC$XYZA_")
+ .baselineValues("ABABCDA_")
+ .build()
+ .run();
+
+
+ // Multiple escape characters
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A_'),('AB'),('ABC'), ('ABD'), ('A_BC%D_'),('ABCDE')," +
+ "('XYZA_BC%D_'),('ABABCD'),('$%XYZA_BC%D_'), ('')," +
+ "('abcd'), ('x'), ('%')) tbl" +
+ "(id) where id like '%A#_BC#%D#_' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("A_BC%D_")
+ .baselineValues("XYZA_BC%D_")
+ .baselineValues("$%XYZA_BC%D_")
+ .build()
+ .run();
+
+
+ // Escape character followed by escape character
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A_'),('AB'),('ABC'), ('ABDA#BC%D_'), ('A#BC%D_')," +
+ "('A#BC%BC%D_E'),('A_BC%D_XYZ'),('ABABCD'),('$%XYZA#BC%D_')," +
+ " (''),('abcd'), ('x'), ('%')) tbl(id)" +
+ " where id like '%A##BC#%D#_%' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("A#BC%D_")
+ .baselineValues("$%XYZA#BC%D_")
+ .baselineValues("ABDA#BC%D_")
+ .build()
+ .run();
+
+ }
+
+ @Test
+ public void testLikeWithEscapeConstant() throws Exception {
+
+ // % is the escape character here, so 'ABC%%' is the constant 'ABC%' (not startsWith)
+ testBuilder().sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC%'),('ABCD'),('ABCDE'),('AABCD')," +
+ "('ABABCD'),('ABC$XYZ'), (''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id) " +
+ "where id like 'ABC%%' escape '%' ")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("ABC%")
+ .build()
+ .run();
+
+ testBuilder().sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('%ABC'),('ABCD'),('ABCDE'),('AABCD')," +
+ "('ABABCD'),('ABC$XYZ'), (''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id) " +
+ "where id like '%%ABC' escape '%' ")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("%ABC")
+ .build()
+ .run();
+
+ testBuilder().sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('AB%C'),('ABCD'),('ABCDE'),('AABCD'),('ABABCD')," +
+ "('ABC$XYZ'), (''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id) " +
+ "where id like 'AB%%C' escape '%' ")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("AB%C")
+ .build()
+ .run();
+
+ // Multiple escape characters
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A_'),('AB'),('ABC'), ('ABD'), ('%_BC%D_'),('ABCDE')," +
+ "('XYZA_BC%D_'),('ABABCD'),('$%XYZA_BC%D_'), (''),('abcd'), ('x'), ('%')) tbl(id)" +
+ " where id like '%%_BC%%D%_' escape '%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("%_BC%D_")
+ .build()
+ .run();
+
+ // Escape character followed by escape character
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('A_'),('AB'),('ABC'), ('ABDA#BC%D_'), ('A#BC%D_'),('A#BA#BC%D_E')," +
+ "('A_BC%D_XYZ'),('ABABCD'),('$%XYZA#BC%D_'), (''),('abcd'), ('x'), ('%')) tbl(id)" +
+ " where id like 'A##BC#%D#_' escape '#'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("A#BC%D_")
+ .build()
+ .run();
+
+ // no match
+ testBuilder().sqlQuery(" SELECT id FROM (" +
+ "VALUES('A'),('AB'),('ABC%'),('ABCD'),('ABCDE'),('AABCD')," +
+ "('ABABCD'),('ABC$XYZ'), (''),('abcd'), ('x'), ('xyz'), ('%')) tbl(id) " +
+ "where id like '%_ABC%%' escape '%' ")
+ .unOrdered()
+ .baselineColumns("id")
+ .expectsEmptyResultSet()
+ .build()
+ .run();
+
+ }
+
+ @Test
+ public void testLikeRandom() throws Exception {
+
+ // test Random queries with like
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('aeiou'),('abcdef'),('afdrgt'),('abcdt'),('aaaa'),('a'),('aeiou'),(''),('a aa')) tbl(id)" +
+ "where id not like 'a %'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("aeiou")
+ .baselineValues("abcdef")
+ .baselineValues("afdrgt")
+ .baselineValues("abcdt")
+ .baselineValues("aaaa")
+ .baselineValues("a")
+ .baselineValues("aeiou")
+ .baselineValues("")
+ .build()
+ .run();
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('aeiou'),('abcdefizu'),('afdrgt'),('abcdt'),('aaaa'),('a'),('aeiou'),(''),('a aa')) tbl(id)" +
+ "where id like 'a%i_u'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("aeiou")
+ .baselineValues("aeiou")
+ .baselineValues("abcdefizu")
+ .build()
+ .run();
+
+ testBuilder()
+ .sqlQuery(" SELECT id FROM (" +
+ "VALUES('xyzaeioughbcd'),('abcdefizu'),('afdrgt'),('abcdt'),('aaaa'),('a'),('aeiou'),(''),('a aa')) tbl(id)" +
+ "where id like '%a_i_u%bcd%'")
+ .unOrdered()
+ .baselineColumns("id")
+ .baselineValues("xyzaeioughbcd")
+ .build()
+ .run();
+
+ }
+
+ @Test
public void testILike() throws Exception {
testBuilder()
.sqlQuery("select n_name from cp.`tpch/nation.parquet` where ilike(n_name, '%united%') = true")
[2/2] drill git commit: DRILL-5697: Improve performance of filter
operator for pattern matching
Posted by pr...@apache.org.
DRILL-5697: Improve performance of filter operator for pattern matching
closes #907
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/aaff1b35
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/aaff1b35
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/aaff1b35
Branch: refs/heads/master
Commit: aaff1b35b7339fb4e6ab480dd517994ff9f0a5c5
Parents: c7c8ffd
Author: Padma Penumarthy <pp...@yahoo.com>
Authored: Mon Aug 7 15:11:00 2017 -0700
Committer: Paul Rogers <pr...@maprtech.com>
Committed: Tue Sep 12 16:02:20 2017 -0700
----------------------------------------------------------------------
.../drill/exec/expr/fn/impl/RegexpUtil.java | 77 +-
.../expr/fn/impl/SqlPatternComplexMatcher.java | 36 +
.../expr/fn/impl/SqlPatternConstantMatcher.java | 52 +
.../expr/fn/impl/SqlPatternContainsMatcher.java | 53 +
.../expr/fn/impl/SqlPatternEndsWithMatcher.java | 49 +
.../exec/expr/fn/impl/SqlPatternFactory.java | 42 +
.../exec/expr/fn/impl/SqlPatternMatcher.java | 22 +
.../fn/impl/SqlPatternStartsWithMatcher.java | 48 +
.../exec/expr/fn/impl/StringFunctions.java | 34 +-
.../exec/store/ischema/InfoSchemaFilter.java | 4 +-
.../exec/expr/fn/impl/TestSqlPatterns.java | 674 +++++++++++++
.../exec/expr/fn/impl/TestStringFunctions.java | 961 +++++++++++++++++++
12 files changed, 2028 insertions(+), 24 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/RegexpUtil.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/RegexpUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/RegexpUtil.java
index 38c8316..aed718c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/RegexpUtil.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/RegexpUtil.java
@@ -47,10 +47,49 @@ public class RegexpUtil {
"[:alnum:]", "\\p{Alnum}"
};
+ // type of pattern string.
+ public enum SqlPatternType {
+ STARTS_WITH, // Starts with a constant string followed by any string values (ABC%)
+ ENDS_WITH, // Ends with a constant string, starts with any string values (%ABC)
+ CONTAINS, // Contains a constant string, starts and ends with any string values (%ABC%)
+ CONSTANT, // Is a constant string (ABC)
+ COMPLEX // Not a simple pattern. Needs regex evaluation.
+ };
+
+ public static class SqlPatternInfo {
+ private final SqlPatternType patternType; // type of pattern
+
+ // simple pattern with unescaped like meta characters (% and _) and escape characters removed;
+ // escaped characters are kept as literals. Used for simple pattern matching.
+ private final String simplePatternString;
+
+ // javaPatternString used for regex pattern match.
+ private final String javaPatternString;
+
+ public SqlPatternInfo(final SqlPatternType patternType, final String javaPatternString, final String simplePatternString) {
+ this.patternType = patternType;
+ this.simplePatternString = simplePatternString;
+ this.javaPatternString = javaPatternString;
+ }
+
+ public SqlPatternType getPatternType() {
+ return patternType;
+ }
+
+ public String getSimplePatternString() {
+ return simplePatternString;
+ }
+
+ public String getJavaPatternString() {
+ return javaPatternString;
+ }
+
+ }
+
/**
* Translates a SQL LIKE pattern to Java regex pattern. No escape char.
*/
- public static String sqlToRegexLike(String sqlPattern) {
+ public static SqlPatternInfo sqlToRegexLike(String sqlPattern) {
return sqlToRegexLike(sqlPattern, (char)0);
}
@@ -58,7 +97,7 @@ public class RegexpUtil {
* Translates a SQL LIKE pattern to Java regex pattern, with optional
* escape string.
*/
- public static String sqlToRegexLike(
+ public static SqlPatternInfo sqlToRegexLike(
String sqlPattern,
CharSequence escapeStr) {
final char escapeChar;
@@ -76,12 +115,22 @@ public class RegexpUtil {
/**
* Translates a SQL LIKE pattern to Java regex pattern.
*/
- public static String sqlToRegexLike(
+ public static SqlPatternInfo sqlToRegexLike(
String sqlPattern,
char escapeChar) {
int i;
final int len = sqlPattern.length();
final StringBuilder javaPattern = new StringBuilder(len + len);
+ final StringBuilder simplePattern = new StringBuilder(len);
+
+ // Figure out the pattern type and build simplePatternString
+ // as we are going through the sql pattern string
+ // to build the java regex pattern string. This is better than using a
+ // regex later to determine whether a pattern is simple or not.
+ // Saves CPU cycles.
+ // Start with patternType as CONSTANT
+ SqlPatternType patternType = SqlPatternType.CONSTANT;
+
for (i = 0; i < len; i++) {
char c = sqlPattern.charAt(i);
if (JAVA_REGEX_SPECIALS.indexOf(c) >= 0) {
@@ -96,20 +145,40 @@ public class RegexpUtil {
|| (nextChar == '%')
|| (nextChar == escapeChar)) {
javaPattern.append(nextChar);
+ simplePattern.append(nextChar);
i++;
} else {
throw invalidEscapeSequence(sqlPattern, i);
}
} else if (c == '_') {
+ // if we find _, it is not simple pattern, we are looking for only %
+ patternType = SqlPatternType.COMPLEX;
javaPattern.append('.');
} else if (c == '%') {
+ if (i == 0) {
+ // % at the start could potentially be one of the simple cases i.e. ENDS_WITH.
+ patternType = SqlPatternType.ENDS_WITH;
+ } else if (i == (len-1)) {
+ if (patternType == SqlPatternType.ENDS_WITH) {
+ // Starts and Ends with %. This is contains.
+ patternType = SqlPatternType.CONTAINS;
+ } else if (patternType == SqlPatternType.CONSTANT) {
+ // % at the end with constant string in the beginning i.e. STARTS_WITH
+ patternType = SqlPatternType.STARTS_WITH;
+ }
+ } else {
+ // If we find % anywhere other than start or end, it is not a simple case.
+ patternType = SqlPatternType.COMPLEX;
+ }
javaPattern.append(".");
javaPattern.append('*');
} else {
javaPattern.append(c);
+ simplePattern.append(c);
}
}
- return javaPattern.toString();
+
+ return new SqlPatternInfo(patternType, javaPattern.toString(), simplePattern.toString());
}
private static RuntimeException invalidEscapeCharacter(String s) {
http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternComplexMatcher.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternComplexMatcher.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternComplexMatcher.java
new file mode 100644
index 0000000..91cc85d
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternComplexMatcher.java
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.expr.fn.impl;
+
+/**
+ * {@link SqlPatternMatcher} backed by {@code java.util.regex}. The pattern string is
+ * compiled once in the constructor and the resulting matcher is reused for every
+ * {@link #match()} call.
+ */
+public class SqlPatternComplexMatcher implements SqlPatternMatcher {
+  java.util.regex.Matcher matcher;
+  CharSequence charSequenceWrapper;
+
+  /**
+   * @param patternString       regular expression to compile
+   * @param charSequenceWrapper character sequence the matcher is bound to; it is
+   *                            re-examined on each {@link #match()} call
+   */
+  public SqlPatternComplexMatcher(String patternString, CharSequence charSequenceWrapper) {
+    final java.util.regex.Pattern compiled = java.util.regex.Pattern.compile(patternString);
+    // Creating the matcher directly on the sequence binds and resets it in one step.
+    this.matcher = compiled.matcher(charSequenceWrapper);
+    this.charSequenceWrapper = charSequenceWrapper;
+  }
+
+  /**
+   * @return 1 if the whole bound sequence matches the compiled pattern, 0 otherwise
+   */
+  @Override
+  public int match() {
+    // The matcher stays bound to the same sequence object, so a plain reset() is
+    // enough before attempting a full match.
+    if (matcher.reset().matches()) {
+      return 1;
+    }
+    return 0;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternConstantMatcher.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternConstantMatcher.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternConstantMatcher.java
new file mode 100644
index 0000000..3294575
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternConstantMatcher.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.expr.fn.impl;
+
+/**
+ * {@link SqlPatternMatcher} for patterns without wildcards: the text matches only
+ * when it is character-for-character equal to the pattern string.
+ */
+public class SqlPatternConstantMatcher implements SqlPatternMatcher {
+  final String patternString;
+  CharSequence charSequenceWrapper;
+  final int patternLength;
+
+  /**
+   * @param patternString       literal text to compare against
+   * @param charSequenceWrapper character sequence examined on each {@link #match()} call
+   */
+  public SqlPatternConstantMatcher(String patternString, CharSequence charSequenceWrapper) {
+    this.charSequenceWrapper = charSequenceWrapper;
+    this.patternString = patternString;
+    this.patternLength = patternString.length();
+  }
+
+  /**
+   * @return 1 if the current text equals the pattern string, 0 otherwise
+   */
+  @Override
+  public int match() {
+    // Texts of a different length can never be equal to the pattern.
+    if (charSequenceWrapper.length() != patternLength) {
+      return 0;
+    }
+
+    // simplePattern string has meta characters i.e % and _ and escape characters removed,
+    // so a direct character comparison is sufficient.
+    for (int pos = 0; pos < patternLength; pos++) {
+      if (charSequenceWrapper.charAt(pos) != patternString.charAt(pos)) {
+        return 0;
+      }
+    }
+    return 1;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternContainsMatcher.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternContainsMatcher.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternContainsMatcher.java
new file mode 100644
index 0000000..2602dc8
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternContainsMatcher.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.expr.fn.impl;
+
+/**
+ * {@link SqlPatternMatcher} that succeeds when the pattern string occurs anywhere
+ * inside the current text. An empty pattern string matches every text.
+ */
+public class SqlPatternContainsMatcher implements SqlPatternMatcher {
+  final String patternString;
+  CharSequence charSequenceWrapper;
+  final int patternLength;
+
+  /**
+   * @param patternString       literal text to search for
+   * @param charSequenceWrapper character sequence examined on each {@link #match()} call
+   */
+  public SqlPatternContainsMatcher(String patternString, CharSequence charSequenceWrapper) {
+    this.charSequenceWrapper = charSequenceWrapper;
+    this.patternString = patternString;
+    this.patternLength = patternString.length();
+  }
+
+  /**
+   * @return 1 if the pattern string is a substring of the current text, 0 otherwise
+   */
+  @Override
+  public int match() {
+    final int lastStart = charSequenceWrapper.length() - patternLength;
+
+    // simplePattern string has meta characters i.e % and _ and escape characters removed,
+    // so candidate positions can be compared character by character.
+    for (int start = 0; start <= lastStart; start++) {
+      int matched = 0;
+      while (matched < patternLength
+          && patternString.charAt(matched) == charSequenceWrapper.charAt(start + matched)) {
+        matched++;
+      }
+      if (matched == patternLength) {
+        return 1;
+      }
+    }
+
+    return 0;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternEndsWithMatcher.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternEndsWithMatcher.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternEndsWithMatcher.java
new file mode 100644
index 0000000..15fed22
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternEndsWithMatcher.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.expr.fn.impl;
+
+/**
+ * {@link SqlPatternMatcher} that succeeds when the current text ends with the
+ * pattern string. An empty pattern string matches every text.
+ */
+public class SqlPatternEndsWithMatcher implements SqlPatternMatcher {
+  final String patternString;
+  CharSequence charSequenceWrapper;
+  final int patternLength;
+
+  /**
+   * @param patternString       literal suffix to look for
+   * @param charSequenceWrapper character sequence examined on each {@link #match()} call
+   */
+  public SqlPatternEndsWithMatcher(String patternString, CharSequence charSequenceWrapper) {
+    this.patternString = patternString;
+    this.patternLength = patternString.length();
+    this.charSequenceWrapper = charSequenceWrapper;
+  }
+
+  /**
+   * @return 1 if the current text ends with the pattern string, 0 otherwise
+   */
+  @Override
+  public int match() {
+    // Position in the text where the suffix would have to begin.
+    final int offset = charSequenceWrapper.length() - patternLength;
+    if (offset < 0) {
+      // Text is shorter than the pattern, so it cannot end with it.
+      return 0;
+    }
+
+    // simplePattern string has meta characters i.e % and _ and escape characters removed,
+    // so a direct character comparison is sufficient.
+    for (int k = 0; k < patternLength; k++) {
+      if (charSequenceWrapper.charAt(offset + k) != patternString.charAt(k)) {
+        return 0;
+      }
+    }
+    return 1;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternFactory.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternFactory.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternFactory.java
new file mode 100644
index 0000000..9c85a01
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternFactory.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.expr.fn.impl;
+
+/**
+ * Chooses the {@link SqlPatternMatcher} implementation that corresponds to the
+ * pattern type recorded in a {@code SqlPatternInfo}.
+ */
+public class SqlPatternFactory {
+  /**
+   * Creates a matcher for the given pattern description.
+   *
+   * @param patternInfo         pattern type plus the simple and regex pattern strings
+   * @param charSequenceWrapper character sequence the returned matcher will examine
+   * @return a matcher for the pattern type, or {@code null} when the type has no
+   *         case below
+   */
+  public static SqlPatternMatcher getSqlPatternMatcher(org.apache.drill.exec.expr.fn.impl.RegexpUtil.SqlPatternInfo patternInfo,
+                                                       CharSequence charSequenceWrapper)
+  {
+    switch (patternInfo.getPatternType()) {
+      // The simple matchers work on the pattern string with wildcards stripped.
+      case CONSTANT:
+        return new SqlPatternConstantMatcher(patternInfo.getSimplePatternString(), charSequenceWrapper);
+      case STARTS_WITH:
+        return new SqlPatternStartsWithMatcher(patternInfo.getSimplePatternString(), charSequenceWrapper);
+      case ENDS_WITH:
+        return new SqlPatternEndsWithMatcher(patternInfo.getSimplePatternString(), charSequenceWrapper);
+      case CONTAINS:
+        return new SqlPatternContainsMatcher(patternInfo.getSimplePatternString(), charSequenceWrapper);
+      // Everything else needs the translated Java regex.
+      case COMPLEX:
+        return new SqlPatternComplexMatcher(patternInfo.getJavaPatternString(), charSequenceWrapper);
+      default:
+        return null;
+    }
+  }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternMatcher.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternMatcher.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternMatcher.java
new file mode 100644
index 0000000..9c0c6e2
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternMatcher.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.expr.fn.impl;
+
+/**
+ * Contract shared by the SQL LIKE pattern matchers.
+ */
+public interface SqlPatternMatcher {
+  /**
+   * Tests the current text against the pattern.
+   *
+   * @return 1 on a match, 0 otherwise
+   */
+  int match();
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternStartsWithMatcher.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternStartsWithMatcher.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternStartsWithMatcher.java
new file mode 100644
index 0000000..9faae8a
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SqlPatternStartsWithMatcher.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.expr.fn.impl;
+
+/**
+ * {@link SqlPatternMatcher} that succeeds when the current text begins with the
+ * pattern string. An empty pattern string matches every text.
+ */
+public class SqlPatternStartsWithMatcher implements SqlPatternMatcher {
+  final String patternString;
+  CharSequence charSequenceWrapper;
+  final int patternLength;
+
+  /**
+   * @param patternString       literal prefix to look for
+   * @param charSequenceWrapper character sequence examined on each {@link #match()} call
+   */
+  public SqlPatternStartsWithMatcher(String patternString, CharSequence charSequenceWrapper) {
+    this.patternString = patternString;
+    this.patternLength = patternString.length();
+    this.charSequenceWrapper = charSequenceWrapper;
+  }
+
+  /**
+   * @return 1 if the current text starts with the pattern string, 0 otherwise
+   */
+  @Override
+  public int match() {
+    // A text shorter than the pattern cannot start with it.
+    if (charSequenceWrapper.length() < patternLength) {
+      return 0;
+    }
+
+    // simplePattern string has meta characters i.e % and _ and escape characters removed,
+    // so a direct character comparison is sufficient.
+    for (int pos = 0; pos < patternLength; pos++) {
+      if (charSequenceWrapper.charAt(pos) != patternString.charAt(pos)) {
+        return 0;
+      }
+    }
+    return 1;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
index e5fe957..2a99ffa 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
@@ -55,24 +55,23 @@ public class StringFunctions{
@Param VarCharHolder input;
@Param(constant=true) VarCharHolder pattern;
@Output BitHolder out;
- @Workspace java.util.regex.Matcher matcher;
@Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
+ @Workspace org.apache.drill.exec.expr.fn.impl.RegexpUtil.SqlPatternInfo sqlPatternInfo;
+ @Workspace org.apache.drill.exec.expr.fn.impl.SqlPatternMatcher sqlPatternMatcher;
@Override
public void setup() {
- matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( //
- org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer))).matcher("");
+ sqlPatternInfo = org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike(
+ org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer));
charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
- matcher.reset(charSequenceWrapper);
+ sqlPatternMatcher = org.apache.drill.exec.expr.fn.impl.SqlPatternFactory.getSqlPatternMatcher(sqlPatternInfo, charSequenceWrapper);
}
@Override
public void eval() {
- charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
// Reusing same charSequenceWrapper, no need to pass it in.
- // This saves one method call since reset(CharSequence) calls reset()
- matcher.reset();
- out.value = matcher.matches()? 1:0;
+ charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+ out.value = sqlPatternMatcher.match();
}
}
@@ -83,25 +82,24 @@ public class StringFunctions{
@Param(constant=true) VarCharHolder pattern;
@Param(constant=true) VarCharHolder escape;
@Output BitHolder out;
- @Workspace java.util.regex.Matcher matcher;
@Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
+ @Workspace org.apache.drill.exec.expr.fn.impl.RegexpUtil.SqlPatternInfo sqlPatternInfo;
+ @Workspace org.apache.drill.exec.expr.fn.impl.SqlPatternMatcher sqlPatternMatcher;
@Override
public void setup() {
- matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( //
+ sqlPatternInfo = org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike(
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer),
- org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start, escape.end, escape.buffer))).matcher("");
+ org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start, escape.end, escape.buffer));
charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
- matcher.reset(charSequenceWrapper);
+ sqlPatternMatcher = org.apache.drill.exec.expr.fn.impl.SqlPatternFactory.getSqlPatternMatcher(sqlPatternInfo, charSequenceWrapper);
}
@Override
public void eval() {
- charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
// Reusing same charSequenceWrapper, no need to pass it in.
- // This saves one method call since reset(CharSequence) calls reset()
- matcher.reset();
- out.value = matcher.matches()? 1:0;
+ charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+ out.value = sqlPatternMatcher.match();
}
}
@@ -117,7 +115,7 @@ public class StringFunctions{
@Override
public void setup() {
matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( //
- org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)),
+ org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)).getJavaPatternString(),
java.util.regex.Pattern.CASE_INSENSITIVE).matcher("");
charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
matcher.reset(charSequenceWrapper);
@@ -147,7 +145,7 @@ public class StringFunctions{
public void setup() {
matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( //
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer),
- org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start, escape.end, escape.buffer)),
+ org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start, escape.end, escape.buffer)).getJavaPatternString(),
java.util.regex.Pattern.CASE_INSENSITIVE).matcher("");
charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
matcher.reset(charSequenceWrapper);
http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java
index 22ad7f6..6d4d8ce 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/InfoSchemaFilter.java
@@ -152,10 +152,10 @@ public class InfoSchemaFilter {
final String fieldValue = recordValues.get(col.field.toString());
if (fieldValue != null) {
if (escape == null) {
- return Pattern.matches(sqlToRegexLike(pattern.value), fieldValue) ?
+ return Pattern.matches(sqlToRegexLike(pattern.value).getJavaPatternString(), fieldValue) ?
Result.TRUE : Result.FALSE;
} else {
- return Pattern.matches(sqlToRegexLike(pattern.value, escape.value), fieldValue) ?
+ return Pattern.matches(sqlToRegexLike(pattern.value, escape.value).getJavaPatternString(), fieldValue) ?
Result.TRUE : Result.FALSE;
}
}
http://git-wip-us.apache.org/repos/asf/drill/blob/aaff1b35/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestSqlPatterns.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestSqlPatterns.java b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestSqlPatterns.java
new file mode 100644
index 0000000..d8c1410
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestSqlPatterns.java
@@ -0,0 +1,674 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.expr.fn.impl;
+import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+public class TestSqlPatterns {
+
+  /**
+   * Verifies, for a range of SQL LIKE patterns, the pattern type, the Java regex
+   * string and the simple pattern string reported by RegexpUtil.sqlToRegexLike,
+   * both without and with an escape character.
+   */
+  @Test
+  public void testSqlRegexLike() {
+    // Given SQL like pattern, verify patternType is correct.
+    // Java pattern should have % replaced with .*, _ replaced with .
+    // Simple pattern should have meta (% and _) and escape characters removed.
+
+    // A%B is complex
+    RegexpUtil.SqlPatternInfo patternInfo = RegexpUtil.sqlToRegexLike("A%B");
+    assertEquals("A.*B", patternInfo.getJavaPatternString());
+    assertEquals(RegexpUtil.SqlPatternType.COMPLEX, patternInfo.getPatternType());
+
+    // A_B is complex
+    patternInfo = RegexpUtil.sqlToRegexLike("A_B");
+    assertEquals("A.B", patternInfo.getJavaPatternString());
+    assertEquals(RegexpUtil.SqlPatternType.COMPLEX, patternInfo.getPatternType());
+
+    // A%B%D is complex
+    patternInfo = RegexpUtil.sqlToRegexLike("A%B%D");
+    assertEquals("A.*B.*D", patternInfo.getJavaPatternString());
+    assertEquals(RegexpUtil.SqlPatternType.COMPLEX, patternInfo.getPatternType());
+
+    // %AB% is contains
+    patternInfo = RegexpUtil.sqlToRegexLike("%AB%");
+    assertEquals(".*AB.*", patternInfo.getJavaPatternString());
+    assertEquals("AB", patternInfo.getSimplePatternString());
+    assertEquals(RegexpUtil.SqlPatternType.CONTAINS, patternInfo.getPatternType());
+
+    // %AB is ends with
+    patternInfo = RegexpUtil.sqlToRegexLike("%AB");
+    assertEquals(".*AB", patternInfo.getJavaPatternString());
+    assertEquals("AB", patternInfo.getSimplePatternString());
+    assertEquals(RegexpUtil.SqlPatternType.ENDS_WITH, patternInfo.getPatternType());
+
+    // AB% is starts with
+    patternInfo = RegexpUtil.sqlToRegexLike("AB%");
+    assertEquals("AB.*", patternInfo.getJavaPatternString());
+    assertEquals("AB", patternInfo.getSimplePatternString());
+    assertEquals(RegexpUtil.SqlPatternType.STARTS_WITH, patternInfo.getPatternType());
+
+    // AB is constant.
+    patternInfo = RegexpUtil.sqlToRegexLike("AB");
+    assertEquals("AB", patternInfo.getJavaPatternString());
+    assertEquals("AB", patternInfo.getSimplePatternString());
+    assertEquals(RegexpUtil.SqlPatternType.CONSTANT, patternInfo.getPatternType());
+
+    // Test with escape characters.
+
+    // A%#B is invalid escape sequence. Track whether the exception was actually
+    // raised so the check cannot pass silently when nothing is thrown.
+    boolean invalidEscapeRejected = false;
+    try {
+      patternInfo = RegexpUtil.sqlToRegexLike("A%#B", '#');
+    } catch (Exception ex) {
+      invalidEscapeRejected = true;
+      assertTrue(ex.getMessage().contains("Invalid escape sequence"));
+    }
+    assertTrue("A%#B with # as escape character should be rejected", invalidEscapeRejected);
+
+    // A#%B with # as escape character is constant A%B
+    patternInfo = RegexpUtil.sqlToRegexLike("A#%B", '#');
+    assertEquals("A%B", patternInfo.getJavaPatternString());
+    assertEquals("A%B", patternInfo.getSimplePatternString());
+    assertEquals(RegexpUtil.SqlPatternType.CONSTANT, patternInfo.getPatternType());
+
+    // %A#%B% is contains A%B
+    patternInfo = RegexpUtil.sqlToRegexLike("%A#%B%", '#');
+    assertEquals(".*A%B.*", patternInfo.getJavaPatternString());
+    assertEquals("A%B", patternInfo.getSimplePatternString());
+    assertEquals(RegexpUtil.SqlPatternType.CONTAINS, patternInfo.getPatternType());
+
+    // #%AB% is starts with %AB
+    patternInfo = RegexpUtil.sqlToRegexLike("#%AB%", '#');
+    assertEquals("%AB.*", patternInfo.getJavaPatternString());
+    assertEquals("%AB", patternInfo.getSimplePatternString());
+    assertEquals(RegexpUtil.SqlPatternType.STARTS_WITH, patternInfo.getPatternType());
+
+    // %#%AB#% is ends with %AB%
+    patternInfo = RegexpUtil.sqlToRegexLike("%#%AB#%", '#');
+    assertEquals(".*%AB%", patternInfo.getJavaPatternString());
+    assertEquals("%AB%", patternInfo.getSimplePatternString());
+    assertEquals(RegexpUtil.SqlPatternType.ENDS_WITH, patternInfo.getPatternType());
+
+    // #_A#%B%C is complex
+    patternInfo = RegexpUtil.sqlToRegexLike("#_A#%B%C", '#');
+    assertEquals("_A%B.*C", patternInfo.getJavaPatternString());
+    assertEquals(RegexpUtil.SqlPatternType.COMPLEX, patternInfo.getPatternType());
+
+  }
+
+ @Test
+ public void testSqlPatternStartsWith() {
+ RegexpUtil.SqlPatternInfo patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.STARTS_WITH,"", "ABC");
+
+ StringBuffer sb = new StringBuffer("ABCD");
+ SqlPatternMatcher sqlPatternStartsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ assertEquals(sqlPatternStartsWith.match(), 1); // ABCD should match StartsWith ABC
+
+ sb.setLength(0);
+ sb.append("BCD");
+ assertEquals(sqlPatternStartsWith.match(), 0); // BCD should not match StartsWith ABC
+
+ sb.setLength(0);
+ sb.append("XYZABC");
+ assertEquals(sqlPatternStartsWith.match(), 0); // XYZABC should not match StartsWith ABC
+
+ // null text
+ sb.setLength(0);
+ assertEquals(sqlPatternStartsWith.match(), 0); // null String should not match StartsWith ABC
+
+ // pattern length > txt length
+ sb.append("AB");
+ assertEquals(sqlPatternStartsWith.match(), 0); // AB should not match StartsWith ABC
+
+ // startsWith null pattern should match anything
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.STARTS_WITH,"", "");
+ sqlPatternStartsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ assertEquals(sqlPatternStartsWith.match(), 1); // AB should match StartsWith null pattern
+
+ // null pattern and null text
+ sb.setLength(0);
+ assertEquals(sqlPatternStartsWith.match(), 1); // null text should match null pattern
+
+ // wide character string.
+ sb.append("b00dUrA0oa2i4ZEHg6zvPXPXlVQYB2BXe8T5gIEtvUDzcN6yUkIqyS07gaAy8k4" +
+ "ac6Bn1cxblsXFnkp8g8hiQkUMJPyl6l0jTdsIzQ4PkVCURGGyF0aduGqCXUaKp91gqkRMvL" +
+ "g1Lh6u0NrGCBoJajPxnwZCyh58cN5aFiNscBFKIqqLPTS1vnbR39nmzU88FM8qDepJRhvein" +
+ "hHhmrHdEb22QN20dXEHSygR7vrb2zZhhfWeJbXRsesuYDqdGig801IAS6VWRIdQtJ6gaRhCdNz" +
+ " DWnQWRXlMhcrR4MKJXeBgDtjzbHd0ZS53K8u8ORl6FKxtvdKmwUuHiuMJrQQm6Rgx6WJrAtvTf" +
+ "UE8a5I3nYXdRppnm3MbRsLu4IxXIblh8kmAIG6n2yHwGhpWYkRI7cwl4dOB3bsxxtdaaTlZMMx6T" +
+ "XPaUK10UzfZCAkWG9Du3QhJxxJBZaP3HPebXmw1l5swPohmG3L6zOcEWp7f" +
+ "saldC7TOrFa3ReYFHooclSGTgZ9sWjJ5SYJ0vEkI1RMWoeGcdJq5v4lrcB6YjrMqQJIaxAdRnIaNG" +
+ "V6oR9SkI4diiXspIvRWj6PMkpqI02ovI3va49bHauTrqTyM9eIhS" +
+ "0Mc3SHzknQwHJAFkqmhV9Lm2VLULou2iJDvc5sWW8W48IODGqGytqLogA01Cuo3gURmH2057nCld9" +
+ "PDHQEieFMddi4gKPOv4es1YX2aBo4RfYiTlUyXd6gGujVPgU2j" +
+ "AAhcz6JqVC08O73gM9zOAM2l4PwN2TN3lBufkQUGyOzHtoTDjSdQ2DPXIks9A6ehIpn92n1UtdrJeMz" +
+ "4oMN4kwP95YjQk1ko2e3DVAiPVlCiaWqnzXKa41kLVs3KiBhfAff5" +
+ "hoTnBGn9CaXed6g6kLs2YBTQYM9yLW9Wb5qNhLeCM4GGJM8dUWqqEsWYPrcPAkCMa6LXfgEcsCwQ6ij" +
+ "JhhjcxwoafBRyyEvQ6Pfhg8IqJ0afBpAZHhR2y4I11zbaJZqs3WG3H3aQHT" +
+ "wcPHdBHnk65GdL3Njuoo0K4mcmN6lk7pWptHwTjkw59zTw834PZ8TWm5XiUnsi9JKy41MPqHcbO0nN" +
+ "SYl9Q6kEjv4nt8p9unhUYqgrGvLl42nvqGb1F47f6PvxkewuouxMFAszYhaMjZzIf5" +
+ "AgmvaXbSP9MKYu6EkkvM9CIhYGZuq7PJUk6wmoG6IxIfOokUcnrGzuU9INFUuXf4LptQ987GU3hw0d" +
+ "yMNf6nncwABOOoC5EnqYBNoq29Mf54H5k2Xi8y1fh8ldtKcW9T4WsaXun9fKofegfhwY8wgfoG" +
+ "eW2YNW3fdalIsggRzMEAXVDxj7oieReUGiT53uV2kcmcQRQLdUDUcOC1JEiSRpgZl38c1DDVRlz8Rbhi" +
+ "KUxMqNCPx6PABXCPocpfXJa0yBT0l3ssgMlDfKsxAHX6aEC86zk0CDmTqZPmBjLAoYaHA3" +
+ "uGqoARbQ6rhIBHOdkb7PoRImjmF4sQ60TBIWdao9dqLMjslhOQrGQlPIniW5I1V9nisc5lV0jEqeaC3y" +
+ "lSnjhieVJ7H0FYjcsihjQryhyRwUZBGxWFuh0hI9rOv8h5jHKb549hOHPcIjSdLa6M048G" +
+ "9drX0LNEixfp7WUqq2DyRfBioybmoHVzFWzhXrMJXzwHakzLwb4T2BHcLK6VpC4b2GodYlZe43ggxTNUErif" +
+ "NEfEfxZhDj6HBMYobKvn4ofOsyKPGn6NXnCqIbCCvqOyBikxAYukgCmWHRJRGX4RjNbL" +
+ "BVjY5eoXJB7xisnrqOieXuEnZ9n7rnK8qM4RuOSA8EaDd5n58JU9SUUNRqpZZgK2nPy9Pv90ORiGr1Y30rZS" +
+ "bKT7SucjEZJ00WBF9FlJp6v8OcVvMBjRriaYYjVlOiLvVDQQ2NvYfbv5bLbEhkrJi5Nlg" +
+ "3Tq5jsgSTEBqSKTD5UIukFP194LvVMQIOQ9YM7m9iZHMpCCoIL99FJLsNmzRDVETCjyFoXxSputp6ufupS1n" +
+ "1SHRVlXm7Bx3bjJ79O3bGqjzxT1EZV39isegIyKx2H0zEUpnlXzzbusS0tusECmG3C3eGDOTs" +
+ "FZbYTp5ZxtXCrudDSX3kaeLtCstfqAHGsjHkPd87aSNaJJjPaSaMmGo7zTJGUIX1VCA2KJP37USIAa5NGHtM" +
+ "ChmtfO8kmrO9PZl6Ld18Yi7OlBsEUkMQE0yKwtSpkTK76XS5CG8S7S2S07vtYaBJJ9Bvuzr0F" +
+ "tLsQ1gYWPF1geDalS5MdWfpDvF5MaeJMd2fK0m3jui7xY1IfuSxqZs7SEL6wUVGdWc5tsVroCMMy6Nqjdz5T4vW" +
+ "zdSmpjrFnnB8edB5AOekeHua16I9qcNHuCcOgeYZIc6GzG0O1XAcQu6cEi1ZivUPoYf2sKr4uPvcD" +
+ "gnaIN1KmhwSmxPgkErJVroPAUO18E2apxRlmZkhS6CInyzcLkvycSDCGtFaAZBO3QDO5nmvPFgVxfSbwG8BhhY" +
+ "cWXqwnsbEEejtlXH3Zr5BtxTzd3Bo08s8HxjIXF6Z0CPXcvQzDoemL8M2A1AIrnBkT7vIHgvMuH475M" +
+ "TXIR4K0njrS4X4KrBQFxvuZey8tnUnm8oiJWdUFzdM4N0KioJsG8UzxRODxKh4e3GqxmZxsSwwL0nNnV1syiCCC" +
+ "zTgrtT6fcxpAfcFeTct7FNd4BjzbNCgBrSspzhxnEFMZXuqBGaOS9d9qcuUulwF0lAWGBauWI57qyjXfQnQ" +
+ "i6Sy6nXOcUIOZWJ9BVJf4A27Pa4Pi7ZFznFnIdiQOrxCbb2ZCVkCftWsmcEMnXWXUkGOuA5JXo9YvGyPGq2wgO1wj" +
+ "qAKyqxhBVOL48L2D0PYU16Ursxe0ckoBYXJheQi2d1eIa0pTD78325f8jCHclqINvuvj0GZfJENlc1e" +
+ "ULPRd358aPnsx2DOmN1UojjBI1hacijCtFCE8zGCa9M0L7aZbRUHe8lmlaqhx0Su6nPnPgfbJr6idfxTJHqCT4t8" +
+ "4BfZeqRZ5rgIS15Z7HFYSCPZixMPf683GQoQEFWIM0EqNTJmoHW3K7jDHOUpVutyyWt5VO5ray6rBrq1nAF" +
+ "QEN59RqxM04eXxAOBWnPB17TdvDmyXuXDpjnjXReJLNqJVgB2VFPxsqhQWQupAtjBGvffU7exZMM92fiYdBArV" +
+ "4SE1mBFewTNRz4PmwFVmUoxWj74rzZQuDMhAlx3jBXcaX8eD7PlaADdiMT1mF3faVyScA6bHbV2jU79XvppOfoD" +
+ "YtBFj3a5LtAhTy5BnN2v1XlTQtk6MZ0Ej6g7sW96w9n2XV8wqdWGgjeKHaqH7Pn1XFw7IHvpVYK4wFvIGubp4bpms" +
+ "C3ARq1Gqq8zvDQtoLZSZYOvXCZOIElGZLscqjbRckX5aRhTJX6CxjVcT7S3TScnCbqNdfqMpEsNl2GY3fprQF" +
+ "CTtiZv12uCj0WILSesMc5ct2tQcIvwnOHAuE6fw7lD8EgQ0emU4zxUIDowhTvJ46k27rXTctIX7HlBEZXInV9r49" +
+ "VbJdA3des3ZqGPbBYXTwQcns1jJTmnIf1S0jLWN0Wgk9bH5gkdhl53l2yc1AlZCyJdm9vktH5sctTDdMZrDPPHNUG2" +
+ "pTBg4DDR9Zc6YvkrO4f5O3mfOl441bJkmOSNwoOc3krHTQlN6SBGLEptT4m7MFwqVyrbsEXHegwa53aN4W0J7qwV0" +
+ "EMN2VHLtoHQDfXVOVDXnE1rK3cDJRMhCIvIRmywkA5T9GchtDVfek2qZq1H5wfe92RoXBseAuMoWtTCJiXOJraCxmj" +
+ "cluokF3eK0NpycncoQcObLiS1rield0fdx8UJhsV9QnNtok5a0f4L1MKtjnYJmvItSqn3Lo2VkWagxGSEJzKnK2gO3pH" +
+ "Whlarr6bRQeIwCXckALEVdGZBTPiqjYPBfk5H5wYXqkieh04tjSmnWytNebBNmGjTNgrqNVO7ftCbhh7wICOn" +
+ "lpSMt6BoFvjHYW1IpEyTlVlvNl5NzPPAn2119ttZTfXpifXfQtBGzlCNYTD6m1FvpmOydzqEq8YadgybW76HDtnBdU" +
+ "M1djhNcHfR12NkPc7UIvVJDiTTJ440pU1tqYISyEVr5QZBrhOP2y6RsZnlJy7Mqh56Jw0fJkbI2yQaoc7Jh2Wsh7" +
+ "R58SXBXsalwNM9TmTeBMrc8Hghx9hDpai8agUclHTCoyK2hkEpKLlEJiXUKOE8JPugYE8yFVYF49UAjJUbsj6we3Ocii" +
+ "FXs6oXGymttSxcRksGdfUaIonkrqniea31SgiGmhCjKi0x5ZDNFS26CqSEU0FKiLJyhui8HOJCddX64Ers0VTMHppS" +
+ "ydpQX7PndzDuhT7k8Wj2kGJvKCqzVxTGCssDHoedKmMULEjUqU2EcjT5VOaCFeHKUXyP1B7qfYPtKLcgXHH5bmSgRs8gY" +
+ "2JkPOST2Vr35mNKoulUMqFeo0s1y5hcVY39a3mBMytwZn7HgPhEJScwZdWJd6E5tZ13evEmcn1A5YPBYbm91CdJFXhj" +
+ "iuqmJS71Xq4j56K35TmCJCb4jAAbcGTGEHzcCP1HKVFfsNnLqwflvHwMYQMA3EumrMn1nXnETZFdZJRHlnO8dwgnT" +
+ "ehbB2XtrpErgaFbEWfWEinoiMd4Vs7kgHzs8UiuagYyyCxmg5gEvza3CXzjUnG2lfjI6ox6EYPgXvRySHmL" +
+ "atXzj4x3CgF6j1gn10aUJknF7KQLJ84DIA5fy33YaLLbeOoGJHsdr9rQZCjaIqZKH870sslgm0tnGw5yOddnj" +
+ "FDI2KwL6UVGr3YExI1p5sGaY0Su4G30PMJsOX9ZWvRF72Lk0pVMnjVugkzsnQrbyGezZ8WN8y8kOvrysQuhTt5" +
+ "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ");
+
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.STARTS_WITH,"",
+ "b00dUrA0oa2i4ZEHg6zvPXPXlVQYB2BXe8T5gIEtvUDzcN6yUkIqyS07gaAy8k4");
+ sqlPatternStartsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ assertEquals(sqlPatternStartsWith.match(), 1); // should match
+
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.STARTS_WITH,"",
+ "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ");
+ sqlPatternStartsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ assertEquals(sqlPatternStartsWith.match(), 0); // should not match
+
+ // non ascii
+ sb.setLength(0);
+ sb.append("¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~");
+
+ assertEquals(sqlPatternStartsWith.match(), 0); // should not match
+
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.STARTS_WITH,"", "¤EÀsÆW");
+ sqlPatternStartsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ assertEquals(sqlPatternStartsWith.match(), 1); // should match
+
+ }
+
+ /**
+ * Exercises the ENDS_WITH matcher produced by SqlPatternFactory.
+ * match() is expected to return 1 when the text held in the buffer ends with the
+ * simple pattern and 0 otherwise. Each matcher is created once over the shared
+ * StringBuffer and re-checked after the buffer is modified, so the assertions rely
+ * on match() observing the buffer's current contents.
+ */
+ @Test
+ public void testSqlPatternEndsWith() {
+ RegexpUtil.SqlPatternInfo patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.ENDS_WITH, "", "BCD");
+ StringBuffer sb = new StringBuffer("ABCD");
+ SqlPatternMatcher sqlPatternEndsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+
+ assertEquals(1, sqlPatternEndsWith.match()); // ABCD should match EndsWith BCD
+
+ sb.setLength(0);
+ sb.append("ABC");
+ assertEquals(0, sqlPatternEndsWith.match()); // ABC should not match EndsWith BCD
+
+ sb.setLength(0);
+ assertEquals(0, sqlPatternEndsWith.match()); // empty string should not match EndsWith BCD
+
+ sb.append("A");
+ assertEquals(0, sqlPatternEndsWith.match()); // A should not match EndsWith BCD
+
+ sb.setLength(0);
+ sb.append("XYZBCD");
+ assertEquals(1, sqlPatternEndsWith.match()); // XYZBCD should match EndsWith BCD
+
+ // EndsWith empty pattern should match anything
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.ENDS_WITH,"", "");
+ sqlPatternEndsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ assertEquals(1, sqlPatternEndsWith.match()); // XYZBCD should match EndsWith empty pattern
+
+ // empty pattern and empty text
+ sb.setLength(0);
+ assertEquals(1, sqlPatternEndsWith.match()); // empty text should match empty pattern
+
+ // long ASCII string.
+ sb.append("b00dUrA0oa2i4ZEHg6zvPXPXlVQYB2BXe8T5gIEtvUDzcN6yUkIqyS07gaAy8k4" +
+ "ac6Bn1cxblsXFnkp8g8hiQkUMJPyl6l0jTdsIzQ4PkVCURGGyF0aduGqCXUaKp91gqkRMvL" +
+ "g1Lh6u0NrGCBoJajPxnwZCyh58cN5aFiNscBFKIqqLPTS1vnbR39nmzU88FM8qDepJRhvein" +
+ "hHhmrHdEb22QN20dXEHSygR7vrb2zZhhfWeJbXRsesuYDqdGig801IAS6VWRIdQtJ6gaRhCdNz" +
+ " DWnQWRXlMhcrR4MKJXeBgDtjzbHd0ZS53K8u8ORl6FKxtvdKmwUuHiuMJrQQm6Rgx6WJrAtvTf" +
+ "UE8a5I3nYXdRppnm3MbRsLu4IxXIblh8kmAIG6n2yHwGhpWYkRI7cwl4dOB3bsxxtdaaTlZMMx6T" +
+ "XPaUK10UzfZCAkWG9Du3QhJxxJBZaP3HPebXmw1l5swPohmG3L6zOcEWp7f" +
+ "saldC7TOrFa3ReYFHooclSGTgZ9sWjJ5SYJ0vEkI1RMWoeGcdJq5v4lrcB6YjrMqQJIaxAdRnIaNG" +
+ "V6oR9SkI4diiXspIvRWj6PMkpqI02ovI3va49bHauTrqTyM9eIhS" +
+ "0Mc3SHzknQwHJAFkqmhV9Lm2VLULou2iJDvc5sWW8W48IODGqGytqLogA01Cuo3gURmH2057nCld9" +
+ "PDHQEieFMddi4gKPOv4es1YX2aBo4RfYiTlUyXd6gGujVPgU2j" +
+ "AAhcz6JqVC08O73gM9zOAM2l4PwN2TN3lBufkQUGyOzHtoTDjSdQ2DPXIks9A6ehIpn92n1UtdrJeMz" +
+ "4oMN4kwP95YjQk1ko2e3DVAiPVlCiaWqnzXKa41kLVs3KiBhfAff5" +
+ "hoTnBGn9CaXed6g6kLs2YBTQYM9yLW9Wb5qNhLeCM4GGJM8dUWqqEsWYPrcPAkCMa6LXfgEcsCwQ6ij" +
+ "JhhjcxwoafBRyyEvQ6Pfhg8IqJ0afBpAZHhR2y4I11zbaJZqs3WG3H3aQHT" +
+ "wcPHdBHnk65GdL3Njuoo0K4mcmN6lk7pWptHwTjkw59zTw834PZ8TWm5XiUnsi9JKy41MPqHcbO0nN" +
+ "SYl9Q6kEjv4nt8p9unhUYqgrGvLl42nvqGb1F47f6PvxkewuouxMFAszYhaMjZzIf5" +
+ "AgmvaXbSP9MKYu6EkkvM9CIhYGZuq7PJUk6wmoG6IxIfOokUcnrGzuU9INFUuXf4LptQ987GU3hw0d" +
+ "yMNf6nncwABOOoC5EnqYBNoq29Mf54H5k2Xi8y1fh8ldtKcW9T4WsaXun9fKofegfhwY8wgfoG" +
+ "eW2YNW3fdalIsggRzMEAXVDxj7oieReUGiT53uV2kcmcQRQLdUDUcOC1JEiSRpgZl38c1DDVRlz8Rbhi" +
+ "KUxMqNCPx6PABXCPocpfXJa0yBT0l3ssgMlDfKsxAHX6aEC86zk0CDmTqZPmBjLAoYaHA3" +
+ "uGqoARbQ6rhIBHOdkb7PoRImjmF4sQ60TBIWdao9dqLMjslhOQrGQlPIniW5I1V9nisc5lV0jEqeaC3y" +
+ "lSnjhieVJ7H0FYjcsihjQryhyRwUZBGxWFuh0hI9rOv8h5jHKb549hOHPcIjSdLa6M048G" +
+ "9drX0LNEixfp7WUqq2DyRfBioybmoHVzFWzhXrMJXzwHakzLwb4T2BHcLK6VpC4b2GodYlZe43ggxTNUErif" +
+ "NEfEfxZhDj6HBMYobKvn4ofOsyKPGn6NXnCqIbCCvqOyBikxAYukgCmWHRJRGX4RjNbL" +
+ "BVjY5eoXJB7xisnrqOieXuEnZ9n7rnK8qM4RuOSA8EaDd5n58JU9SUUNRqpZZgK2nPy9Pv90ORiGr1Y30rZS" +
+ "bKT7SucjEZJ00WBF9FlJp6v8OcVvMBjRriaYYjVlOiLvVDQQ2NvYfbv5bLbEhkrJi5Nlg" +
+ "3Tq5jsgSTEBqSKTD5UIukFP194LvVMQIOQ9YM7m9iZHMpCCoIL99FJLsNmzRDVETCjyFoXxSputp6ufupS1n" +
+ "1SHRVlXm7Bx3bjJ79O3bGqjzxT1EZV39isegIyKx2H0zEUpnlXzzbusS0tusECmG3C3eGDOTs" +
+ "FZbYTp5ZxtXCrudDSX3kaeLtCstfqAHGsjHkPd87aSNaJJjPaSaMmGo7zTJGUIX1VCA2KJP37USIAa5NGHtM" +
+ "ChmtfO8kmrO9PZl6Ld18Yi7OlBsEUkMQE0yKwtSpkTK76XS5CG8S7S2S07vtYaBJJ9Bvuzr0F" +
+ "tLsQ1gYWPF1geDalS5MdWfpDvF5MaeJMd2fK0m3jui7xY1IfuSxqZs7SEL6wUVGdWc5tsVroCMMy6Nqjdz5T4vW" +
+ "zdSmpjrFnnB8edB5AOekeHua16I9qcNHuCcOgeYZIc6GzG0O1XAcQu6cEi1ZivUPoYf2sKr4uPvcD" +
+ "gnaIN1KmhwSmxPgkErJVroPAUO18E2apxRlmZkhS6CInyzcLkvycSDCGtFaAZBO3QDO5nmvPFgVxfSbwG8BhhY" +
+ "cWXqwnsbEEejtlXH3Zr5BtxTzd3Bo08s8HxjIXF6Z0CPXcvQzDoemL8M2A1AIrnBkT7vIHgvMuH475M" +
+ "TXIR4K0njrS4X4KrBQFxvuZey8tnUnm8oiJWdUFzdM4N0KioJsG8UzxRODxKh4e3GqxmZxsSwwL0nNnV1syiCCC" +
+ "zTgrtT6fcxpAfcFeTct7FNd4BjzbNCgBrSspzhxnEFMZXuqBGaOS9d9qcuUulwF0lAWGBauWI57qyjXfQnQ" +
+ "i6Sy6nXOcUIOZWJ9BVJf4A27Pa4Pi7ZFznFnIdiQOrxCbb2ZCVkCftWsmcEMnXWXUkGOuA5JXo9YvGyPGq2wgO1wj" +
+ "qAKyqxhBVOL48L2D0PYU16Ursxe0ckoBYXJheQi2d1eIa0pTD78325f8jCHclqINvuvj0GZfJENlc1e" +
+ "ULPRd358aPnsx2DOmN1UojjBI1hacijCtFCE8zGCa9M0L7aZbRUHe8lmlaqhx0Su6nPnPgfbJr6idfxTJHqCT4t8" +
+ "4BfZeqRZ5rgIS15Z7HFYSCPZixMPf683GQoQEFWIM0EqNTJmoHW3K7jDHOUpVutyyWt5VO5ray6rBrq1nAF" +
+ "QEN59RqxM04eXxAOBWnPB17TdvDmyXuXDpjnjXReJLNqJVgB2VFPxsqhQWQupAtjBGvffU7exZMM92fiYdBArV" +
+ "4SE1mBFewTNRz4PmwFVmUoxWj74rzZQuDMhAlx3jBXcaX8eD7PlaADdiMT1mF3faVyScA6bHbV2jU79XvppOfoD" +
+ "YtBFj3a5LtAhTy5BnN2v1XlTQtk6MZ0Ej6g7sW96w9n2XV8wqdWGgjeKHaqH7Pn1XFw7IHvpVYK4wFvIGubp4bpms" +
+ "C3ARq1Gqq8zvDQtoLZSZYOvXCZOIElGZLscqjbRckX5aRhTJX6CxjVcT7S3TScnCbqNdfqMpEsNl2GY3fprQF" +
+ "CTtiZv12uCj0WILSesMc5ct2tQcIvwnOHAuE6fw7lD8EgQ0emU4zxUIDowhTvJ46k27rXTctIX7HlBEZXInV9r49" +
+ "VbJdA3des3ZqGPbBYXTwQcns1jJTmnIf1S0jLWN0Wgk9bH5gkdhl53l2yc1AlZCyJdm9vktH5sctTDdMZrDPPHNUG2" +
+ "pTBg4DDR9Zc6YvkrO4f5O3mfOl441bJkmOSNwoOc3krHTQlN6SBGLEptT4m7MFwqVyrbsEXHegwa53aN4W0J7qwV0" +
+ "EMN2VHLtoHQDfXVOVDXnE1rK3cDJRMhCIvIRmywkA5T9GchtDVfek2qZq1H5wfe92RoXBseAuMoWtTCJiXOJraCxmj" +
+ "cluokF3eK0NpycncoQcObLiS1rield0fdx8UJhsV9QnNtok5a0f4L1MKtjnYJmvItSqn3Lo2VkWagxGSEJzKnK2gO3pH" +
+ "Whlarr6bRQeIwCXckALEVdGZBTPiqjYPBfk5H5wYXqkieh04tjSmnWytNebBNmGjTNgrqNVO7ftCbhh7wICOn" +
+ "lpSMt6BoFvjHYW1IpEyTlVlvNl5NzPPAn2119ttZTfXpifXfQtBGzlCNYTD6m1FvpmOydzqEq8YadgybW76HDtnBdU" +
+ "M1djhNcHfR12NkPc7UIvVJDiTTJ440pU1tqYISyEVr5QZBrhOP2y6RsZnlJy7Mqh56Jw0fJkbI2yQaoc7Jh2Wsh7" +
+ "R58SXBXsalwNM9TmTeBMrc8Hghx9hDpai8agUclHTCoyK2hkEpKLlEJiXUKOE8JPugYE8yFVYF49UAjJUbsj6we3Ocii" +
+ "FXs6oXGymttSxcRksGdfUaIonkrqniea31SgiGmhCjKi0x5ZDNFS26CqSEU0FKiLJyhui8HOJCddX64Ers0VTMHppS" +
+ "ydpQX7PndzDuhT7k8Wj2kGJvKCqzVxTGCssDHoedKmMULEjUqU2EcjT5VOaCFeHKUXyP1B7qfYPtKLcgXHH5bmSgRs8gY" +
+ "2JkPOST2Vr35mNKoulUMqFeo0s1y5hcVY39a3mBMytwZn7HgPhEJScwZdWJd6E5tZ13evEmcn1A5YPBYbm91CdJFXhj" +
+ "iuqmJS71Xq4j56K35TmCJCb4jAAbcGTGEHzcCP1HKVFfsNnLqwflvHwMYQMA3EumrMn1nXnETZFdZJRHlnO8dwgnT" +
+ "ehbB2XtrpErgaFbEWfWEinoiMd4Vs7kgHzs8UiuagYyyCxmg5gEvza3CXzjUnG2lfjI6ox6EYPgXvRySHmL" +
+ "atXzj4x3CgF6j1gn10aUJknF7KQLJ84DIA5fy33YaLLbeOoGJHsdr9rQZCjaIqZKH870sslgm0tnGw5yOddnj" +
+ "FDI2KwL6UVGr3YExI1p5sGaY0Su4G30PMJsOX9ZWvRF72Lk0pVMnjVugkzsnQrbyGezZ8WN8y8kOvrysQuhTt5" +
+ "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ");
+
+ // pattern is the final segment of the long text
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.ENDS_WITH,"",
+ "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ");
+ sqlPatternEndsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ assertEquals(1, sqlPatternEndsWith.match()); // should match
+
+ // pattern occurs inside the long text but not at its end
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.ENDS_WITH,"",
+ "atXzj4x3CgF6j1gn10aUJknF7KQLJ84D");
+ sqlPatternEndsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ assertEquals(0, sqlPatternEndsWith.match()); // should not match
+
+ // non ascii
+ sb.setLength(0);
+ sb.append("¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~");
+
+ // matcher still holds the previous ASCII pattern
+ assertEquals(0, sqlPatternEndsWith.match()); // should not match
+
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.ENDS_WITH,"", "TÆU2~~");
+ sqlPatternEndsWith = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ assertEquals(1, sqlPatternEndsWith.match()); // should match
+
+ }
+
+ /**
+ * Exercises the CONTAINS matcher produced by SqlPatternFactory.
+ * match() is expected to return 1 when the simple pattern occurs anywhere in the
+ * text held in the buffer and 0 otherwise. Each matcher is created once over the
+ * shared StringBuffer and re-checked after the buffer is modified, so the assertions
+ * rely on match() observing the buffer's current contents.
+ */
+ @Test
+ public void testSqlPatternContains() {
+ // java pattern kept consistent with the simple pattern "ABCD"
+ RegexpUtil.SqlPatternInfo patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS,".*ABCD.*", "ABCD");
+
+ StringBuffer sb = new StringBuffer("ABCD");
+ SqlPatternMatcher sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+
+ assertEquals(1, sqlPatternContains.match()); // ABCD should contain ABCD
+
+ sb.setLength(0);
+ sb.append("BC");
+ assertEquals(0, sqlPatternContains.match()); // BC should not contain ABCD
+
+ sb.setLength(0);
+ assertEquals(0, sqlPatternContains.match()); // empty string should not contain ABCD
+
+ sb.append("DE");
+ assertEquals(0, sqlPatternContains.match()); // DE should not contain ABCD
+
+ sb.setLength(0);
+ sb.append("xyzABCDqrs");
+ assertEquals(1, sqlPatternContains.match()); // xyzABCDqrs should contain ABCD
+
+ // contains empty pattern should match anything
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS,"", "");
+ sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ assertEquals(1, sqlPatternContains.match()); // should match
+
+ // empty pattern and empty text
+ sb.setLength(0);
+ assertEquals(1, sqlPatternContains.match()); // empty text should match empty pattern
+
+ // long ASCII string.
+ sb.append("b00dUrA0oa2i4ZEHg6zvPXPXlVQYB2BXe8T5gIEtvUDzcN6yUkIqyS07gaAy8k4" +
+ "ac6Bn1cxblsXFnkp8g8hiQkUMJPyl6l0jTdsIzQ4PkVCURGGyF0aduGqCXUaKp91gqkRMvL" +
+ "g1Lh6u0NrGCBoJajPxnwZCyh58cN5aFiNscBFKIqqLPTS1vnbR39nmzU88FM8qDepJRhvein" +
+ "hHhmrHdEb22QN20dXEHSygR7vrb2zZhhfWeJbXRsesuYDqdGig801IAS6VWRIdQtJ6gaRhCdNz" +
+ " DWnQWRXlMhcrR4MKJXeBgDtjzbHd0ZS53K8u8ORl6FKxtvdKmwUuHiuMJrQQm6Rgx6WJrAtvTf" +
+ "UE8a5I3nYXdRppnm3MbRsLu4IxXIblh8kmAIG6n2yHwGhpWYkRI7cwl4dOB3bsxxtdaaTlZMMx6T" +
+ "XPaUK10UzfZCAkWG9Du3QhJxxJBZaP3HPebXmw1l5swPohmG3L6zOcEWp7f" +
+ "saldC7TOrFa3ReYFHooclSGTgZ9sWjJ5SYJ0vEkI1RMWoeGcdJq5v4lrcB6YjrMqQJIaxAdRnIaNG" +
+ "V6oR9SkI4diiXspIvRWj6PMkpqI02ovI3va49bHauTrqTyM9eIhS" +
+ "0Mc3SHzknQwHJAFkqmhV9Lm2VLULou2iJDvc5sWW8W48IODGqGytqLogA01Cuo3gURmH2057nCld9" +
+ "PDHQEieFMddi4gKPOv4es1YX2aBo4RfYiTlUyXd6gGujVPgU2j" +
+ "AAhcz6JqVC08O73gM9zOAM2l4PwN2TN3lBufkQUGyOzHtoTDjSdQ2DPXIks9A6ehIpn92n1UtdrJeMz" +
+ "4oMN4kwP95YjQk1ko2e3DVAiPVlCiaWqnzXKa41kLVs3KiBhfAff5" +
+ "hoTnBGn9CaXed6g6kLs2YBTQYM9yLW9Wb5qNhLeCM4GGJM8dUWqqEsWYPrcPAkCMa6LXfgEcsCwQ6ij" +
+ "JhhjcxwoafBRyyEvQ6Pfhg8IqJ0afBpAZHhR2y4I11zbaJZqs3WG3H3aQHT" +
+ "wcPHdBHnk65GdL3Njuoo0K4mcmN6lk7pWptHwTjkw59zTw834PZ8TWm5XiUnsi9JKy41MPqHcbO0nN" +
+ "SYl9Q6kEjv4nt8p9unhUYqgrGvLl42nvqGb1F47f6PvxkewuouxMFAszYhaMjZzIf5" +
+ "AgmvaXbSP9MKYu6EkkvM9CIhYGZuq7PJUk6wmoG6IxIfOokUcnrGzuU9INFUuXf4LptQ987GU3hw0d" +
+ "yMNf6nncwABOOoC5EnqYBNoq29Mf54H5k2Xi8y1fh8ldtKcW9T4WsaXun9fKofegfhwY8wgfoG" +
+ "eW2YNW3fdalIsggRzMEAXVDxj7oieReUGiT53uV2kcmcQRQLdUDUcOC1JEiSRpgZl38c1DDVRlz8Rbhi" +
+ "KUxMqNCPx6PABXCPocpfXJa0yBT0l3ssgMlDfKsxAHX6aEC86zk0CDmTqZPmBjLAoYaHA3" +
+ "uGqoARbQ6rhIBHOdkb7PoRImjmF4sQ60TBIWdao9dqLMjslhOQrGQlPIniW5I1V9nisc5lV0jEqeaC3y" +
+ "lSnjhieVJ7H0FYjcsihjQryhyRwUZBGxWFuh0hI9rOv8h5jHKb549hOHPcIjSdLa6M048G" +
+ "9drX0LNEixfp7WUqq2DyRfBioybmoHVzFWzhXrMJXzwHakzLwb4T2BHcLK6VpC4b2GodYlZe43ggxTNUErif" +
+ "NEfEfxZhDj6HBMYobKvn4ofOsyKPGn6NXnCqIbCCvqOyBikxAYukgCmWHRJRGX4RjNbL" +
+ "BVjY5eoXJB7xisnrqOieXuEnZ9n7rnK8qM4RuOSA8EaDd5n58JU9SUUNRqpZZgK2nPy9Pv90ORiGr1Y30rZS" +
+ "bKT7SucjEZJ00WBF9FlJp6v8OcVvMBjRriaYYjVlOiLvVDQQ2NvYfbv5bLbEhkrJi5Nlg" +
+ "3Tq5jsgSTEBqSKTD5UIukFP194LvVMQIOQ9YM7m9iZHMpCCoIL99FJLsNmzRDVETCjyFoXxSputp6ufupS1n" +
+ "1SHRVlXm7Bx3bjJ79O3bGqjzxT1EZV39isegIyKx2H0zEUpnlXzzbusS0tusECmG3C3eGDOTs" +
+ "FZbYTp5ZxtXCrudDSX3kaeLtCstfqAHGsjHkPd87aSNaJJjPaSaMmGo7zTJGUIX1VCA2KJP37USIAa5NGHtM" +
+ "ChmtfO8kmrO9PZl6Ld18Yi7OlBsEUkMQE0yKwtSpkTK76XS5CG8S7S2S07vtYaBJJ9Bvuzr0F" +
+ "tLsQ1gYWPF1geDalS5MdWfpDvF5MaeJMd2fK0m3jui7xY1IfuSxqZs7SEL6wUVGdWc5tsVroCMMy6Nqjdz5T4vW" +
+ "zdSmpjrFnnB8edB5AOekeHua16I9qcNHuCcOgeYZIc6GzG0O1XAcQu6cEi1ZivUPoYf2sKr4uPvcD" +
+ "gnaIN1KmhwSmxPgkErJVroPAUO18E2apxRlmZkhS6CInyzcLkvycSDCGtFaAZBO3QDO5nmvPFgVxfSbwG8BhhY" +
+ "cWXqwnsbEEejtlXH3Zr5BtxTzd3Bo08s8HxjIXF6Z0CPXcvQzDoemL8M2A1AIrnBkT7vIHgvMuH475M" +
+ "TXIR4K0njrS4X4KrBQFxvuZey8tnUnm8oiJWdUFzdM4N0KioJsG8UzxRODxKh4e3GqxmZxsSwwL0nNnV1syiCCC" +
+ "zTgrtT6fcxpAfcFeTct7FNd4BjzbNCgBrSspzhxnEFMZXuqBGaOS9d9qcuUulwF0lAWGBauWI57qyjXfQnQ" +
+ "i6Sy6nXOcUIOZWJ9BVJf4A27Pa4Pi7ZFznFnIdiQOrxCbb2ZCVkCftWsmcEMnXWXUkGOuA5JXo9YvGyPGq2wgO1wj" +
+ "qAKyqxhBVOL48L2D0PYU16Ursxe0ckoBYXJheQi2d1eIa0pTD78325f8jCHclqINvuvj0GZfJENlc1e" +
+ "ULPRd358aPnsx2DOmN1UojjBI1hacijCtFCE8zGCa9M0L7aZbRUHe8lmlaqhx0Su6nPnPgfbJr6idfxTJHqCT4t8" +
+ "4BfZeqRZ5rgIS15Z7HFYSCPZixMPf683GQoQEFWIM0EqNTJmoHW3K7jDHOUpVutyyWt5VO5ray6rBrq1nAF" +
+ "QEN59RqxM04eXxAOBWnPB17TdvDmyXuXDpjnjXReJLNqJVgB2VFPxsqhQWQupAtjBGvffU7exZMM92fiYdBArV" +
+ "4SE1mBFewTNRz4PmwFVmUoxWj74rzZQuDMhAlx3jBXcaX8eD7PlaADdiMT1mF3faVyScA6bHbV2jU79XvppOfoD" +
+ "YtBFj3a5LtAhTy5BnN2v1XlTQtk6MZ0Ej6g7sW96w9n2XV8wqdWGgjeKHaqH7Pn1XFw7IHvpVYK4wFvIGubp4bpms" +
+ "C3ARq1Gqq8zvDQtoLZSZYOvXCZOIElGZLscqjbRckX5aRhTJX6CxjVcT7S3TScnCbqNdfqMpEsNl2GY3fprQF" +
+ "CTtiZv12uCj0WILSesMc5ct2tQcIvwnOHAuE6fw7lD8EgQ0emU4zxUIDowhTvJ46k27rXTctIX7HlBEZXInV9r49" +
+ "VbJdA3des3ZqGPbBYXTwQcns1jJTmnIf1S0jLWN0Wgk9bH5gkdhl53l2yc1AlZCyJdm9vktH5sctTDdMZrDPPHNUG2" +
+ "pTBg4DDR9Zc6YvkrO4f5O3mfOl441bJkmOSNwoOc3krHTQlN6SBGLEptT4m7MFwqVyrbsEXHegwa53aN4W0J7qwV0" +
+ "EMN2VHLtoHQDfXVOVDXnE1rK3cDJRMhCIvIRmywkA5T9GchtDVfek2qZq1H5wfe92RoXBseAuMoWtTCJiXOJraCxmj" +
+ "cluokF3eK0NpycncoQcObLiS1rield0fdx8UJhsV9QnNtok5a0f4L1MKtjnYJmvItSqn3Lo2VkWagxGSEJzKnK2gO3pH" +
+ "Whlarr6bRQeIwCXckALEVdGZBTPiqjYPBfk5H5wYXqkieh04tjSmnWytNebBNmGjTNgrqNVO7ftCbhh7wICOn" +
+ "lpSMt6BoFvjHYW1IpEyTlVlvNl5NzPPAn2119ttZTfXpifXfQtBGzlCNYTD6m1FvpmOydzqEq8YadgybW76HDtnBdU" +
+ "M1djhNcHfR12NkPc7UIvVJDiTTJ440pU1tqYISyEVr5QZBrhOP2y6RsZnlJy7Mqh56Jw0fJkbI2yQaoc7Jh2Wsh7" +
+ "R58SXBXsalwNM9TmTeBMrc8Hghx9hDpai8agUclHTCoyK2hkEpKLlEJiXUKOE8JPugYE8yFVYF49UAjJUbsj6we3Ocii" +
+ "FXs6oXGymttSxcRksGdfUaIonkrqniea31SgiGmhCjKi0x5ZDNFS26CqSEU0FKiLJyhui8HOJCddX64Ers0VTMHppS" +
+ "ydpQX7PndzDuhT7k8Wj2kGJvKCqzVxTGCssDHoedKmMULEjUqU2EcjT5VOaCFeHKUXyP1B7qfYPtKLcgXHH5bmSgRs8gY" +
+ "2JkPOST2Vr35mNKoulUMqFeo0s1y5hcVY39a3mBMytwZn7HgPhEJScwZdWJd6E5tZ13evEmcn1A5YPBYbm91CdJFXhj" +
+ "iuqmJS71Xq4j56K35TmCJCb4jAAbcGTGEHzcCP1HKVFfsNnLqwflvHwMYQMA3EumrMn1nXnETZFdZJRHlnO8dwgnT" +
+ "ehbB2XtrpErgaFbEWfWEinoiMd4Vs7kgHzs8UiuagYyyCxmg5gEvza3CXzjUnG2lfjI6ox6EYPgXvRySHmL" +
+ "atXzj4x3CgF6j1gn10aUJknF7KQLJ84DIA5fy33YaLLbeOoGJHsdr9rQZCjaIqZKH870sslgm0tnGw5yOddnj" +
+ "FDI2KwL6UVGr3YExI1p5sGaY0Su4G30PMJsOX9ZWvRF72Lk0pVMnjVugkzsnQrbyGezZ8WN8y8kOvrysQuhTt5" +
+ "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ");
+
+ // pattern is a segment taken from the middle of the long text
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS,"",
+ "tLsQ1gYWPF1geDalS5MdWfpDvF5MaeJMd2fK0m3jui7xY1IfuSxqZs7SEL6wUVGdWc5tsVroCMMy6Nqjdz5T4vW");
+ sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ assertEquals(1, sqlPatternContains.match()); // should match
+
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS,"",
+ "ABCDEF");
+ sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ assertEquals(0, sqlPatternContains.match()); // should not match
+
+ // non ascii
+ sb.setLength(0);
+ sb.append("¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~");
+
+ // matcher still holds the previous pattern ABCDEF
+ assertEquals(0, sqlPatternContains.match()); // should not match
+
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONTAINS,"", "¶T¤¤¤ß");
+ sqlPatternContains = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ assertEquals(1, sqlPatternContains.match()); // should match
+
+ }
+
+ /**
+ * Exercises the CONSTANT matcher produced by SqlPatternFactory.
+ * match() is expected to return 1 only when the text held in the buffer equals the
+ * simple pattern, and 0 otherwise. Each matcher is created once over the shared
+ * StringBuffer and re-checked after the buffer is modified, so the assertions rely
+ * on match() observing the buffer's current contents.
+ */
+ @Test
+ public void testSqlPatternConstant() {
+ // java pattern kept consistent with the constant "ABC"
+ RegexpUtil.SqlPatternInfo patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONSTANT,"ABC", "ABC");
+
+ StringBuffer sb = new StringBuffer("ABC");
+ SqlPatternMatcher sqlPatternConstant = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+
+ assertEquals(1, sqlPatternConstant.match()); // ABC should match ABC
+
+ sb.setLength(0);
+ sb.append("BC");
+ assertEquals(0, sqlPatternConstant.match()); // ABC not same as BC
+
+ sb.setLength(0);
+ assertEquals(0, sqlPatternConstant.match()); // empty string not same as ABC
+
+ sb.append("DE");
+ assertEquals(0, sqlPatternConstant.match()); // ABC not same as DE
+
+ // empty pattern should match empty string
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONSTANT,"", "");
+ sqlPatternConstant = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ sb.setLength(0);
+ assertEquals(1, sqlPatternConstant.match()); // empty text should match empty pattern
+
+ // long ASCII string.
+ sb.append("b00dUrA0oa2i4ZEHg6zvPXPXlVQYB2BXe8T5gIEtvUDzcN6yUkIqyS07gaAy8k4" +
+ "ac6Bn1cxblsXFnkp8g8hiQkUMJPyl6l0jTdsIzQ4PkVCURGGyF0aduGqCXUaKp91gqkRMvL" +
+ "g1Lh6u0NrGCBoJajPxnwZCyh58cN5aFiNscBFKIqqLPTS1vnbR39nmzU88FM8qDepJRhvein" +
+ "hHhmrHdEb22QN20dXEHSygR7vrb2zZhhfWeJbXRsesuYDqdGig801IAS6VWRIdQtJ6gaRhCdNz" +
+ " DWnQWRXlMhcrR4MKJXeBgDtjzbHd0ZS53K8u8ORl6FKxtvdKmwUuHiuMJrQQm6Rgx6WJrAtvTf" +
+ "UE8a5I3nYXdRppnm3MbRsLu4IxXIblh8kmAIG6n2yHwGhpWYkRI7cwl4dOB3bsxxtdaaTlZMMx6T" +
+ "XPaUK10UzfZCAkWG9Du3QhJxxJBZaP3HPebXmw1l5swPohmG3L6zOcEWp7f" +
+ "saldC7TOrFa3ReYFHooclSGTgZ9sWjJ5SYJ0vEkI1RMWoeGcdJq5v4lrcB6YjrMqQJIaxAdRnIaNG" +
+ "V6oR9SkI4diiXspIvRWj6PMkpqI02ovI3va49bHauTrqTyM9eIhS" +
+ "0Mc3SHzknQwHJAFkqmhV9Lm2VLULou2iJDvc5sWW8W48IODGqGytqLogA01Cuo3gURmH2057nCld9" +
+ "PDHQEieFMddi4gKPOv4es1YX2aBo4RfYiTlUyXd6gGujVPgU2j" +
+ "AAhcz6JqVC08O73gM9zOAM2l4PwN2TN3lBufkQUGyOzHtoTDjSdQ2DPXIks9A6ehIpn92n1UtdrJeMz" +
+ "4oMN4kwP95YjQk1ko2e3DVAiPVlCiaWqnzXKa41kLVs3KiBhfAff5" +
+ "hoTnBGn9CaXed6g6kLs2YBTQYM9yLW9Wb5qNhLeCM4GGJM8dUWqqEsWYPrcPAkCMa6LXfgEcsCwQ6ij" +
+ "JhhjcxwoafBRyyEvQ6Pfhg8IqJ0afBpAZHhR2y4I11zbaJZqs3WG3H3aQHT" +
+ "wcPHdBHnk65GdL3Njuoo0K4mcmN6lk7pWptHwTjkw59zTw834PZ8TWm5XiUnsi9JKy41MPqHcbO0nN" +
+ "SYl9Q6kEjv4nt8p9unhUYqgrGvLl42nvqGb1F47f6PvxkewuouxMFAszYhaMjZzIf5" +
+ "AgmvaXbSP9MKYu6EkkvM9CIhYGZuq7PJUk6wmoG6IxIfOokUcnrGzuU9INFUuXf4LptQ987GU3hw0d" +
+ "yMNf6nncwABOOoC5EnqYBNoq29Mf54H5k2Xi8y1fh8ldtKcW9T4WsaXun9fKofegfhwY8wgfoG" +
+ "eW2YNW3fdalIsggRzMEAXVDxj7oieReUGiT53uV2kcmcQRQLdUDUcOC1JEiSRpgZl38c1DDVRlz8Rbhi" +
+ "KUxMqNCPx6PABXCPocpfXJa0yBT0l3ssgMlDfKsxAHX6aEC86zk0CDmTqZPmBjLAoYaHA3" +
+ "uGqoARbQ6rhIBHOdkb7PoRImjmF4sQ60TBIWdao9dqLMjslhOQrGQlPIniW5I1V9nisc5lV0jEqeaC3y" +
+ "lSnjhieVJ7H0FYjcsihjQryhyRwUZBGxWFuh0hI9rOv8h5jHKb549hOHPcIjSdLa6M048G" +
+ "9drX0LNEixfp7WUqq2DyRfBioybmoHVzFWzhXrMJXzwHakzLwb4T2BHcLK6VpC4b2GodYlZe43ggxTNUErif" +
+ "NEfEfxZhDj6HBMYobKvn4ofOsyKPGn6NXnCqIbCCvqOyBikxAYukgCmWHRJRGX4RjNbL" +
+ "BVjY5eoXJB7xisnrqOieXuEnZ9n7rnK8qM4RuOSA8EaDd5n58JU9SUUNRqpZZgK2nPy9Pv90ORiGr1Y30rZS" +
+ "bKT7SucjEZJ00WBF9FlJp6v8OcVvMBjRriaYYjVlOiLvVDQQ2NvYfbv5bLbEhkrJi5Nlg" +
+ "3Tq5jsgSTEBqSKTD5UIukFP194LvVMQIOQ9YM7m9iZHMpCCoIL99FJLsNmzRDVETCjyFoXxSputp6ufupS1n" +
+ "1SHRVlXm7Bx3bjJ79O3bGqjzxT1EZV39isegIyKx2H0zEUpnlXzzbusS0tusECmG3C3eGDOTs" +
+ "FZbYTp5ZxtXCrudDSX3kaeLtCstfqAHGsjHkPd87aSNaJJjPaSaMmGo7zTJGUIX1VCA2KJP37USIAa5NGHtM" +
+ "ChmtfO8kmrO9PZl6Ld18Yi7OlBsEUkMQE0yKwtSpkTK76XS5CG8S7S2S07vtYaBJJ9Bvuzr0F" +
+ "tLsQ1gYWPF1geDalS5MdWfpDvF5MaeJMd2fK0m3jui7xY1IfuSxqZs7SEL6wUVGdWc5tsVroCMMy6Nqjdz5T4vW" +
+ "zdSmpjrFnnB8edB5AOekeHua16I9qcNHuCcOgeYZIc6GzG0O1XAcQu6cEi1ZivUPoYf2sKr4uPvcD" +
+ "gnaIN1KmhwSmxPgkErJVroPAUO18E2apxRlmZkhS6CInyzcLkvycSDCGtFaAZBO3QDO5nmvPFgVxfSbwG8BhhY" +
+ "cWXqwnsbEEejtlXH3Zr5BtxTzd3Bo08s8HxjIXF6Z0CPXcvQzDoemL8M2A1AIrnBkT7vIHgvMuH475M" +
+ "TXIR4K0njrS4X4KrBQFxvuZey8tnUnm8oiJWdUFzdM4N0KioJsG8UzxRODxKh4e3GqxmZxsSwwL0nNnV1syiCCC" +
+ "zTgrtT6fcxpAfcFeTct7FNd4BjzbNCgBrSspzhxnEFMZXuqBGaOS9d9qcuUulwF0lAWGBauWI57qyjXfQnQ" +
+ "i6Sy6nXOcUIOZWJ9BVJf4A27Pa4Pi7ZFznFnIdiQOrxCbb2ZCVkCftWsmcEMnXWXUkGOuA5JXo9YvGyPGq2wgO1wj" +
+ "qAKyqxhBVOL48L2D0PYU16Ursxe0ckoBYXJheQi2d1eIa0pTD78325f8jCHclqINvuvj0GZfJENlc1e" +
+ "ULPRd358aPnsx2DOmN1UojjBI1hacijCtFCE8zGCa9M0L7aZbRUHe8lmlaqhx0Su6nPnPgfbJr6idfxTJHqCT4t8" +
+ "4BfZeqRZ5rgIS15Z7HFYSCPZixMPf683GQoQEFWIM0EqNTJmoHW3K7jDHOUpVutyyWt5VO5ray6rBrq1nAF" +
+ "QEN59RqxM04eXxAOBWnPB17TdvDmyXuXDpjnjXReJLNqJVgB2VFPxsqhQWQupAtjBGvffU7exZMM92fiYdBArV" +
+ "4SE1mBFewTNRz4PmwFVmUoxWj74rzZQuDMhAlx3jBXcaX8eD7PlaADdiMT1mF3faVyScA6bHbV2jU79XvppOfoD" +
+ "YtBFj3a5LtAhTy5BnN2v1XlTQtk6MZ0Ej6g7sW96w9n2XV8wqdWGgjeKHaqH7Pn1XFw7IHvpVYK4wFvIGubp4bpms" +
+ "C3ARq1Gqq8zvDQtoLZSZYOvXCZOIElGZLscqjbRckX5aRhTJX6CxjVcT7S3TScnCbqNdfqMpEsNl2GY3fprQF" +
+ "CTtiZv12uCj0WILSesMc5ct2tQcIvwnOHAuE6fw7lD8EgQ0emU4zxUIDowhTvJ46k27rXTctIX7HlBEZXInV9r49" +
+ "VbJdA3des3ZqGPbBYXTwQcns1jJTmnIf1S0jLWN0Wgk9bH5gkdhl53l2yc1AlZCyJdm9vktH5sctTDdMZrDPPHNUG2" +
+ "pTBg4DDR9Zc6YvkrO4f5O3mfOl441bJkmOSNwoOc3krHTQlN6SBGLEptT4m7MFwqVyrbsEXHegwa53aN4W0J7qwV0" +
+ "EMN2VHLtoHQDfXVOVDXnE1rK3cDJRMhCIvIRmywkA5T9GchtDVfek2qZq1H5wfe92RoXBseAuMoWtTCJiXOJraCxmj" +
+ "cluokF3eK0NpycncoQcObLiS1rield0fdx8UJhsV9QnNtok5a0f4L1MKtjnYJmvItSqn3Lo2VkWagxGSEJzKnK2gO3pH" +
+ "Whlarr6bRQeIwCXckALEVdGZBTPiqjYPBfk5H5wYXqkieh04tjSmnWytNebBNmGjTNgrqNVO7ftCbhh7wICOn" +
+ "lpSMt6BoFvjHYW1IpEyTlVlvNl5NzPPAn2119ttZTfXpifXfQtBGzlCNYTD6m1FvpmOydzqEq8YadgybW76HDtnBdU" +
+ "M1djhNcHfR12NkPc7UIvVJDiTTJ440pU1tqYISyEVr5QZBrhOP2y6RsZnlJy7Mqh56Jw0fJkbI2yQaoc7Jh2Wsh7" +
+ "R58SXBXsalwNM9TmTeBMrc8Hghx9hDpai8agUclHTCoyK2hkEpKLlEJiXUKOE8JPugYE8yFVYF49UAjJUbsj6we3Ocii" +
+ "FXs6oXGymttSxcRksGdfUaIonkrqniea31SgiGmhCjKi0x5ZDNFS26CqSEU0FKiLJyhui8HOJCddX64Ers0VTMHppS" +
+ "ydpQX7PndzDuhT7k8Wj2kGJvKCqzVxTGCssDHoedKmMULEjUqU2EcjT5VOaCFeHKUXyP1B7qfYPtKLcgXHH5bmSgRs8gY" +
+ "2JkPOST2Vr35mNKoulUMqFeo0s1y5hcVY39a3mBMytwZn7HgPhEJScwZdWJd6E5tZ13evEmcn1A5YPBYbm91CdJFXhj" +
+ "iuqmJS71Xq4j56K35TmCJCb4jAAbcGTGEHzcCP1HKVFfsNnLqwflvHwMYQMA3EumrMn1nXnETZFdZJRHlnO8dwgnT" +
+ "ehbB2XtrpErgaFbEWfWEinoiMd4Vs7kgHzs8UiuagYyyCxmg5gEvza3CXzjUnG2lfjI6ox6EYPgXvRySHmL" +
+ "atXzj4x3CgF6j1gn10aUJknF7KQLJ84DIA5fy33YaLLbeOoGJHsdr9rQZCjaIqZKH870sslgm0tnGw5yOddnj" +
+ "FDI2KwL6UVGr3YExI1p5sGaY0Su4G30PMJsOX9ZWvRF72Lk0pVMnjVugkzsnQrbyGezZ8WN8y8kOvrysQuhTt5" +
+ "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ");
+
+ // Use CONSTANT here (the original used CONTAINS) so the long text is actually
+ // checked by the constant matcher; the pattern is a snapshot of the whole text.
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONSTANT,"", sb.toString());
+ sqlPatternConstant = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+
+ assertEquals(1, sqlPatternConstant.match()); // long text should match itself
+
+ // non ascii
+ sb.setLength(0);
+ sb.append("¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~");
+
+ // matcher still holds the long ASCII pattern
+ assertEquals(0, sqlPatternConstant.match()); // should not match
+
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.CONSTANT,"", "¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~");
+ sqlPatternConstant = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ assertEquals(1, sqlPatternConstant.match()); // should match
+
+ }
+
+ /**
+ * Exercises the COMPLEX matcher produced by SqlPatternFactory, which is driven by
+ * the Java regex string rather than the (empty) simple pattern.
+ * match() is expected to return 1 when the text held in the buffer matches the regex
+ * and 0 otherwise. Each matcher is created over the shared StringBuffer and re-checked
+ * after the buffer is modified, so the assertions rely on match() observing the
+ * buffer's current contents.
+ */
+ @Test
+ public void testSqlPatternNotSimple() {
+ RegexpUtil.SqlPatternInfo patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.COMPLEX,"A.*BC.*", "");
+
+ StringBuffer sb = new StringBuffer("ADEBCDF");
+ SqlPatternMatcher sqlPatternComplex = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+
+ assertEquals(1, sqlPatternComplex.match()); // ADEBCDF should match A.*BC.*
+
+ sb.setLength(0);
+ sb.append("BC");
+ assertEquals(0, sqlPatternComplex.match()); // BC should not match A.*BC.*
+
+ sb.setLength(0);
+ assertEquals(0, sqlPatternComplex.match()); // empty string should not match
+
+ sb.append("DEFGHIJ");
+ assertEquals(0, sqlPatternComplex.match()); // DEFGHIJ should not match A.*BC.*
+
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.COMPLEX,"b00dUrA0.*42.*9a8BZ", "");
+ sb.setLength(0);
+ // long ASCII string.
+ sb.append("b00dUrA0oa2i4ZEHg6zvPXPXlVQYB2BXe8T5gIEtvUDzcN6yUkIqyS07gaAy8k4" +
+ "ac6Bn1cxblsXFnkp8g8hiQkUMJPyl6l0jTdsIzQ4PkVCURGGyF0aduGqCXUaKp91gqkRMvL" +
+ "g1Lh6u0NrGCBoJajPxnwZCyh58cN5aFiNscBFKIqqLPTS1vnbR39nmzU88FM8qDepJRhvein" +
+ "hHhmrHdEb22QN20dXEHSygR7vrb2zZhhfWeJbXRsesuYDqdGig801IAS6VWRIdQtJ6gaRhCdNz" +
+ " DWnQWRXlMhcrR4MKJXeBgDtjzbHd0ZS53K8u8ORl6FKxtvdKmwUuHiuMJrQQm6Rgx6WJrAtvTf" +
+ "UE8a5I3nYXdRppnm3MbRsLu4IxXIblh8kmAIG6n2yHwGhpWYkRI7cwl4dOB3bsxxtdaaTlZMMx6T" +
+ "XPaUK10UzfZCAkWG9Du3QhJxxJBZaP3HPebXmw1l5swPohmG3L6zOcEWp7f" +
+ "saldC7TOrFa3ReYFHooclSGTgZ9sWjJ5SYJ0vEkI1RMWoeGcdJq5v4lrcB6YjrMqQJIaxAdRnIaNG" +
+ "V6oR9SkI4diiXspIvRWj6PMkpqI02ovI3va49bHauTrqTyM9eIhS" +
+ "0Mc3SHzknQwHJAFkqmhV9Lm2VLULou2iJDvc5sWW8W48IODGqGytqLogA01Cuo3gURmH2057nCld9" +
+ "PDHQEieFMddi4gKPOv4es1YX2aBo4RfYiTlUyXd6gGujVPgU2j" +
+ "AAhcz6JqVC08O73gM9zOAM2l4PwN2TN3lBufkQUGyOzHtoTDjSdQ2DPXIks9A6ehIpn92n1UtdrJeMz" +
+ "4oMN4kwP95YjQk1ko2e3DVAiPVlCiaWqnzXKa41kLVs3KiBhfAff5" +
+ "hoTnBGn9CaXed6g6kLs2YBTQYM9yLW9Wb5qNhLeCM4GGJM8dUWqqEsWYPrcPAkCMa6LXfgEcsCwQ6ij" +
+ "JhhjcxwoafBRyyEvQ6Pfhg8IqJ0afBpAZHhR2y4I11zbaJZqs3WG3H3aQHT" +
+ "wcPHdBHnk65GdL3Njuoo0K4mcmN6lk7pWptHwTjkw59zTw834PZ8TWm5XiUnsi9JKy41MPqHcbO0nN" +
+ "SYl9Q6kEjv4nt8p9unhUYqgrGvLl42nvqGb1F47f6PvxkewuouxMFAszYhaMjZzIf5" +
+ "AgmvaXbSP9MKYu6EkkvM9CIhYGZuq7PJUk6wmoG6IxIfOokUcnrGzuU9INFUuXf4LptQ987GU3hw0d" +
+ "yMNf6nncwABOOoC5EnqYBNoq29Mf54H5k2Xi8y1fh8ldtKcW9T4WsaXun9fKofegfhwY8wgfoG" +
+ "eW2YNW3fdalIsggRzMEAXVDxj7oieReUGiT53uV2kcmcQRQLdUDUcOC1JEiSRpgZl38c1DDVRlz8Rbhi" +
+ "KUxMqNCPx6PABXCPocpfXJa0yBT0l3ssgMlDfKsxAHX6aEC86zk0CDmTqZPmBjLAoYaHA3" +
+ "uGqoARbQ6rhIBHOdkb7PoRImjmF4sQ60TBIWdao9dqLMjslhOQrGQlPIniW5I1V9nisc5lV0jEqeaC3y" +
+ "lSnjhieVJ7H0FYjcsihjQryhyRwUZBGxWFuh0hI9rOv8h5jHKb549hOHPcIjSdLa6M048G" +
+ "9drX0LNEixfp7WUqq2DyRfBioybmoHVzFWzhXrMJXzwHakzLwb4T2BHcLK6VpC4b2GodYlZe43ggxTNUErif" +
+ "NEfEfxZhDj6HBMYobKvn4ofOsyKPGn6NXnCqIbCCvqOyBikxAYukgCmWHRJRGX4RjNbL" +
+ "BVjY5eoXJB7xisnrqOieXuEnZ9n7rnK8qM4RuOSA8EaDd5n58JU9SUUNRqpZZgK2nPy9Pv90ORiGr1Y30rZS" +
+ "bKT7SucjEZJ00WBF9FlJp6v8OcVvMBjRriaYYjVlOiLvVDQQ2NvYfbv5bLbEhkrJi5Nlg" +
+ "3Tq5jsgSTEBqSKTD5UIukFP194LvVMQIOQ9YM7m9iZHMpCCoIL99FJLsNmzRDVETCjyFoXxSputp6ufupS1n" +
+ "1SHRVlXm7Bx3bjJ79O3bGqjzxT1EZV39isegIyKx2H0zEUpnlXzzbusS0tusECmG3C3eGDOTs" +
+ "FZbYTp5ZxtXCrudDSX3kaeLtCstfqAHGsjHkPd87aSNaJJjPaSaMmGo7zTJGUIX1VCA2KJP37USIAa5NGHtM" +
+ "ChmtfO8kmrO9PZl6Ld18Yi7OlBsEUkMQE0yKwtSpkTK76XS5CG8S7S2S07vtYaBJJ9Bvuzr0F" +
+ "tLsQ1gYWPF1geDalS5MdWfpDvF5MaeJMd2fK0m3jui7xY1IfuSxqZs7SEL6wUVGdWc5tsVroCMMy6Nqjdz5T4vW" +
+ "zdSmpjrFnnB8edB5AOekeHua16I9qcNHuCcOgeYZIc6GzG0O1XAcQu6cEi1ZivUPoYf2sKr4uPvcD" +
+ "gnaIN1KmhwSmxPgkErJVroPAUO18E2apxRlmZkhS6CInyzcLkvycSDCGtFaAZBO3QDO5nmvPFgVxfSbwG8BhhY" +
+ "cWXqwnsbEEejtlXH3Zr5BtxTzd3Bo08s8HxjIXF6Z0CPXcvQzDoemL8M2A1AIrnBkT7vIHgvMuH475M" +
+ "TXIR4K0njrS4X4KrBQFxvuZey8tnUnm8oiJWdUFzdM4N0KioJsG8UzxRODxKh4e3GqxmZxsSwwL0nNnV1syiCCC" +
+ "zTgrtT6fcxpAfcFeTct7FNd4BjzbNCgBrSspzhxnEFMZXuqBGaOS9d9qcuUulwF0lAWGBauWI57qyjXfQnQ" +
+ "i6Sy6nXOcUIOZWJ9BVJf4A27Pa4Pi7ZFznFnIdiQOrxCbb2ZCVkCftWsmcEMnXWXUkGOuA5JXo9YvGyPGq2wgO1wj" +
+ "qAKyqxhBVOL48L2D0PYU16Ursxe0ckoBYXJheQi2d1eIa0pTD78325f8jCHclqINvuvj0GZfJENlc1e" +
+ "ULPRd358aPnsx2DOmN1UojjBI1hacijCtFCE8zGCa9M0L7aZbRUHe8lmlaqhx0Su6nPnPgfbJr6idfxTJHqCT4t8" +
+ "4BfZeqRZ5rgIS15Z7HFYSCPZixMPf683GQoQEFWIM0EqNTJmoHW3K7jDHOUpVutyyWt5VO5ray6rBrq1nAF" +
+ "QEN59RqxM04eXxAOBWnPB17TdvDmyXuXDpjnjXReJLNqJVgB2VFPxsqhQWQupAtjBGvffU7exZMM92fiYdBArV" +
+ "4SE1mBFewTNRz4PmwFVmUoxWj74rzZQuDMhAlx3jBXcaX8eD7PlaADdiMT1mF3faVyScA6bHbV2jU79XvppOfoD" +
+ "YtBFj3a5LtAhTy5BnN2v1XlTQtk6MZ0Ej6g7sW96w9n2XV8wqdWGgjeKHaqH7Pn1XFw7IHvpVYK4wFvIGubp4bpms" +
+ "C3ARq1Gqq8zvDQtoLZSZYOvXCZOIElGZLscqjbRckX5aRhTJX6CxjVcT7S3TScnCbqNdfqMpEsNl2GY3fprQF" +
+ "CTtiZv12uCj0WILSesMc5ct2tQcIvwnOHAuE6fw7lD8EgQ0emU4zxUIDowhTvJ46k27rXTctIX7HlBEZXInV9r49" +
+ "VbJdA3des3ZqGPbBYXTwQcns1jJTmnIf1S0jLWN0Wgk9bH5gkdhl53l2yc1AlZCyJdm9vktH5sctTDdMZrDPPHNUG2" +
+ "pTBg4DDR9Zc6YvkrO4f5O3mfOl441bJkmOSNwoOc3krHTQlN6SBGLEptT4m7MFwqVyrbsEXHegwa53aN4W0J7qwV0" +
+ "EMN2VHLtoHQDfXVOVDXnE1rK3cDJRMhCIvIRmywkA5T9GchtDVfek2qZq1H5wfe92RoXBseAuMoWtTCJiXOJraCxmj" +
+ "cluokF3eK0NpycncoQcObLiS1rield0fdx8UJhsV9QnNtok5a0f4L1MKtjnYJmvItSqn3Lo2VkWagxGSEJzKnK2gO3pH" +
+ "Whlarr6bRQeIwCXckALEVdGZBTPiqjYPBfk5H5wYXqkieh04tjSmnWytNebBNmGjTNgrqNVO7ftCbhh7wICOn" +
+ "lpSMt6BoFvjHYW1IpEyTlVlvNl5NzPPAn2119ttZTfXpifXfQtBGzlCNYTD6m1FvpmOydzqEq8YadgybW76HDtnBdU" +
+ "M1djhNcHfR12NkPc7UIvVJDiTTJ440pU1tqYISyEVr5QZBrhOP2y6RsZnlJy7Mqh56Jw0fJkbI2yQaoc7Jh2Wsh7" +
+ "R58SXBXsalwNM9TmTeBMrc8Hghx9hDpai8agUclHTCoyK2hkEpKLlEJiXUKOE8JPugYE8yFVYF49UAjJUbsj6we3Ocii" +
+ "FXs6oXGymttSxcRksGdfUaIonkrqniea31SgiGmhCjKi0x5ZDNFS26CqSEU0FKiLJyhui8HOJCddX64Ers0VTMHppS" +
+ "ydpQX7PndzDuhT7k8Wj2kGJvKCqzVxTGCssDHoedKmMULEjUqU2EcjT5VOaCFeHKUXyP1B7qfYPtKLcgXHH5bmSgRs8gY" +
+ "2JkPOST2Vr35mNKoulUMqFeo0s1y5hcVY39a3mBMytwZn7HgPhEJScwZdWJd6E5tZ13evEmcn1A5YPBYbm91CdJFXhj" +
+ "iuqmJS71Xq4j56K35TmCJCb4jAAbcGTGEHzcCP1HKVFfsNnLqwflvHwMYQMA3EumrMn1nXnETZFdZJRHlnO8dwgnT" +
+ "ehbB2XtrpErgaFbEWfWEinoiMd4Vs7kgHzs8UiuagYyyCxmg5gEvza3CXzjUnG2lfjI6ox6EYPgXvRySHmL" +
+ "atXzj4x3CgF6j1gn10aUJknF7KQLJ84DIA5fy33YaLLbeOoGJHsdr9rQZCjaIqZKH870sslgm0tnGw5yOddnj" +
+ "FDI2KwL6UVGr3YExI1p5sGaY0Su4G30PMJsOX9ZWvRF72Lk0pVMnjVugkzsnQrbyGezZ8WN8y8kOvrysQuhTt5" +
+ "AFyMJ4kLsONE52kZsJYYyDpWw9a8BZ");
+
+ sqlPatternComplex = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ assertEquals(1, sqlPatternComplex.match()); // long text should match b00dUrA0.*42.*9a8BZ
+
+ // non ascii
+ sb.setLength(0);
+ sb.append("¤EÀsÆW°ê»Ú®i¶T¤¤¤ß3¼Ó®i¶TÆU2~~");
+
+ // matcher still holds the previous ASCII regex
+ assertEquals(0, sqlPatternComplex.match()); // should not match
+
+ patternInfo = new RegexpUtil.SqlPatternInfo(RegexpUtil.SqlPatternType.COMPLEX,".*»Ú®i¶T¤¤¤.*¼Ó®i.*ÆU2~~", "");
+ sqlPatternComplex = SqlPatternFactory.getSqlPatternMatcher(patternInfo, sb);
+ assertEquals(1, sqlPatternComplex.match()); // should match
+
+ }
+
+}