You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2017/06/12 21:25:42 UTC

[1/2] jena git commit: JENA-1360: Improve regex compliance

Repository: jena
Updated Branches:
  refs/heads/master d64d555cc -> 6a3f439bb


JENA-1360: Improve regex compliance


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/6baed63f
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/6baed63f
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/6baed63f

Branch: refs/heads/master
Commit: 6baed63fbb9da232194ebe402cbe06ad6576bf26
Parents: d64d555
Author: Andy Seaborne <an...@apache.org>
Authored: Fri Jun 9 17:27:22 2017 +0100
Committer: Andy Seaborne <an...@apache.org>
Committed: Fri Jun 9 17:27:22 2017 +0100

----------------------------------------------------------------------
 .../org/apache/jena/sparql/expr/E_Regex.java    | 32 +++++++---
 .../apache/jena/sparql/expr/RegexEngine.java    |  2 +-
 .../org/apache/jena/sparql/expr/RegexJava.java  | 19 +++---
 .../apache/jena/sparql/expr/RegexXerces.java    |  5 ++
 .../org/apache/jena/sparql/expr/TestRegex.java  | 63 ++++++++++++++------
 5 files changed, 84 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/6baed63f/jena-arq/src/main/java/org/apache/jena/sparql/expr/E_Regex.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/E_Regex.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/E_Regex.java
index a4d89d4..ff37b00 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/E_Regex.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/E_Regex.java
@@ -20,14 +20,16 @@ package org.apache.jena.sparql.expr;
 
 import java.util.List ;
 
+import org.apache.commons.lang3.StringUtils ;
 import org.apache.jena.atlas.logging.Log ;
 import org.apache.jena.graph.Node ;
 import org.apache.jena.query.ARQ ;
 import org.apache.jena.sparql.expr.nodevalue.NodeFunctions ;
 import org.apache.jena.sparql.sse.Tags ;
+import org.apache.jena.sparql.util.FmtUtils ;
 import org.apache.jena.sparql.util.Symbol ;
 
-/** Indirect to the choosen regular expression implementation */
+/** Indirect to the chosen regular expression implementation */
 
 public class E_Regex extends ExprFunctionN
 {
@@ -71,14 +73,16 @@ public class E_Regex extends ExprFunctionN
     
     private void init(Expr pattern, Expr flags)
     {
-        try {
-        if ( pattern.isConstant() && pattern.getConstant().isString() && ( flags==null || flags.isConstant() ) )
-            regexEngine = makeRegexEngine(pattern.getConstant(), (flags==null)?null:flags.getConstant()) ;
-        } catch (ExprEvalException ex) {
-            // Here, we are doing static compilation of the pattern.
-            // ExprEvalException does not have a stacktrace. 
-            // We could throw a non-eval exception.
-            throw ex; //new ExprException(ex.getMessage(), ex.getCause());
+        if ( ! ARQ.isStrictMode() ) {
+            try {
+                if ( pattern.isConstant() && pattern.getConstant().isString() && ( flags==null || flags.isConstant() ) )
+                    regexEngine = makeRegexEngine(pattern.getConstant(), (flags==null)?null:flags.getConstant()) ;
+            } catch (ExprEvalException ex) {
+                // Here, we are doing static compilation of the pattern.
+                // ExprEvalException does not have a stacktrace. 
+                // We could throw a non-eval exception.
+                throw ex; //new ExprException(ex.getMessage(), ex.getCause());
+            }
         }
     }
 
@@ -117,10 +121,20 @@ public class E_Regex extends ExprFunctionN
         if ( vFlags != null && ! vFlags.isString() )
             throw new ExprException("REGEX: Pattern flags are not a string: "+vFlags) ;
         String s = (vFlags==null)?null:vFlags.getString() ;
+        checkFlags(s);
         
         return makeRegexEngine(vPattern.getString(), s) ;
     }
     
+    private static void checkFlags(String flags) {
+        if ( flags == null )
+            return;
+        // F&O spec defines regex: Can only contain s, m, i, x, q
+        // Not all are supported by all regex engines.
+        if ( ! StringUtils.containsOnly(flags, "smixq") )
+            throw new ExprEvalException("REGEX: Only 'smixq' are legal as pattern flags: got \""+FmtUtils.stringEsc(flags)+"\"");
+    }
+
     public static RegexEngine makeRegexEngine(String pattern, String flags)
     {
         if ( regexImpl.equals(ARQ.xercesRegex))

http://git-wip-us.apache.org/repos/asf/jena/blob/6baed63f/jena-arq/src/main/java/org/apache/jena/sparql/expr/RegexEngine.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/RegexEngine.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/RegexEngine.java
index a31c48d..5204f10 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/RegexEngine.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/RegexEngine.java
@@ -19,7 +19,7 @@
 package org.apache.jena.sparql.expr;
 
 /** Interface to abstract away from specific regular expression systsms
- *  (ORO and java.util.regex) */
+ *  (Apache Xerces and java.util.regex) */
 
 public interface RegexEngine
 {

http://git-wip-us.apache.org/repos/asf/jena/blob/6baed63f/jena-arq/src/main/java/org/apache/jena/sparql/expr/RegexJava.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/RegexJava.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/RegexJava.java
index 4a85b2f..d4faba2 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/RegexJava.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/RegexJava.java
@@ -42,15 +42,18 @@ public class RegexJava implements RegexEngine
     {
         try { 
             int mask = 0 ;
-            if ( flags != null )
+            if ( flags != null ) {
                 mask = makeMask(flags) ;
+                if ( flags.contains("q") )
+                    patternStr = Pattern.quote(patternStr);
+            }
+            
             return Pattern.compile(patternStr, mask) ;
         } 
         catch (PatternSyntaxException pEx)
         { throw new ExprEvalException("Regex: Pattern exception: "+pEx) ; }
     }
 
-
     public static int makeMask(String modifiers)
     {
         if ( modifiers == null )
@@ -61,17 +64,17 @@ public class RegexJava implements RegexEngine
         {
             switch(modifiers.charAt(i))
             {
-                case 'i' : 
-                    // Need both (Java 1.4)
+                case 'i': 
                     newMask |= Pattern.UNICODE_CASE ;
                     newMask |= Pattern.CASE_INSENSITIVE;
                     break ;
-                case 'm' : newMask |= Pattern.MULTILINE ;           break ;
-                case 's' : newMask |= Pattern.DOTALL ;              break ;
-                //case 'x' : newMask |= Pattern.;  break ;
+                case 'm': newMask |= Pattern.MULTILINE ;           break ;
+                case 's': newMask |= Pattern.DOTALL ;              break ;
+                //case 'x': newMask |= Pattern.; break ;
+                case 'q': ; break ;
                 
                 default: 
-                    throw new ExprEvalException("Illegal flag in regex modifiers: "+modifiers.charAt(i)) ;
+                    throw new ExprEvalException("Unsupported flag in regex modifiers: "+modifiers.charAt(i)) ;
             }
         }
         return newMask ;

http://git-wip-us.apache.org/repos/asf/jena/blob/6baed63f/jena-arq/src/main/java/org/apache/jena/sparql/expr/RegexXerces.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/expr/RegexXerces.java b/jena-arq/src/main/java/org/apache/jena/sparql/expr/RegexXerces.java
index 890f4f3..b2a2a88 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/expr/RegexXerces.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/expr/RegexXerces.java
@@ -19,6 +19,7 @@
 package org.apache.jena.sparql.expr;
 
 import org.apache.xerces.impl.xpath.regex.ParseException ;
+import org.apache.xerces.impl.xpath.regex.REUtil ;
 import org.apache.xerces.impl.xpath.regex.RegularExpression ;
 
 public class RegexXerces implements RegexEngine
@@ -27,6 +28,9 @@ public class RegexXerces implements RegexEngine
 
     public RegexXerces(String pattern, String flags)
     {
+        if ( flags.contains("q") )
+            // Best we can do.
+            pattern = REUtil.quoteMeta(pattern);
         regexPattern = makePattern(pattern, flags) ;
     }
     
@@ -38,6 +42,7 @@ public class RegexXerces implements RegexEngine
     
     private RegularExpression makePattern(String patternStr, String flags)
     {
+        // flag q supported above.
         // Input : only  m s i x
         // Check/modify flags.
         // Always "u", never patternStr

http://git-wip-us.apache.org/repos/asf/jena/blob/6baed63f/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestRegex.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestRegex.java b/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestRegex.java
index 53e883b..3fc82b9 100644
--- a/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestRegex.java
+++ b/jena-arq/src/test/java/org/apache/jena/sparql/expr/TestRegex.java
@@ -18,36 +18,57 @@
 
 package org.apache.jena.sparql.expr;
 
+import java.util.Arrays;
+import java.util.Collection;
+
 import org.apache.jena.atlas.junit.BaseTest ;
 import org.apache.jena.query.ARQ ;
 import org.apache.jena.sparql.engine.binding.BindingFactory ;
-import org.apache.jena.sparql.expr.E_Regex ;
-import org.apache.jena.sparql.expr.Expr ;
-import org.apache.jena.sparql.expr.NodeValue ;
+import org.apache.jena.sparql.util.Symbol;
+import org.junit.AfterClass;
 import org.junit.BeforeClass ;
 import org.junit.Test ;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
 
+@RunWith(Parameterized.class)
 public class TestRegex extends BaseTest
 {
+    @Parameters(name = "{index}: {0}")
+    public static Collection<Object[]> data() {
+        return Arrays.asList(new Object[][] { { "Java Regex",   ARQ.javaRegex },
+                                              { "Xerces Regex", ARQ.xercesRegex } });
+    }
+    
+    public TestRegex(String name, Symbol setting) {
+        ARQ.getContext().set(ARQ.regexImpl, setting) ;
+    }
+    
+    private static Object value;  
+    
     @BeforeClass
     public static void beforeClass() {
-        if ( false )
-            ARQ.getContext().set(ARQ.regexImpl, ARQ.xercesRegex) ;
+        value = ARQ.getContext().get(ARQ.regexImpl);
     }
     
-    @Test public void testRegex1() { regexTest("ABC", "ABC", null, true) ; }
-    @Test public void testRegex2() { regexTest("ABC", "abc", null, false) ; }
-    @Test public void testRegex3() { regexTest("ABC", "abc", "", false) ; }
-    @Test public void testRegex4() { regexTest("ABC", "abc", "i", true) ; }
-    @Test public void testRegex5() { regexTest("abc", "B", "i", true) ; }
-    @Test public void testRegex6() { regexTest("ABC", "^ABC", null, true) ; }
-    @Test public void testRegex7() { regexTest("ABC", "BC", null, true) ; }
-    @Test public void testRegex8() { regexTest("ABC", "^BC", null, false) ; }
+    @AfterClass
+    public static void afterClass() {
+        ARQ.getContext().set(ARQ.regexImpl, value);
+    }
 
-    public void regexTest(String value, String pattern, String flags, boolean expected)
-    {
+    @Test public void testRegex01() { regexTest( "ABC",  "ABC",  null,   true) ; }
+    @Test public void testRegex02() { regexTest( "ABC",  "abc",  null,   false) ; }
+    @Test public void testRegex03() { regexTest( "ABC",  "abc",  "",     false) ; }
+    @Test public void testRegex04() { regexTest( "ABC",  "abc",  "i",    true) ; }
+    @Test public void testRegex05() { regexTest( "abc",  "B",    "i",    true) ; }
+    @Test public void testRegex06() { regexTest( "ABC",  "^ABC", null,   true) ; }
+    @Test public void testRegex07() { regexTest( "ABC",  "BC",   null,   true) ; }
+    @Test public void testRegex08() { regexTest( "ABC",  "^BC",  null,   false) ; }
+    @Test public void testRegex09() { regexTest( "[[",   "[",    "q",    true) ; }
+    
+    public void regexTest(String value, String pattern, String flags, boolean expected) {
         Expr s = NodeValue.makeString(value) ;
-        
         E_Regex r = new E_Regex(s, pattern, flags) ;
         NodeValue nv = r.eval(BindingFactory.binding(), null) ;
         boolean b = nv.getBoolean() ;
@@ -55,11 +76,10 @@ public class TestRegex extends BaseTest
             fail(fmtTest(value, pattern, flags)+" ==> "+b+" expected "+expected) ;
     }
 
-    private String fmtTest(String value, String pattern, String flags)
-    {
+    private String fmtTest(String value, String pattern, String flags) {
         String tmp = "regex(\""+value+"\", \""+pattern+"\"" ;
         if ( flags != null )
-            tmp = tmp + "\""+flags+"\"" ;
+            tmp = tmp + ", \""+flags+"\"" ;
         tmp = tmp + ")" ;
         return tmp ; 
     }
@@ -71,4 +91,9 @@ public class TestRegex extends BaseTest
     // No such flag
     @Test(expected=ExprEvalException.class)
     public void testRegexErr2() { regexTest("ABC", "abc", "g", false) ; }
+    
+    // No such flag
+    @Test(expected=ExprEvalException.class)
+    public void testRegexErr3() { regexTest("ABC", "abc", "u", false) ; }
+
 }


[2/2] jena git commit: JENA-1360: Merge commit 'refs/pull/261/head' of github.com:apache/jena

Posted by an...@apache.org.
JENA-1360: Merge commit 'refs/pull/261/head' of github.com:apache/jena

This closes #261.


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/6a3f439b
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/6a3f439b
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/6a3f439b

Branch: refs/heads/master
Commit: 6a3f439bbbd1868c53dbfc44e91f3fd4b87dfba9
Parents: d64d555 6baed63
Author: Andy Seaborne <an...@apache.org>
Authored: Mon Jun 12 22:20:07 2017 +0100
Committer: Andy Seaborne <an...@apache.org>
Committed: Mon Jun 12 22:20:07 2017 +0100

----------------------------------------------------------------------
 .../org/apache/jena/sparql/expr/E_Regex.java    | 32 +++++++---
 .../apache/jena/sparql/expr/RegexEngine.java    |  2 +-
 .../org/apache/jena/sparql/expr/RegexJava.java  | 19 +++---
 .../apache/jena/sparql/expr/RegexXerces.java    |  5 ++
 .../org/apache/jena/sparql/expr/TestRegex.java  | 63 ++++++++++++++------
 5 files changed, 84 insertions(+), 37 deletions(-)
----------------------------------------------------------------------