You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2012/10/23 21:23:48 UTC

svn commit: r1401409 - in /jena/trunk/jena-arq/src: main/java/com/hp/hpl/jena/sparql/modify/UpdateEngineWorker.java main/java/org/openjena/riot/system/LangTag.java test/java/arq/qtest.java test/java/org/openjena/atlas/junit/TextListener2.java

Author: andy
Date: Tue Oct 23 19:23:48 2012
New Revision: 1401409

URL: http://svn.apache.org/viewvc?rev=1401409&view=rev
Log:
Catch parser exception more gracefully on SPARQL Update LOAD.

Added:
    jena/trunk/jena-arq/src/main/java/org/openjena/riot/system/LangTag.java
Modified:
    jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/modify/UpdateEngineWorker.java
    jena/trunk/jena-arq/src/test/java/arq/qtest.java
    jena/trunk/jena-arq/src/test/java/org/openjena/atlas/junit/TextListener2.java

Modified: jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/modify/UpdateEngineWorker.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/modify/UpdateEngineWorker.java?rev=1401409&r1=1401408&r2=1401409&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/modify/UpdateEngineWorker.java (original)
+++ jena/trunk/jena-arq/src/main/java/com/hp/hpl/jena/sparql/modify/UpdateEngineWorker.java Tue Oct 23 19:23:48 2012
@@ -28,6 +28,7 @@ import org.openjena.atlas.data.DataBag ;
 import org.openjena.atlas.data.ThresholdPolicy ;
 import org.openjena.atlas.data.ThresholdPolicyFactory ;
 import org.openjena.atlas.iterator.Iter ;
+import org.openjena.riot.RiotException ;
 import org.openjena.riot.SerializationFactoryFinder ;
 
 import com.hp.hpl.jena.graph.Graph ;
@@ -156,7 +157,10 @@ public class UpdateEngineWorker implemen
 //            }
             
             // Read into temporary model to protect against parse errors.
-            Model model = FileManager.get().loadModel(source) ;
+            Model model = null ;
+            try {
+                model = FileManager.get().loadModel(source) ;
+            } catch (RuntimeException ex) { throw new UpdateException("Failed to LOAD '"+source+"'", ex) ; }     
             Graph g = graph(graphStore, dest) ;
             g.getBulkUpdateHandler().add(model.getGraph()) ;
         } catch (RuntimeException ex)

Added: jena/trunk/jena-arq/src/main/java/org/openjena/riot/system/LangTag.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/openjena/riot/system/LangTag.java?rev=1401409&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/openjena/riot/system/LangTag.java (added)
+++ jena/trunk/jena-arq/src/main/java/org/openjena/riot/system/LangTag.java Tue Oct 23 19:23:48 2012
@@ -0,0 +1,356 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.openjena.riot.system;
+
+import java.util.regex.Matcher ;
+import java.util.regex.Pattern ;
+
+import org.openjena.atlas.lib.Chars ;
+
+
+/**
+ * Language tags: support for parsing and canonicalization of case. 
+ * Grandfathered forms ("i-") are left untouched.
+ * Unsupported or syntactically illegal forms are handled in
+ * canonicalization by doing nothing.
+ * <ul>
+ * <li>Language tags syntax: <a href="http://www.ietf.org/rfc/rfc4646.txt">RFC 4646</a></li>
+ * <li>Matching Language tags: <a href="http://www.ietf.org/rfc/rfc4647.txt">RFC 4647</a></li>
+ * <li>Language tags syntax: <a href="http://www.ietf.org/rfc/rfc5646.txt">RFC 5646</a></li>
+ * </ul>
+ */
+  
+public class LangTag
+{
+    // See also http://tools.ietf.org/html/rfc5646 - irregular lang tags
+    
+    /** Index of the language part */
+    public static final int idxLanguage     = 0 ;
+    /** Index of the script part */ 
+    public static final int idxScript       = 1 ;
+    /** Index of the region part */
+    public static final int idxRegion       = 2 ;
+    /** Index of the variant part */
+    public static final int idxVariant      = 3 ;
+    /** Index of all extensions */
+    public static final int idxExtension    = 4 ;
+    
+    private static final int partsLength    = 5 ;
+    
+    private LangTag(){}
+    
+    // ABNF is defined in http://www.ietf.org/rfc/rfc4234.txt
+
+    /*
+    In this format, all non-initial two-letter subtags are uppercase, all
+    non-initial four-letter subtags are titlecase, and all other subtags
+    are lowercase.
+    */
+
+  
+  /*
+   *     <li>ABNF definition: <a href="http://www.ietf.org/rfc/rfc4234.txt">RFC 4234</a></li>
+
+ Language-Tag  = langtag
+               / privateuse             ; private use tag
+               / grandfathered          ; grandfathered registrations
+
+ langtag       = (language
+                  ["-" script]
+                  ["-" region]
+                  *("-" variant)
+                  *("-" extension)
+                  ["-" privateuse])
+
+ language      = (2*3ALPHA [ extlang ]) ; shortest ISO 639 code
+               / 4ALPHA                 ; reserved for future use
+               / 5*8ALPHA               ; registered language subtag
+
+ extlang       = *3("-" 3ALPHA)         ; reserved for future use
+
+ script        = 4ALPHA                 ; ISO 15924 code
+
+ region        = 2ALPHA                 ; ISO 3166 code
+               / 3DIGIT                 ; UN M.49 code
+
+ variant       = 5*8alphanum            ; registered variants
+               / (DIGIT 3alphanum)
+
+ extension     = singleton 1*("-" (2*8alphanum))
+
+ singleton     = %x41-57 / %x59-5A / %x61-77 / %x79-7A / DIGIT
+               ; "a"-"w" / "y"-"z" / "A"-"W" / "Y"-"Z" / "0"-"9"
+               ; Single letters: x/X is reserved for private use
+
+ privateuse    = ("x"/"X") 1*("-" (1*8alphanum))
+
+ grandfathered = 1*3ALPHA 1*2("-" (2*8alphanum))
+                 ; grandfathered registration
+                 ; Note: i is the only singleton
+                 ; that starts a grandfathered tag
+
+ alphanum      = (ALPHA / DIGIT)       ; letters and numbers
+                  
+
+   */
+
+    private static final String languageRE_1    = "(?:[a-zA-Z]{2,3}(?:-[a-zA-Z]{3}){0,3})" ; //including extlang
+    private static final String languageRE_2    = "[a-zA-Z]{4}" ;
+    private static final String languageRE_3    = "[a-zA-Z]{5,8}" ;
+    private static final String language        = "(?:"+languageRE_1+"|"+languageRE_2+"|"+languageRE_3+")" ;
+
+    private static final String script          = "[a-zA-Z]{4}" ;
+    private static final String region          = "[a-zA-Z]{2}|[0-9]{3}" ;
+    private static final String variant         = "[a-zA-Z0-9]{5,8}" ;
+    private static final String extension1      = "(?:[a-zA-Z0-9]-[a-zA-Z0-9]{2,8})" ;
+    private static final String extension       = extension1+"(?:-"+extension1+")*" ;
+    
+//    private static final String singleton = null ;
+//    private static final String privateuse = null ;
+//    private static final String grandfathered = null ;
+
+    private static final String langtag = String.format("^(%s)(?:-(%s))?(?:-(%s))?(?:-(%s))?(?:-(%s))?$"
+                                                        ,language
+                                                        ,script
+                                                        ,region
+                                                        ,variant
+                                                        ,extension
+                                                        ) ;
+    
+    // Private use forms "x-"
+    private static final String privateuseRE    = "^[xX](-[a-zA-Z0-9]{1,8})*$" ; 
+    // In general, this can look like a langtag but there are no registered forms that do so.
+    // This is for the "i-" forms only.
+    private static final String grandfatheredRE = "i(?:-[a-zA-Z0-9]{2,8}){1,2}" ;  
+    
+    private static Pattern pattern              = Pattern.compile(langtag) ;
+    private static Pattern patternPrivateuse    = Pattern.compile(privateuseRE) ;
+    private static Pattern patternGrandfathered = Pattern.compile(grandfatheredRE) ; 
+    
+    /** Validate - basic syntax check for a language tag: [a-zA-Z]+ ('-' [a-zA-Z0-9]+)* */
+    public static boolean check(String languageTag)
+    {
+        int len = languageTag.length() ;
+        int idx = 0;
+        boolean first = true ;
+        while ( idx < languageTag.length() )
+        {
+            int idx2 = checkPart(languageTag, idx, first) ;
+            first = false ;
+            if ( idx2 == idx )
+                // zero length part.
+                return false ;
+            idx = idx2 ;
+            if ( idx == len )
+                return true ;
+            if ( languageTag.charAt(idx) != Chars.CH_DASH )
+                return false ;
+            idx ++ ;
+            if ( idx == len)
+                // trailing DASH
+                return false ;
+        }
+        return true ;
+    }
+    
+    private static int checkPart(String languageTag, int idx, boolean leader)
+    {
+        for ( ; idx < languageTag.length() ; idx++)
+        {
+            int ch = languageTag.charAt(idx) ;
+            if ( leader )
+            {
+                if ( RiotChars.isA2Z(ch) ) continue ;
+            }
+            else
+            {
+                if ( RiotChars.isA2ZN(ch) ) continue ;
+            }
+            // Not acceptable.
+            return idx ;
+        }
+        // Off end.
+        return idx ;
+    }
+
+    /** Parse a langtag string and return its parts in canonical case.
+     *  See constants for the array contents.  Parts not present cause a null
+     *  in the return array.
+     *  @return Langtag parts, or null if the input string does not parse as a lang tag.
+     */
+    public static String[] parse(String languageTag)
+    {
+        String[] parts = new String[partsLength] ;
+        Matcher m = pattern.matcher(languageTag) ;
+        if ( ! m.find() )
+        {
+            m = patternPrivateuse.matcher(languageTag) ;
+            if ( m.find() )
+            {
+                // Place in the "extension" part
+                parts[idxExtension] = m.group(0) ;
+                return parts ;
+            }
+                
+            m = patternGrandfathered.matcher(languageTag) ;
+            
+            if ( m.find() )
+            {
+                // Place in the "extension" part
+                parts[idxExtension] = m.group(0) ;
+                return parts ;
+            }
+            
+            // Give up.
+            return null ;
+        }
+            
+        int gc = m.groupCount() ;
+        for ( int i = 0 ; i < gc ; i++ )
+            parts[i] = m.group(i+1) ;
+        
+        parts[idxLanguage]  = lowercase(parts[idxLanguage]) ;
+        parts[idxScript]    = strcase(parts[idxScript]) ;
+        parts[idxRegion]    = strcase(parts[idxRegion]) ;
+        parts[idxVariant]   = strcase(parts[idxVariant]) ;
+        //parts[idxExtension] = strcase(parts[idxExtension]) ;  // Leave extensions alone.
+        return parts ;
+    }
+
+    /** Canonicalize with the rules of RFC 4646 */
+    public static String canonical(String str)
+    {
+        if ( str == null )
+            return null ;
+        String[] parts = parse(str) ;
+        String x = canonical(parts) ;
+        if ( x == null )
+            return str ;
+        return x ;
+    }
+    
+    /** Canonicalize with the rules of RFC 4646
+    "In this format, all non-initial two-letter subtags are uppercase, all
+    non-initial four-letter subtags are titlecase, and all other subtags
+    are lowercase."
+    In addition, leave extensions unchanged.
+     */
+    public static String canonical(String[] parts)
+    {
+        if ( parts == null )
+            return null ;
+        
+        if ( parts[0] == null )
+        {
+            // Private use ("x-") or grandfathered ("i-"): whole tag is held in the extension slot
+            return parts[idxExtension] ;
+        }
+
+        StringBuilder sb = new StringBuilder() ;
+        sb.append(parts[0]) ;
+        for ( int i = 1 ; i < parts.length ; i++ )
+        {
+            if ( parts[i] != null )
+            {
+                sb.append("-") ;
+                sb.append(parts[i]) ;
+            }
+        }
+        return sb.toString(); 
+    }
+    
+    private static String strcase(String string)
+    {
+        if ( string == null ) return null ;
+        if ( string.length() == 2 ) return  uppercase(string) ;
+        if ( string.length() == 4 ) return  titlecase(string) ;
+        return lowercase(string) ;
+    }
+
+    private static String lowercase(String string)
+    {
+        if ( string == null ) return null ;
+        return string.toLowerCase() ;
+    }
+
+    private static String uppercase(String string)
+    {
+        if ( string == null ) return null ;
+        return string.toUpperCase() ;
+    }
+
+    private static String titlecase(String string)
+    {
+        if ( string == null ) return null ;
+        char ch1 = string.charAt(0) ;
+        ch1 = Character.toUpperCase(ch1) ;
+        string = string.substring(1).toLowerCase() ;
+        return ch1+string ;
+    }
+
+    // ----------
+    
+    public static void main(String ... args) //throws IOException
+    {
+        // Test data.
+        String[] tags = {
+            "en", "en-uk", "es-419", "zh-Hant", 
+            "sr-Latn-CS" , "sl-nedis", "sl-IT-nedis" , "sl-Latn-IT-nedis",
+            "de-CH-x-Phonebk",
+            "zh-cn-a-myExt-x-private",
+            "x-foo",
+            "x-kx-kx-kx",
+            "i-whatever",
+            "12345"} ;
+        
+        if ( args.length == 0 )
+            args = tags ;
+        
+        for ( String str : args )
+        {
+            String[] parts = LangTag.parse(str) ;
+            System.out.print("\""+str+"\"") ;
+            boolean first =true ;
+
+            if ( parts == null )
+            {
+                System.out.print("  ==>  Illegal") ;
+            }
+            else
+            {
+                String canonical = canonical(parts) ;
+                System.out.print("  ==>  \""+canonical+"\"") ;
+
+                System.out.print(" (") ;
+                for ( String s : parts )
+                {
+                    if ( ! first )
+                        System.out.print(", ") ;
+                    first = false ;
+                    if ( s == null )
+                        System.out.print("null") ;
+                    else
+                        System.out.print("\""+s+"\"") ;
+                }
+                System.out.print(")") ;
+            }
+            System.out.println() ;
+        }
+    }
+}

Modified: jena/trunk/jena-arq/src/test/java/arq/qtest.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/arq/qtest.java?rev=1401409&r1=1401408&r2=1401409&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/arq/qtest.java (original)
+++ jena/trunk/jena-arq/src/test/java/arq/qtest.java Tue Oct 23 19:23:48 2012
@@ -155,8 +155,6 @@ public class qtest extends CmdARQ
             
             createEarlReport = contains(earlDecl) ;
         }
-        
-        
     }
     
     @Override

Modified: jena/trunk/jena-arq/src/test/java/org/openjena/atlas/junit/TextListener2.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/org/openjena/atlas/junit/TextListener2.java?rev=1401409&r1=1401408&r2=1401409&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/org/openjena/atlas/junit/TextListener2.java (original)
+++ jena/trunk/jena-arq/src/test/java/org/openjena/atlas/junit/TextListener2.java Tue Oct 23 19:23:48 2012
@@ -26,7 +26,6 @@ import org.junit.runner.notification.Fai
 
 public class TextListener2 extends TextListener
 {
-
     private PrintStream out ;
     int count = 0 ;