You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2014/01/21 21:33:07 UTC

svn commit: r1560173 - in /jena/trunk/jena-arq/src: main/java/org/apache/jena/riot/web/LangTag.java test/java/org/apache/jena/riot/web/TestLangTag.java

Author: andy
Date: Tue Jan 21 20:33:06 2014
New Revision: 1560173

URL: http://svn.apache.org/r1560173
Log:
Tidy up code and comments.
Add tests for canonical form.

Modified:
    jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/web/LangTag.java
    jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/web/TestLangTag.java

Modified: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/web/LangTag.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/web/LangTag.java?rev=1560173&r1=1560172&r2=1560173&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/web/LangTag.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/web/LangTag.java Tue Jan 21 20:33:06 2014
@@ -16,7 +16,7 @@
  * limitations under the License.
  */
 
-package org.apache.jena.riot.web;
+package org.apache.jena.riot.web ;
 
 import java.util.Locale ;
 import java.util.regex.Matcher ;
@@ -25,134 +25,130 @@ import java.util.regex.Pattern ;
 import org.apache.jena.atlas.lib.Chars ;
 import org.apache.jena.riot.system.RiotChars ;
 
-
 /**
- * Language tags: support for parsing and canonicalization of case. 
- * Grandfathered forms ("i-") are left untouched.
- * Unsupported or syntactically illegal forms are handled in
- * canonicalization by doing nothing.
+ * Language tags: support for parsing and canonicalization of case.
+ * Grandfathered forms ("i-") are left untouched. Unsupported or syntactically
+ * illegal forms are handled in canonicalization by doing nothing.
  * <ul>
- * <li>Language tags syntax: <a href="http://www.ietf.org/rfc/rfc4646.txt">RFC 4646</a></li>
- * <li>Matching Language tags: <a href="http://www.ietf.org/rfc/rfc4647.txt">RFC 4647</a></li>
- * <li>Language tags syntax: <a href="http://www.ietf.org/rfc/rfc5646.txt">RFC 5646</a></li>
+ * <li>Language tags syntax: <a href="http://www.ietf.org/rfc/rfc4646.txt">RFC
+ * 4646</a></li>
+ * <li>Matching Language tags: <a href="http://www.ietf.org/rfc/rfc4647.txt">RFC
+ * 4647</a></li>
+ * <li>Language tags syntax: <a href="http://www.ietf.org/rfc/rfc5646.txt">RFC
+ * 5646</a></li>
  * </ul>
  */
-  
-public class LangTag
-{
-    // See also http://tools.ietf.org/html/rfc5646 - irregular lang tags
-    
+
+public class LangTag {
+    // Valid language tag, not irregular nor grandfathered.
     /** Index of the language part */
-    public static final int idxLanguage     = 0 ;
-    /** Index of the script part */ 
-    public static final int idxScript       = 1 ;
+    public static final int  idxLanguage  = 0 ;
+    /** Index of the script part */
+    public static final int  idxScript    = 1 ;
     /** Index of the region part */
-    public static final int idxRegion       = 2 ;
+    public static final int  idxRegion    = 2 ;
     /** Index of the variant part */
-    public static final int idxVariant      = 3 ;
+    public static final int  idxVariant   = 3 ;
     /** Index of all extensions */
-    public static final int idxExtension    = 4 ;
-    
-    private static final int partsLength    = 5 ;
-    
-    private LangTag(){}
-    
-    // ABNF is defined in http://www.ietf.org/rfc/rfc4234.txt
+    public static final int  idxExtension = 4 ;
+
+    private static final int partsLength  = 5 ;
+
+    private LangTag() {}
+
+    // Defined by BCP 47 which is currently RFC5646 which obsoletes RFC4646.
+
+    // Canonical forms:
+    /*
+     * RFC 4646 In this format, all non-initial two-letter subtags are
+     * uppercase, all non-initial four-letter subtags are titlecase, and all
+     * other subtags are lowercase.
+     */
+    /*
+     * RFC 5646 An implementation can reproduce this format without accessing
+     * the registry as follows. All subtags, including extension and private use
+     * subtags, use lowercase letters with two exceptions: two-letter and
+     * four-letter subtags that neither appear at the start of the tag nor occur
+     * after singletons. Such two-letter subtags are all uppercase (as in the
+     * tags "en-CA-x-ca" or "sgn-BE-FR") and four-letter subtags are titlecase
+     * (as in the tag "az-Latn-x-latn").
+     */
 
     /*
-    In this format, all non-initial two-letter subtags are uppercase, all
-    non-initial four-letter subtags are titlecase, and all other subtags
-    are lowercase.
-    */
-
-  
-  /*
-   *     <li>ABNF definition: <a href="http://www.ietf.org/rfc/rfc4234.txt">RFC 4234</a></li>
-
- Language-Tag  = langtag
-               / privateuse             ; private use tag
-               / grandfathered          ; grandfathered registrations
-
- langtag       = (language
-                  ["-" script]
-                  ["-" region]
-                  *("-" variant)
-                  *("-" extension)
-                  ["-" privateuse])
-
- language      = (2*3ALPHA [ extlang ]) ; shortest ISO 639 code
-               / 4ALPHA                 ; reserved for future use
-               / 5*8ALPHA               ; registered language subtag
-
- extlang       = *3("-" 3ALPHA)         ; reserved for future use
-
- script        = 4ALPHA                 ; ISO 15924 code
-
- region        = 2ALPHA                 ; ISO 3166 code
-               / 3DIGIT                 ; UN M.49 code
-
- variant       = 5*8alphanum            ; registered variants
-               / (DIGIT 3alphanum)
-
- extension     = singleton 1*("-" (2*8alphanum))
-
- singleton     = %x41-57 / %x59-5A / %x61-77 / %x79-7A / DIGIT
-               ; "a"-"w" / "y"-"z" / "A"-"W" / "Y"-"Z" / "0"-"9"
-               ; Single letters: x/X is reserved for private use
-
- privateuse    = ("x"/"X") 1*("-" (1*8alphanum))
-
- grandfathered = 1*3ALPHA 1*2("-" (2*8alphanum))
-                 ; grandfathered registration
-                 ; Note: i is the only singleton
-                 ; that starts a grandfathered tag
-
- alphanum      = (ALPHA / DIGIT)       ; letters and numbers
-                  
-
-   */
-
-    private static final String languageRE_1    = "(?:[a-zA-Z]{2,3}(?:-[a-zA-Z]{3}){0,3})" ; //including extlang
-    private static final String languageRE_2    = "[a-zA-Z]{4}" ;
-    private static final String languageRE_3    = "[a-zA-Z]{5,8}" ;
-    private static final String language        = "(?:"+languageRE_1+"|"+languageRE_2+"|"+languageRE_3+")" ;
-
-    private static final String script          = "[a-zA-Z]{4}" ;
-    private static final String region          = "[a-zA-Z]{2}|[0-9]{3}" ;
-    private static final String variant         = "[a-zA-Z0-9]{5,8}" ;
-    private static final String extension1      = "(?:[a-zA-Z0-9]-[a-zA-Z0-9]{2,8})" ;
-    private static final String extension       = extension1+"(?:-"+extension1+")*" ;
-    
-//    private static final String singleton = null ;
-//    private static final String privateuse = null ;
-//    private static final String grandfathered = null ;
-
-    private static final String langtag = String.format("^(%s)(?:-(%s))?(?:-(%s))?(?:-(%s))?(?:-(%s))?$"
-                                                        ,language
-                                                        ,script
-                                                        ,region
-                                                        ,variant
-                                                        ,extension
-                                                        ) ;
-    
+     * ABNF definition: <a href="http://www.ietf.org/rfc/rfc4234.txt">RFC
+     * 4234</a>
+     * 
+     * Language-Tag = langtag / privateuse ; private use tag / grandfathered ;
+     * grandfathered registrations
+     * 
+     * langtag = (language ["-" script] ["-" region] *("-" variant) *("-" extension)
+     * ["-" privateuse])
+     * 
+     * language = (2*3ALPHA [ extlang ]) ; shortest ISO 639 code / 4ALPHA ;
+     * reserved for future use / 5*8ALPHA ; registered language subtag
+     * 
+     * extlang = *3("-" 3ALPHA) ; reserved for future use
+     * 
+     * script = 4ALPHA ; ISO 15924 code
+     * 
+     * region = 2ALPHA ; ISO 3166 code / 3DIGIT ; UN M.49 code
+     * 
+     * variant = 5*8alphanum ; registered variants / (DIGIT 3alphanum)
+     * 
+     * extension = singleton 1*("-" (2*8alphanum))
+     * 
+     * singleton = %x41-57 / %x59-5A / %x61-77 / %x79-7A / DIGIT ; "a"-"w" /
+     * "y"-"z" / "A"-"W" / "Y"-"Z" / "0"-"9" ; Single letters: x/X is reserved
+     * for private use
+     * 
+     * privateuse = ("x"/"X") 1*("-" (1*8alphanum))
+     * 
+     * grandfathered = 1*3ALPHA 1*2("-" (2*8alphanum)) ; grandfathered
+     * registration ; Note: i is the only singleton ; that starts a
+     * grandfathered tag
+     * 
+     * alphanum = (ALPHA / DIGIT) ; letters and numbers
+     */
+
+    private static final String languageRE_1         = "(?:[a-zA-Z]{2,3}(?:-[a-zA-Z]{3}){0,3})" ;                   // including
+                                                                                                                     // extlang
+    private static final String languageRE_2         = "[a-zA-Z]{4}" ;
+    private static final String languageRE_3         = "[a-zA-Z]{5,8}" ;
+    private static final String language             = languageRE_1 + "|" + languageRE_2 + "|" + languageRE_3 ;
+
+    private static final String script               = "[a-zA-Z]{4}" ;
+    private static final String region               = "[a-zA-Z]{2}|[0-9]{3}" ;
+    private static final String variant              = "[a-zA-Z0-9]{5,8}" ;
+    private static final String extension1           = "(?:[a-zA-Z0-9]-[a-zA-Z0-9]{2,8})" ;
+    private static final String extension            = extension1 + "(?:-" + extension1 + ")*" ;
+
+    // private static final String singleton = null ;
+    // private static final String privateuse = null ;
+    // private static final String grandfathered = null ;
+
+    private static final String langtag              = String.format("^(%s)(?:-(%s))?(?:-(%s))?(?:-(%s))?(?:-(%s))?$",
+                                                                     language, script, region, variant, extension) ;
+
     // Private use forms "x-"
-    private static final String privateuseRE    = "^[xX](-[a-zA-Z0-9]{1,8})*$" ; 
-    // In general, this can look like a langtag but there are no registered forms that do so.
+    private static final String privateuseRE         = "^[xX](-[a-zA-Z0-9]{1,8})*$" ;
+    // In general, this can look like a langtag but there are no registered
+    // forms that do so.
     // This is for the "i-" forms only.
-    private static final String grandfatheredRE = "i(?:-[a-zA-Z0-9]{2,8}){1,2}" ;  
-    
-    private static Pattern pattern              = Pattern.compile(langtag) ;
-    private static Pattern patternPrivateuse    = Pattern.compile(privateuseRE) ;
-    private static Pattern patternGrandfathered = Pattern.compile(grandfatheredRE) ; 
-    
-    /** Validate - basic syntax check for a language tags: [a-zA-Z]+ ('-' [a-zA-Z0-9]+)* */
-    public static boolean check(String languageTag)
-    {
+    private static final String grandfatheredRE      = "i(?:-[a-zA-Z0-9]{2,8}){1,2}" ;
+
+    private static Pattern      pattern              = Pattern.compile(langtag) ;
+    private static Pattern      patternPrivateuse    = Pattern.compile(privateuseRE) ;
+    private static Pattern      patternGrandfathered = Pattern.compile(grandfatheredRE) ;
+
+    /**
+     * Validate - basic syntax check for a language tag: [a-zA-Z]+ ('-'
+     * [a-zA-Z0-9]+)*
+     */
+    public static boolean check(String languageTag) {
         int len = languageTag.length() ;
-        int idx = 0;
+        int idx = 0 ;
         boolean first = true ;
-        while ( idx < languageTag.length() )
-        {
+        while (idx < languageTag.length()) {
             int idx2 = checkPart(languageTag, idx, first) ;
             first = false ;
             if ( idx2 == idx )
@@ -163,26 +159,23 @@ public class LangTag
                 return true ;
             if ( languageTag.charAt(idx) != Chars.CH_DASH )
                 return false ;
-            idx ++ ;
-            if ( idx == len)
+            idx++ ;
+            if ( idx == len )
                 // trailing DASH
                 return false ;
         }
         return true ;
     }
-    
-    private static int checkPart(String languageTag, int idx, boolean leader)
-    {
-        for ( ; idx < languageTag.length() ; idx++)
-        {
+
+    private static int checkPart(String languageTag, int idx, boolean leader) {
+        for (; idx < languageTag.length(); idx++) {
             int ch = languageTag.charAt(idx) ;
-            if ( leader )
-            {
-                if ( RiotChars.isA2Z(ch) ) continue ;
-            }
-            else
-            {
-                if ( RiotChars.isA2ZN(ch) ) continue ;
+            if ( leader ) {
+                if ( RiotChars.isA2Z(ch) )
+                    continue ;
+            } else {
+                if ( RiotChars.isA2ZN(ch) )
+                    continue ;
             }
             // Not acceptable.
             return idx ;
@@ -191,168 +184,125 @@ public class LangTag
         return idx ;
     }
 
-    /** Parse a langtag string and return it's parts in canonical case.
-     *  See constants for the array contents.  Parts not present cause a null
-     *  in the return array. 
-     *  @return Langtag parts, or null if the input string does not poarse as a lang tag.  
+    /**
+     * Parse a langtag string and return its parts in canonical case. See
+     * constants for the array contents. Parts not present cause a null in the
+     * return array.
+     * 
+     * @return Langtag parts, or null if the input string does not parse as a
+     *         lang tag.
      */
-    public static String[] parse(String languageTag)
-    {
+    public static String[] parse(String languageTag) {
         String[] parts = new String[partsLength] ;
+
+        String x = pattern.toString() ;
+
+        Pattern.compile(languageRE_1) ;
+
         Matcher m = pattern.matcher(languageTag) ;
-        if ( ! m.find() )
-        {
+        if ( !m.find() ) {
             m = patternPrivateuse.matcher(languageTag) ;
-            if ( m.find() )
-            {
+            if ( m.find() ) {
                 // Place in the "extension" part
                 parts[idxExtension] = m.group(0) ;
                 return parts ;
             }
-                
+
             m = patternGrandfathered.matcher(languageTag) ;
-            
-            if ( m.find() )
-            {
+
+            if ( m.find() ) {
                 // Place in the "extension" part
                 parts[idxExtension] = m.group(0) ;
                 return parts ;
             }
-            
+
             // Give up.
             return null ;
         }
-            
+
         int gc = m.groupCount() ;
-        for ( int i = 0 ; i < gc ; i++ )
-            parts[i] = m.group(i+1) ;
-        
-        parts[idxLanguage]  = lowercase(parts[idxLanguage]) ;
-        parts[idxScript]    = strcase(parts[idxScript]) ;
-        parts[idxRegion]    = strcase(parts[idxRegion]) ;
-        parts[idxVariant]   = strcase(parts[idxVariant]) ;
-        //parts[idxExtension] = strcase(parts[idxExtension]) ;  // Leave extensions alone.
+        for (int i = 0; i < gc; i++)
+            parts[i] = m.group(i + 1) ;
+
+        parts[idxLanguage] = lowercase(parts[idxLanguage]) ;
+        parts[idxScript] = strcase(parts[idxScript]) ;
+        parts[idxRegion] = strcase(parts[idxRegion]) ;
+        parts[idxVariant] = strcase(parts[idxVariant]) ;
+        // parts[idxExtension] = strcase(parts[idxExtension]) ; // Leave
+        // extensions alone.
         return parts ;
     }
 
     /** Canonicalize with the rules of RFC 4646 */
-    public static String canonical(String str)
-    {
+    public static String canonical(String str) {
         if ( str == null )
             return null ;
         String[] parts = parse(str) ;
         String x = canonical(parts) ;
-        if ( x == null )
+        if ( x == null ) {
+            // Could try to apply the case-setting rules
+            // even though it's not a conforming langtag.
             return str ;
+        }
         return x ;
     }
-    
-    /** Canonicalize with the rules of RFC 4646
-    "In this format, all non-initial two-letter subtags are uppercase, all
-    non-initial four-letter subtags are titlecase, and all other subtags
-    are lowercase."
-    In addition, leave extensions unchanged.
+
+    /**
+     * Canonicalize with the rules of RFC 4646 "In this format, all non-initial
+     * two-letter subtags are uppercase, all non-initial four-letter subtags are
+     * titlecase, and all other subtags are lowercase." In addition, leave
+     * extensions unchanged.
      */
-    public static String canonical(String[] parts)
-    {
+    public static String canonical(String[] parts) {
+        // We canonicalised parts on parsing.
+        // RFC 5646 is slightly different.
         if ( parts == null )
             return null ;
-        
-        if ( parts[0] == null )
-        {
+
+        if ( parts[0] == null ) {
             // Grandfathered
             return parts[idxExtension] ;
         }
 
         StringBuilder sb = new StringBuilder() ;
         sb.append(parts[0]) ;
-        for ( int i = 1 ; i < parts.length ; i++ )
-        {
-            if ( parts[i] != null )
-            {
+        for (int i = 1; i < parts.length; i++) {
+            if ( parts[i] != null ) {
                 sb.append("-") ;
                 sb.append(parts[i]) ;
             }
         }
-        return sb.toString(); 
+        return sb.toString() ;
     }
-    
-    private static String strcase(String string)
-    {
-        if ( string == null ) return null ;
-        if ( string.length() == 2 ) return  uppercase(string) ;
-        if ( string.length() == 4 ) return  titlecase(string) ;
+
+    private static String strcase(String string) {
+        if ( string == null )
+            return null ;
+        if ( string.length() == 2 )
+            return uppercase(string) ;
+        if ( string.length() == 4 )
+            return titlecase(string) ;
         return lowercase(string) ;
     }
 
-    private static String lowercase(String string)
-    {
-        if ( string == null ) return null ;
+    private static String lowercase(String string) {
+        if ( string == null )
+            return null ;
         return string.toLowerCase(Locale.ROOT) ;
     }
 
-    private static String uppercase(String string)
-    {
-        if ( string == null ) return null ;
+    private static String uppercase(String string) {
+        if ( string == null )
+            return null ;
         return string.toUpperCase(Locale.ROOT) ;
     }
 
-    private static String titlecase(String string)
-    {
-        if ( string == null ) return null ;
+    private static String titlecase(String string) {
+        if ( string == null )
+            return null ;
         char ch1 = string.charAt(0) ;
         ch1 = Character.toUpperCase(ch1) ;
         string = lowercase(string.substring(1)) ;
-        return ch1+string ;
-    }
-
-    // ----------
-    
-    public static void main(String ... args) //throws IOException
-    {
-        // Test data.
-        String[] tags = {
-            "en", "en-uk", "es-419", "zh-Hant", 
-            "sr-Latn-CS" , "sl-nedis", "sl-IT-nedis" , "sl-Latn-IT-nedis",
-            "de-CH-x-Phonebk",
-            "zh-cn-a-myExt-x-private",
-            "x-foo",
-            "x-kx-kx-kx",
-            "i-whatever",
-            "12345"} ;
-        
-        if ( args.length == 0 )
-            args = tags ;
-        
-        for ( String str : args )
-        {
-            String[] parts = LangTag.parse(str) ;
-            System.out.print("\""+str+"\"") ;
-            boolean first =true ;
-
-            if ( parts == null )
-            {
-                System.out.print("  ==>  Illegal") ;
-            }
-            else
-            {
-                String canonical = canonical(parts) ;
-                System.out.print("  ==>  \""+canonical+"\"") ;
-
-                System.out.print(" (") ;
-                for ( String s : parts )
-                {
-                    if ( ! first )
-                        System.out.print(", ") ;
-                    first = false ;
-                    if ( s == null )
-                        System.out.print("null") ;
-                    else
-                        System.out.print("\""+s+"\"") ;
-                }
-                System.out.print(")") ;
-            }
-            System.out.println() ;
-        }
+        return ch1 + string ;
     }
 }

Modified: jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/web/TestLangTag.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/web/TestLangTag.java?rev=1560173&r1=1560172&r2=1560173&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/web/TestLangTag.java (original)
+++ jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/web/TestLangTag.java Tue Jan 21 20:33:06 2014
@@ -62,16 +62,16 @@ public class TestLangTag extends BaseTes
     
     private static void parseGood(String input, String ex_output, String... ex_parts )
     {
-      String[] parts = LangTag.parse(input) ;
-      assertArrayEquals(ex_parts, parts) ;
-      
-      String output = LangTag.canonical(input) ;
-      assertEquals(ex_output, output) ;
-      
-      assertTrue(LangTag.check(input)) ;
+        String[] parts = LangTag.parse(input) ;
+        assertArrayEquals(ex_parts, parts) ;
+
+        String output = LangTag.canonical(input) ;
+        assertEquals(ex_output, output) ;
+
+        assertTrue(LangTag.check(input)) ;
     }
 
-    
+
     private static void parseBad(String input)
     {
         String[] parts = LangTag.parse(input) ;
@@ -80,5 +80,41 @@ public class TestLangTag extends BaseTes
         assertEquals(input, output) ;
         assertFalse(LangTag.check(input)) ;
     }
+    
+    private void testCanonical(String input, String ex_output) {
+        String output = LangTag.canonical(input) ;
+        assertEquals(ex_output, output) ;
+    }
 
+    // "x" extensions and irregular forms are left alone, including "sgn-be-fr" 
+
+    // Mentioned in BCP 47 tests
+//    @Test public void parseCanonical_01() { testCanonical("en-ca-x-ca","en-CA-x-ca"); }         // "x"
+//    @Test public void parseCanonical_02() { testCanonical("EN-ca-X-Ca","en-CA-x-ca"); }
+//    @Test public void parseCanonical_03() { testCanonical("En-Ca-X-Ca","en-CA-x-ca"); }
+//    @Test public void parseCanonical_04() { testCanonical("SGN-BE-FR","sgn-BE-FR"); }   // Irregular
+//    @Test public void parseCanonical_05() { testCanonical("sgn-be-fr","sgn-BE-FR"); }   // Irregular
+//    @Test public void parseCanonical_06() { testCanonical("AZ-latn-x-LATN","az-Latn-x-latn"); }
+//    @Test public void parseCanonical_07() { testCanonical("Az-latn-X-Latn","az-Latn-x-latn"); }
+    
+    @Test public void parseCanonical_10() { testCanonical("zh-hant",            "zh-Hant"); }
+    @Test public void parseCanonical_11() { testCanonical("zh-latn-wadegile",   "zh-Latn-wadegile"); }
+    @Test public void parseCanonical_12() { testCanonical("zh-latn-pinyin",     "zh-Latn-pinyin"); }
+    @Test public void parseCanonical_13() { testCanonical("en-us",              "en-US"); }
+    @Test public void parseCanonical_14() { testCanonical("EN-Gb",              "en-GB"); }
+    @Test public void parseCanonical_15() { testCanonical("qqq-002",            "qqq-002"); }
+    @Test public void parseCanonical_16() { testCanonical("ja-latn",            "ja-Latn"); }
+    @Test public void parseCanonical_17() { testCanonical("x-local",            "x-local"); }
+    @Test public void parseCanonical_18() { testCanonical("he-latn",            "he-Latn"); }
+    @Test public void parseCanonical_19() { testCanonical("und",                "und"); }
+    @Test public void parseCanonical_20() { testCanonical("nn",                 "nn"); }
+    @Test public void parseCanonical_21() { testCanonical("ko-latn",            "ko-Latn"); }
+    @Test public void parseCanonical_22() { testCanonical("ar-latn",            "ar-Latn"); }
+    @Test public void parseCanonical_23() { testCanonical("la-x-liturgic",      "la-x-liturgic"); }
+    @Test public void parseCanonical_24() { testCanonical("fa-x-middle",        "fa-x-middle"); }
+    @Test public void parseCanonical_25() { testCanonical("qqq-142",            "qqq-142"); }
+    @Test public void parseCanonical_26() { testCanonical("bnt",                "bnt"); }
+    @Test public void parseCanonical_27() { testCanonical("grc-x-liturgic",     "grc-x-liturgic"); }
+    @Test public void parseCanonical_28() { testCanonical("egy-Latn",           "egy-Latn"); }
+    @Test public void parseCanonical_29() { testCanonical("la-x-medieval",      "la-x-medieval"); }
 }