You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/07/31 22:59:01 UTC

svn commit: r1367777 [6/14] - in /lucene/dev/branches/pforcodec_3892: ./ dev-tools/ dev-tools/eclipse/ dev-tools/maven/ dev-tools/scripts/ lucene/ lucene/analysis/ lucene/analysis/common/ lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ l...

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex Tue Jul 31 20:58:32 2012
@@ -39,7 +39,7 @@ import org.apache.lucene.analysis.tokena
  */
 %%
 
-%unicode 6.0
+%unicode 6.1
 %integer
 %final
 %public

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java Tue Jul 31 20:58:32 2012
@@ -59,7 +59,7 @@ public abstract class AbstractAnalysisFa
    * to inform user, that for this factory a {@link #luceneMatchVersion} is required */
   protected final void assureMatchVersion() {
     if (luceneMatchVersion == null) {
-      throw new InitializationException("Configuration Error: Factory '" + this.getClass().getName() +
+      throw new IllegalArgumentException("Configuration Error: Factory '" + this.getClass().getName() +
         "' needs a 'luceneMatchVersion' parameter");
     }
   }
@@ -86,7 +86,7 @@ public abstract class AbstractAnalysisFa
       if (useDefault) {
         return defaultVal;
       }
-      throw new InitializationException("Configuration Error: missing parameter '" + name + "'");
+      throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
     }
     return Integer.parseInt(s);
   }
@@ -99,7 +99,7 @@ public abstract class AbstractAnalysisFa
     String s = args.get(name);
     if (s==null) {
       if (useDefault) return defaultVal;
-      throw new InitializationException("Configuration Error: missing parameter '" + name + "'");
+      throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
     }
     return Boolean.parseBoolean(s);
   }
@@ -108,11 +108,11 @@ public abstract class AbstractAnalysisFa
     try {
       String pat = args.get(name);
       if (null == pat) {
-        throw new InitializationException("Configuration Error: missing parameter '" + name + "'");
+        throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
       }
       return Pattern.compile(args.get(name));
     } catch (PatternSyntaxException e) {
-      throw new InitializationException
+      throw new IllegalArgumentException
         ("Configuration Error: '" + name + "' can not be parsed in " +
          this.getClass().getSimpleName(), e);
     }
@@ -129,13 +129,17 @@ public abstract class AbstractAnalysisFa
       words = new CharArraySet(luceneMatchVersion,
           files.size() * 10, ignoreCase);
       for (String file : files) {
-        List<String> wlist = loader.getLines(file.trim());
+        List<String> wlist = getLines(loader, file.trim());
         words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist,
             ignoreCase));
       }
     }
     return words;
   }
+  
+  protected List<String> getLines(ResourceLoader loader, String resource) throws IOException {
+    return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8);
+  }
 
   /** same as {@link #getWordSet(ResourceLoader, String, boolean)},
    * except the input is in snowball format. */

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java Tue Jul 31 20:58:32 2012
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.util;
  */
 
 import java.io.Reader;
+import java.util.Set;
 
 import org.apache.lucene.analysis.CharFilter;
 
@@ -27,5 +28,39 @@ import org.apache.lucene.analysis.CharFi
  */
 public abstract class CharFilterFactory extends AbstractAnalysisFactory {
 
-  public abstract CharFilter create(Reader input);
+  private static final AnalysisSPILoader<CharFilterFactory> loader =
+      new AnalysisSPILoader<CharFilterFactory>(CharFilterFactory.class);
+  
+  /** looks up a charfilter by name from context classpath */
+  public static CharFilterFactory forName(String name) {
+    return loader.newInstance(name);
+  }
+  
+  /** looks up a charfilter class by name from context classpath */
+  public static Class<? extends CharFilterFactory> lookupClass(String name) {
+    return loader.lookupClass(name);
+  }
+  
+  /** returns a list of all available charfilter names */
+  public static Set<String> availableCharFilters() {
+    return loader.availableServices();
+  }
+
+  /** 
+   * Reloads the factory list from the given {@link ClassLoader}.
+   * Changes to the factories are visible after the method ends, all
+   * iterators ({@link #availableCharFilters()},...) stay consistent. 
+   * 
+   * <p><b>NOTE:</b> Only new factories are added, existing ones are
+   * never removed or replaced.
+   * 
+   * <p><em>This method is expensive and should only be called for discovery
+   * of new factories on the given classpath/classloader!</em>
+   */
+  public static void reloadCharFilters(ClassLoader classloader) {
+    loader.reload(classloader);
+  }
+
+  /** Wraps the given Reader with a CharFilter. */
+  public abstract Reader create(Reader input);
 }

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java Tue Jul 31 20:58:32 2012
@@ -19,29 +19,20 @@ package org.apache.lucene.analysis.util;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.List;
 
 /**
  * Abstraction for loading resources (streams, files, and classes).
  */
 public interface ResourceLoader {
 
+  /**
+   * Opens a named resource
+   */
   public InputStream openResource(String resource) throws IOException;
   
   /**
-   * Accesses a resource by name and returns the (non comment) lines
-   * containing data.
-   *
-   * <p>
-   * A comment line is any line that starts with the character "#"
-   * </p>
-   *
-   * @param resource
-   * @return a list of non-blank non-comment lines with whitespace trimmed
-   * from front and back.
-   * @throws IOException
+   * Creates an instance of the class with the given name and expected type
    */
-  public List<String> getLines(String resource) throws IOException;
-  
-  public <T> T newInstance(String cname, Class<T> expectedType, String ... subpackages);
+  // TODO: fix exception handling
+  public <T> T newInstance(String cname, Class<T> expectedType);
 }
\ No newline at end of file

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java Tue Jul 31 20:58:32 2012
@@ -17,6 +17,8 @@
 
 package org.apache.lucene.analysis.util;
 
+import java.io.IOException;
+
 /**
  * Interface for a component that needs to be initialized by
  * an implementation of {@link ResourceLoader}.
@@ -25,5 +27,5 @@ package org.apache.lucene.analysis.util;
  */
 public interface ResourceLoaderAware {
 
-  void inform(ResourceLoader loader);
+  void inform(ResourceLoader loader) throws IOException;
 }

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java Tue Jul 31 20:58:32 2012
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.util;
  * limitations under the License.
  */
 
+import java.util.Set;
+
 import org.apache.lucene.analysis.TokenStream;
 
 /**
@@ -25,6 +27,40 @@ import org.apache.lucene.analysis.TokenS
  */
 public abstract class TokenFilterFactory extends AbstractAnalysisFactory {
 
+  private static final AnalysisSPILoader<TokenFilterFactory> loader =
+      new AnalysisSPILoader<TokenFilterFactory>(TokenFilterFactory.class,
+          new String[] { "TokenFilterFactory", "FilterFactory" });
+  
+  /** looks up a tokenfilter by name from context classpath */
+  public static TokenFilterFactory forName(String name) {
+    return loader.newInstance(name);
+  }
+  
+  /** looks up a tokenfilter class by name from context classpath */
+  public static Class<? extends TokenFilterFactory> lookupClass(String name) {
+    return loader.lookupClass(name);
+  }
+  
+  /** returns a list of all available tokenfilter names from context classpath */
+  public static Set<String> availableTokenFilters() {
+    return loader.availableServices();
+  }
+  
+  /** 
+   * Reloads the factory list from the given {@link ClassLoader}.
+   * Changes to the factories are visible after the method ends, all
+   * iterators ({@link #availableTokenFilters()},...) stay consistent. 
+   * 
+   * <p><b>NOTE:</b> Only new factories are added, existing ones are
+   * never removed or replaced.
+   * 
+   * <p><em>This method is expensive and should only be called for discovery
+   * of new factories on the given classpath/classloader!</em>
+   */
+  public static void reloadTokenFilters(ClassLoader classloader) {
+    loader.reload(classloader);
+  }
+
   /** Transform the specified input TokenStream */
   public abstract TokenStream create(TokenStream input);
 }

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java Tue Jul 31 20:58:32 2012
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.util;
 import org.apache.lucene.analysis.Tokenizer;
 
 import java.io.Reader;
+import java.util.Set;
 
 /**
  * Abstract parent class for analysis factories that create {@link Tokenizer}
@@ -27,6 +28,39 @@ import java.io.Reader;
  */
 public abstract class TokenizerFactory extends AbstractAnalysisFactory {
 
+  private static final AnalysisSPILoader<TokenizerFactory> loader =
+      new AnalysisSPILoader<TokenizerFactory>(TokenizerFactory.class);
+  
+  /** looks up a tokenizer by name from context classpath */
+  public static TokenizerFactory forName(String name) {
+    return loader.newInstance(name);
+  }
+  
+  /** looks up a tokenizer class by name from context classpath */
+  public static Class<? extends TokenizerFactory> lookupClass(String name) {
+    return loader.lookupClass(name);
+  }
+  
+  /** returns a list of all available tokenizer names from context classpath */
+  public static Set<String> availableTokenizers() {
+    return loader.availableServices();
+  }
+  
+  /** 
+   * Reloads the factory list from the given {@link ClassLoader}.
+   * Changes to the factories are visible after the method ends, all
+   * iterators ({@link #availableTokenizers()},...) stay consistent. 
+   * 
+   * <p><b>NOTE:</b> Only new factories are added, existing ones are
+   * never removed or replaced.
+   * 
+   * <p><em>This method is expensive and should only be called for discovery
+   * of new factories on the given classpath/classloader!</em>
+   */
+  public static void reloadTokenizers(ClassLoader classloader) {
+    loader.reload(classloader);
+  }
+
   /** Creates a TokenStream of the specified input */
   public abstract Tokenizer create(Reader input);
 }

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java Tue Jul 31 20:58:32 2012
@@ -19,7 +19,11 @@ package org.apache.lucene.analysis.util;
 
 import java.io.BufferedReader;
 import java.io.IOException;
+import java.io.InputStream;
 import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.Version;
@@ -194,6 +198,47 @@ public class WordlistLoader {
     return result;
   }
   
+  /**
+   * Reads the given stream, decoded with the given character encoding, and
+   * returns the (non comment) lines containing data.
+   *
+   * <p>
+   * A comment line is any line that starts with the character "#"
+   * </p>
+   *
+   * @return a list of non-blank non-comment lines with whitespace trimmed
+   * @throws IOException If there is a low-level I/O error.
+   */
+  public static List<String> getLines(InputStream stream, Charset charset) throws IOException{
+    BufferedReader input = null;
+    ArrayList<String> lines;
+    boolean success = false;
+    try {
+      input = getBufferedReader(IOUtils.getDecodingReader(stream, charset));
+
+      lines = new ArrayList<String>();
+      for (String word=null; (word=input.readLine())!=null;) {
+        // skip initial bom marker
+        if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF')
+          word = word.substring(1);
+        // skip comments
+        if (word.startsWith("#")) continue;
+        word=word.trim();
+        // skip blank lines
+        if (word.length()==0) continue;
+        lines.add(word);
+      }
+      success = true;
+      return lines;
+    } finally {
+      if (success) {
+        IOUtils.close(input);
+      } else {
+        IOUtils.closeWhileHandlingException(input);
+      }
+    }
+  }
+  
   private static BufferedReader getBufferedReader(Reader reader) {
     return (reader instanceof BufferedReader) ? (BufferedReader) reader
         : new BufferedReader(reader);

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java Tue Jul 31 20:58:32 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 17:00 */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
 
 package org.apache.lucene.analysis.wikipedia;
 
@@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokena
 /**
  * This class is a scanner generated by 
  * <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 08.07.12 17:00 from the specification file
- * <tt>C:/Users/Uwe Schindler/Projects/lucene/lucene4199/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
+ * on 7/15/12 1:57 AM from the specification file
+ * <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
  */
 class WikipediaTokenizerImpl {
 

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java Tue Jul 31 20:58:32 2012
@@ -285,8 +285,7 @@ public class TestClassicAnalyzer extends
     DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
                                                                 MultiFields.getLiveDocs(reader),
                                                                 "content",
-                                                                new BytesRef("another"),
-                                                                false);
+                                                                new BytesRef("another"));
     assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
     assertEquals(1, tps.freq());
     assertEquals(3, tps.nextPosition());

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java Tue Jul 31 20:58:32 2012
@@ -103,7 +103,7 @@ public class TestKeywordAnalyzer extends
                                  new BytesRef("Q36"),
                                  MultiFields.getLiveDocs(reader),
                                  null,
-                                 false);
+                                 0);
     assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
     td = _TestUtil.docs(random(),
                         reader,
@@ -111,7 +111,7 @@ public class TestKeywordAnalyzer extends
                         new BytesRef("Q37"),
                         MultiFields.getLiveDocs(reader),
                         null,
-                        false);
+                        0);
     assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
   }
 

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java Tue Jul 31 20:58:32 2012
@@ -235,7 +235,7 @@ public class TestRandomChains extends Ba
   private static <T> Constructor<T> castConstructor(Class<T> instanceClazz, Constructor<?> ctor) {
     return (Constructor<T>) ctor;
   }
-  private static void getClassesForPackage(String pckgname, List<Class<?>> classes) throws Exception {
+  static void getClassesForPackage(String pckgname, List<Class<?>> classes) throws Exception {
     final ClassLoader cld = TestRandomChains.class.getClassLoader();
     final String path = pckgname.replace('.', '/');
     final Enumeration<URL> resources = cld.getResources(path);

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java Tue Jul 31 20:58:32 2012
@@ -202,7 +202,7 @@ public class TestStandardAnalyzer extend
   }
   
   public void testUnicodeWordBreaks() throws Exception {
-    WordBreakTestUnicode_6_0_0 wordBreakTest = new WordBreakTestUnicode_6_0_0();
+    WordBreakTestUnicode_6_1_0 wordBreakTest = new WordBreakTestUnicode_6_1_0();
     wordBreakTest.test(a);
   }
   

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java Tue Jul 31 20:58:32 2012
@@ -424,7 +424,7 @@ public class TestUAX29URLEmailTokenizer 
   }
 
   public void testUnicodeWordBreaks() throws Exception {
-    WordBreakTestUnicode_6_0_0 wordBreakTest = new WordBreakTestUnicode_6_0_0();
+    WordBreakTestUnicode_6_1_0 wordBreakTest = new WordBreakTestUnicode_6_1_0();
     wordBreakTest.test(a);
   }
   

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java Tue Jul 31 20:58:32 2012
@@ -30,6 +30,7 @@ import org.apache.lucene.analysis.Tokeni
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
+import org.junit.AfterClass;
 import org.junit.BeforeClass;
 
 public class HunspellStemFilterTest  extends BaseTokenStreamTestCase {
@@ -39,6 +40,10 @@ public class HunspellStemFilterTest  ext
   public static void beforeClass() throws IOException, ParseException {
     DICTIONARY = createDict(true);
   }
+  @AfterClass
+  public static void afterClass() {
+    DICTIONARY = null;
+  }
   public static HunspellDictionary createDict(boolean ignoreCase) throws IOException, ParseException {
     InputStream affixStream = HunspellStemmerTest.class.getResourceAsStream("test.aff");
     InputStream dictStream = HunspellStemmerTest.class.getResourceAsStream("test.dic");

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemmerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemmerTest.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemmerTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemmerTest.java Tue Jul 31 20:58:32 2012
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.hunsp
 
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.Version;
+import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
@@ -37,6 +38,11 @@ public class HunspellStemmerTest extends
   public static void beforeClass() throws IOException, ParseException {
     createStemmer(true);
   }
+  
+  @AfterClass
+  public static void afterClass() {
+    stemmer = null;
+  }
 
   @Test
   public void testStem_simpleSuffix() {

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java Tue Jul 31 20:58:32 2012
@@ -111,7 +111,7 @@ public class TestTeeSinkTokenFilter exte
     TermsEnum termsEnum = vector.iterator(null);
     termsEnum.next();
     assertEquals(2, termsEnum.totalTermFreq());
-    DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null, true);
+    DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null);
     assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
     assertEquals(2, positions.freq());
     positions.nextPosition();

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/build.xml?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/build.xml (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/build.xml Tue Jul 31 20:58:32 2012
@@ -26,7 +26,7 @@
   <import file="../analysis-module-build.xml"/>
 
   <path id="icujar">
-     <pathelement location="lib/icu4j-4.8.1.1.jar"/>
+     <pathelement location="lib/icu4j-49.1.jar"/>
   </path>
 
   <path id="classpath">
@@ -37,19 +37,32 @@
 
   <target name="compile-core" depends="jar-analyzers-common, common.compile-core" />
 
-  <property name="gennorm2.src.dir" value="src/data/utr30"/>
-  <property name="gennorm2.src.files" 
-  	value="nfkc.txt nfkc_cf.txt BasicFoldings.txt DiacriticFolding.txt DingbatFolding.txt HanRadicalFolding.txt NativeDigitFolding.txt"/>
+  <property name="utr30.data.dir" location="src/data/utr30"/>
+  <target name="gen-utr30-data-files" depends="compile-tools">
+    <java
+        classname="org.apache.lucene.analysis.icu.GenerateUTR30DataFiles"
+        dir="${utr30.data.dir}"
+        fork="true"
+        failonerror="true">
+      <classpath>
+        <path refid="icujar"/>
+        <pathelement location="${build.dir}/classes/tools"/>
+      </classpath>
+    </java>
+  </target>
+
+  <property name="gennorm2.src.files"
+  	value="nfc.txt nfkc.txt nfkc_cf.txt BasicFoldings.txt DiacriticFolding.txt DingbatFolding.txt HanRadicalFolding.txt NativeDigitFolding.txt"/>
   <property name="gennorm2.tmp" value="${build.dir}/gennorm2/utr30.tmp"/>
   <property name="gennorm2.dst" value="src/resources/org/apache/lucene/analysis/icu/utr30.nrm"/>
-  <target name="gennorm2">
+  <target name="gennorm2" depends="gen-utr30-data-files">
     <echo>Note that the gennorm2 and icupkg tools must be on your PATH. These tools
 are part of the ICU4C package. See http://site.icu-project.org/ </echo>
     <mkdir dir="${build.dir}/gennorm2"/>
     <exec executable="gennorm2" failonerror="true">
       <arg value="-v"/>
       <arg value="-s"/>
-      <arg value="${gennorm2.src.dir}"/>
+      <arg value="${utr30.data.dir}"/>
       <arg line="${gennorm2.src.files}"/>
       <arg value="-o"/>
       <arg value="${gennorm2.tmp}"/>

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/ivy.xml?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/ivy.xml (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/ivy.xml Tue Jul 31 20:58:32 2012
@@ -19,7 +19,7 @@
 <ivy-module version="2.0">
     <info organisation="org.apache.lucene" module="analyzers-icu"/>
     <dependencies>
-      <dependency org="com.ibm.icu" name="icu4j" rev="4.8.1.1" transitive="false"/>
+      <dependency org="com.ibm.icu" name="icu4j" rev="49.1" transitive="false"/>
       <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/> 
     </dependencies>
 </ivy-module>

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt Tue Jul 31 20:58:32 2012
@@ -20,67 +20,96 @@
 
 ### Custom Normalization mappings for UTR#30 
 ### (http://www.unicode.org/reports/tr30/tr30-4.html)
-###
-### Created from Unicode 5.2 UCD
-###
+
+#### WARNING ####
+#### Lines of the form "# Rule: ..." direct content generation.
+#### All non-comments will be REMOVED when this file's contents
+#### are generated by 'ant gen-utr30-data-files'.
+#### Use "# Rule: verbatim" to keep non-comments up until
+#### the next "# Rule:" line.
+#### WARNING ####
 
 ## Accent removal
 # See DiacriticFolding.txt
+
 ## Case Folding (done by cf)
+
 ## Canonical Duplicates Folding (done by cd)
+
 ## Dashes folding
-# [[:Dash:][:Pd:]]-2053(swung dash) > U+002D
+# Rule: [[[[:Dash:][:Pd:]]-[\u2053\uFE31\uFE32]] - [\u002D]] > 002D
 058A>002D
 05BE>002D
 1400>002D
 1806>002D
 2010..2015>002D
+207B>002D
+208B>002D
+2212>002D
 2E17>002D
 2E1A>002D
+2E3A..2E3B>002D
 301C>002D
 3030>002D
 30A0>002D
-#2053>002D
-2212>002D
-# FE31,FE32,FE58,FE63,FF0D done by kd
+FE58>002D
+FE63>002D
+FF0D>002D
 
 ## Greek letterforms folding (done by kd)
+
 ## Hebrew alternates folding (done by kd)
+
 ## Jamo folding (done by kd)
+
 ## Math symbol folding (done by kd)
+
 ## Native digit folding
 # See NativeDigitFolding.txt
+
 ## Nobreak folding (done by kd)
-## Overline Folding
-FE49..FE4C>203E
+
+## Overline Folding (done by kd)
+
 ## Positional forms folding (done by kd)
+
 ## Small forms folding (done by kd)
+
 ## Space Folding
-# [:Zs:] > U+0020
+# Rule: [[:Zs:] - [:Changes_When_NFKC_Casefolded=Yes:] - [\u0020]] > 0020
 1680>0020
 180E>0020
-# 00A0, 2000..200A,202F,205F,3000 done by kd
+
 ## Spacing Accents folding (done by kd)
+
 ## Subscript folding (done by kd)
+
 ## Symbol folding (done by kd)
+
 ## Underline Folding
+# Rule: verbatim
 2017>005F
 FE4D..FE4F>005F
+
 ## Diacritic Folding
-#
+# See DiacriticFolding.txt
 
 ## Vertical forms folding (done by kd)
+
 ## Han Radical Folding
 # See HanRadicalFolding.txt
+
 ## Letter Form Folding (done by kd)
 ## Superscript folding
 # Additions to kd:
+# Rule: verbatim
 02C0>0294
 02C1>0295
 06E5>0648
 06E6>064A
 ## Suzhou Numeral Folding
 # Additions to kd:
+# Rule: verbatim
 3021>4E00
 3022>4E8C
 3023>4E09
@@ -92,6 +121,7 @@ FE4D..FE4F>005E
 3029>4E5D
 ## Width Folding (done by kd)
 # Punctuation Folding
+# Rule: verbatim
 00AB>0022
 00BB>0022
 201C..201E>0022

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt Tue Jul 31 20:58:32 2012
@@ -24,41 +24,45 @@
 ### Created from Unicode 5.2 UCD
 ###
 
-# Removes diacritics, as defined by [:Diacritic:]
-# These may or may not be combining marks
+#### WARNING ####
+#### Lines of the form "# Rule: ..." direct content generation.
+#### All non-comments will be REMOVED when this file's contents
+#### are generated by 'ant gen-utr30-data-files'.
+#### Use "# Rule: verbatim" to keep non-comments up until
+#### the next "# Rule:" line.
+#### WARNING ####
+
+## Remove diacritics
+# Rule: [:Diacritic:] >
 005E>
 0060>
-00B7>
-02B9..02D7>
-02DE>
-02DF>
-02E5..033F>
-0342>
-0346..034E>
+00A8>
+00AF>
+00B4>
+00B7..00B8>
+02B0..034E>
 0350..0357>
 035D..0362>
-0375>
+0374..0375>
+037A>
+0384..0385>
 0483..0487>
 0559>
 0591..05A1>
 05A3..05BD>
 05BF>
-05C1>
-05C2>
+05C1..05C2>
 05C4>
 064B..0652>
-0657>
-0658>
-06DF>
-06E0>
-06E5>
-06E6>
+0657..0658>
+06DF..06E0>
+06E5..06E6>
 06EA..06EC>
 0730..074A>
 07A6..07B0>
 07EB..07F5>
-0818>
-0819>
+0818..0819>
+08E4..08FE>
 093C>
 094D>
 0951..0954>
@@ -80,24 +84,19 @@
 0E47..0E4C>
 0E4E>
 0EC8..0ECC>
-0F18>
-0F19>
+0F18..0F19>
 0F35>
 0F37>
 0F39>
-0F3E>
-0F3F>
+0F3E..0F3F>
 0F82..0F84>
-0F86>
-0F87>
+0F86..0F87>
 0FC6>
 1037>
-1039>
-103A>
+1039..103A>
 1087..108D>
 108F>
-109A>
-109B>
+109A..109B>
 17C9..17D3>
 17DD>
 1939..193B>
@@ -106,31 +105,33 @@
 1B34>
 1B44>
 1B6B..1B73>
-1BAA>
-1C36>
-1C37>
+1BAA..1BAB>
+1C36..1C37>
 1C78..1C7D>
 1CD0..1CE8>
 1CED>
-1D2F>
-1D3B>
-1D4E>
+1CF4>
+1D2C..1D6A>
 1DC4..1DCF>
 1DFD..1DFF>
+1FBD>
+1FBF..1FC1>
+1FCD..1FCF>
+1FDD..1FDF>
+1FED..1FEF>
+1FFD..1FFE>
 2CEF..2CF1>
 2E2F>
 302A..302F>
-3099>
-309A>
+3099..309C>
 30FC>
 A66F>
-A67C>
-A67D>
+A67C..A67D>
 A67F>
-A6F0>
-A6F1>
+A6F0..A6F1>
 A717..A721>
 A788>
+A7F8..A7F9>
 A8C4>
 A8E0..A8F1>
 A92B..A92E>
@@ -139,12 +140,20 @@ A9B3>
 A9C0>
 AA7B>
 AABF..AAC2>
-ABEC>
-ABED>
+AAF6>
+ABEC..ABED>
 FB1E>
 FE20..FE26>
-110B9>
-110BA>
+FF3E>
+FF40>
+FF70>
+FF9E..FF9F>
+FFE3>
+110B9..110BA>
+11133..11134>
+111C0>
+116B6..116B7>
+16F8F..16F9F>
 1D167..1D169>
 1D16D..1D172>
 1D17B..1D182>
@@ -153,6 +162,7 @@ FE20..FE26>
 
 # Latin script "composed" that do not further decompose, so decompose here
 # These are from AsciiFoldingFilter
+# Rule: verbatim
 00E6>0061 0065
 00F0>0064
 00F8>006F
@@ -491,6 +501,7 @@ A7FF>004D
 
 # Cyrillic script "composed" that do not further decompose, so decompose here
 # These are from UTR#30 DiacriticFolding.txt
+# Rule: verbatim
 
 047D>0461
 048B>0439
@@ -520,6 +531,7 @@ A7FF>004D
 04CE>043C
 
 # Additional signs and diacritic, from examination of [:Mark:]&[:Lm:]
+# Rule: verbatim
 0358..035C>
 05A2>
 05C5>
@@ -555,6 +567,7 @@ A802>
 1D242..1D244>
 
 # Additional Arabic/Hebrew decompositions
+# Rule: verbatim
 05F3>0027
 05F4>0022
 0629>0647

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/DingbatFolding.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/DingbatFolding.txt?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/DingbatFolding.txt (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/DingbatFolding.txt Tue Jul 31 20:58:32 2012
@@ -24,8 +24,17 @@
 ### Created from Unicode 5.2 UCD
 ###
 
+#### WARNING ####
+#### Lines of the form "# Rule: ..." direct content generation.
+#### All non-comments will be REMOVED when this file's contents
+#### are generated by 'ant gen-utr30-data-files'.
+#### Use "# Rule: verbatim" to keep non-comments up until
+#### the next "# Rule:" line.
+#### WARNING ####
+
 # Folds dingbats and other adorned forms
 # Generated from ASCIIFoldingFilter
+# Rule: verbatim
 24EB>0031 0031
 24EC>0031 0032
 24ED>0031 0033

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/HanRadicalFolding.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/HanRadicalFolding.txt?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/HanRadicalFolding.txt (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/HanRadicalFolding.txt Tue Jul 31 20:58:32 2012
@@ -24,6 +24,16 @@
 ### Created from UTR#30 HanRadicalFolding.txt
 ###
 
+#### WARNING ####
+#### Lines of the form "# Rule: ..." direct content generation.
+#### All non-comments will be REMOVED when this file's contents
+#### are generated by 'ant gen-utr30-data-files'.
+#### Use "# Rule: verbatim" to keep non-comments up until
+#### the next "# Rule:" line.
+#### WARNING ####
+
+# Rule: verbatim
+
 # CJK Radicals
 2E81>5382
 2E82>4E5B

Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt Tue Jul 31 20:58:32 2012
@@ -1,7 +1,7 @@
-# Copyright 2001-2010 Unicode, Inc.
-# 
+# Copyright 2001-2012 Unicode, Inc.
+#
 # Disclaimer
-# 
+#
 # This source code is provided as is by Unicode, Inc. No claims are
 # made as to fitness for any particular purpose. No warranties of any
 # kind are expressed or implied. The recipient agrees to determine
@@ -9,463 +9,485 @@
 # purchased on magnetic or optical media from Unicode, Inc., the
 # sole remedy for any claim will be exchange of defective media
 # within 90 days of receipt.
-# 
+#
 # Limitations on Rights to Redistribute This Code
-# 
+#
 # Unicode, Inc. hereby grants the right to freely use the information
 # supplied in this file in the creation of products supporting the
 # Unicode Standard, and to make copies of this file in any form
 # for internal or external distribution as long as this notice
 # remains attached.
 
-### Custom Normalization mappings for UTR#30 
+### Custom Normalization mappings for UTR#30
 ### (http://www.unicode.org/reports/tr30/tr30-4.html)
-###
-### Created from Unicode 5.2 UCD
-###
+
+#### WARNING ####
+#### Lines of the form "# Rule: ..." direct content generation.
+#### All non-comments will be REMOVED when this file's contents
+#### are generated by 'ant gen-utr30-data-files'.
+#### Use "# Rule: verbatim" to keep non-comments up until
+#### the next "# Rule:" line.
+#### WARNING ####
 
 ## Native digit folding
-# [:Nd:] > Ascii digit equivalent
-# Arabic-Indic
-0660>0030
-0661>0031
-0662>0032
-0663>0033
-0664>0034
-0665>0035
-0666>0036
-0667>0037
-0668>0038
-0669>0039
-# Eastern Arabic-Indic
-06F0>0030
-06F1>0031
-06F2>0032
-06F3>0033
-06F4>0034
-06F5>0035
-06F6>0036
-06F7>0037
-06F8>0038
-06F9>0039
-# NKo
-07C0>0030
-07C1>0031
-07C2>0032
-07C3>0033
-07C4>0034
-07C5>0035
-07C6>0036
-07C7>0037
-07C8>0038
-07C9>0039
-# Devanagari
-0966>0030
-0967>0031
-0968>0032
-0969>0033
-096A>0034
-096B>0035
-096C>0036
-096D>0037
-096E>0038
-096F>0039
-# Bengali
-09E6>0030
-09E7>0031
-09E8>0032
-09E9>0033
-09EA>0034
-09EB>0035
-09EC>0036
-09ED>0037
-09EE>0038
-09EF>0039
-# Gurmukhi
-0A66>0030
-0A67>0031
-0A68>0032
-0A69>0033
-0A6A>0034
-0A6B>0035
-0A6C>0036
-0A6D>0037
-0A6E>0038
-0A6F>0039
-# Gujarati
-0AE6>0030
-0AE7>0031
-0AE8>0032
-0AE9>0033
-0AEA>0034
-0AEB>0035
-0AEC>0036
-0AED>0037
-0AEE>0038
-0AEF>0039
-# Oriya
-0B66>0030
-0B67>0031
-0B68>0032
-0B69>0033
-0B6A>0034
-0B6B>0035
-0B6C>0036
-0B6D>0037
-0B6E>0038
-0B6F>0039
-# Tamil
-0BE6>0030
-0BE7>0031
-0BE8>0032
-0BE9>0033
-0BEA>0034
-0BEB>0035
-0BEC>0036
-0BED>0037
-0BEE>0038
-0BEF>0039
-# Telugu
-0C66>0030
-0C67>0031
-0C68>0032
-0C69>0033
-0C6A>0034
-0C6B>0035
-0C6C>0036
-0C6D>0037
-0C6E>0038
-0C6F>0039
-# Kannada
-0CE6>0030
-0CE7>0031
-0CE8>0032
-0CE9>0033
-0CEA>0034
-0CEB>0035
-0CEC>0036
-0CED>0037
-0CEE>0038
-0CEF>0039
-# Malayalam
-0D66>0030
-0D67>0031
-0D68>0032
-0D69>0033
-0D6A>0034
-0D6B>0035
-0D6C>0036
-0D6D>0037
-0D6E>0038
-0D6F>0039
-# Thai
-0E50>0030
-0E51>0031
-0E52>0032
-0E53>0033
-0E54>0034
-0E55>0035
-0E56>0036
-0E57>0037
-0E58>0038
-0E59>0039
-# Lao
-0ED0>0030
-0ED1>0031
-0ED2>0032
-0ED3>0033
-0ED4>0034
-0ED5>0035
-0ED6>0036
-0ED7>0037
-0ED8>0038
-0ED9>0039
-# Tibetan
-0F20>0030
-0F21>0031
-0F22>0032
-0F23>0033
-0F24>0034
-0F25>0035
-0F26>0036
-0F27>0037
-0F28>0038
-0F29>0039
-# Myanmar
-1040>0030
-1041>0031
-1042>0032
-1043>0033
-1044>0034
-1045>0035
-1046>0036
-1047>0037
-1048>0038
-1049>0039
-# Myanmar Shan
-1090>0030
-1091>0031
-1092>0032
-1093>0033
-1094>0034
-1095>0035
-1096>0036
-1097>0037
-1098>0038
-1099>0039
-# Khmer
-17E0>0030
-17E1>0031
-17E2>0032
-17E3>0033
-17E4>0034
-17E5>0035
-17E6>0036
-17E7>0037
-17E8>0038
-17E9>0039
-# Mongolian
-1810>0030
-1811>0031
-1812>0032
-1813>0033
-1814>0034
-1815>0035
-1816>0036
-1817>0037
-1818>0038
-1819>0039
-# Limbu
-1946>0030
-1947>0031
-1948>0032
-1949>0033
-194A>0034
-194B>0035
-194C>0036
-194D>0037
-194E>0038
-194F>0039
-# New Tai Lue
-19D0>0030
-19D1>0031
-19D2>0032
-19D3>0033
-19D4>0034
-19D5>0035
-19D6>0036
-19D7>0037
-19D8>0038
-19D9>0039
-# New Tai Lue Tham Digit One
-19DA>0031
-# Tai Tham Hora
-1A80>0030
-1A81>0031
-1A82>0032
-1A83>0033
-1A84>0034
-1A85>0035
-1A86>0036
-1A87>0037
-1A88>0038
-1A89>0039
-# Tai Tham Tham
-1A90>0030
-1A91>0031
-1A92>0032
-1A93>0033
-1A94>0034
-1A95>0035
-1A96>0036
-1A97>0037
-1A98>0038
-1A99>0039
-# Balinese
-1B50>0030
-1B51>0031
-1B52>0032
-1B53>0033
-1B54>0034
-1B55>0035
-1B56>0036
-1B57>0037
-1B58>0038
-1B59>0039
-# Sundanese
-1BB0>0030
-1BB1>0031
-1BB2>0032
-1BB3>0033
-1BB4>0034
-1BB5>0035
-1BB6>0036
-1BB7>0037
-1BB8>0038
-1BB9>0039
-# Lepcha
-1C40>0030
-1C41>0031
-1C42>0032
-1C43>0033
-1C44>0034
-1C45>0035
-1C46>0036
-1C47>0037
-1C48>0038
-1C49>0039
-# Ol Chiki
-1C50>0030
-1C51>0031
-1C52>0032
-1C53>0033
-1C54>0034
-1C55>0035
-1C56>0036
-1C57>0037
-1C58>0038
-1C59>0039
-# Vai
-A620>0030
-A621>0031
-A622>0032
-A623>0033
-A624>0034
-A625>0035
-A626>0036
-A627>0037
-A628>0038
-A629>0039
-# Saurashtra
-A8D0>0030
-A8D1>0031
-A8D2>0032
-A8D3>0033
-A8D4>0034
-A8D5>0035
-A8D6>0036
-A8D7>0037
-A8D8>0038
-A8D9>0039
-# Kayah Li
-A900>0030
-A901>0031
-A902>0032
-A903>0033
-A904>0034
-A905>0035
-A906>0036
-A907>0037
-A908>0038
-A909>0039
-# Javanese
-A9D0>0030
-A9D1>0031
-A9D2>0032
-A9D3>0033
-A9D4>0034
-A9D5>0035
-A9D6>0036
-A9D7>0037
-A9D8>0038
-A9D9>0039
-# Cham
-AA50>0030
-AA51>0031
-AA52>0032
-AA53>0033
-AA54>0034
-AA55>0035
-AA56>0036
-AA57>0037
-AA58>0038
-AA59>0039
-# Meetei Mayek
-ABF0>0030
-ABF1>0031
-ABF2>0032
-ABF3>0033
-ABF4>0034
-ABF5>0035
-ABF6>0036
-ABF7>0037
-ABF8>0038
-ABF9>0039
-# Halfwidth and Fullwidth Forms (done by kd)
-# Osmanya
-104A0>0030
-104A1>0031
-104A2>0032
-104A3>0033
-104A4>0034
-104A5>0035
-104A6>0036
-104A7>0037
-104A8>0038
-104A9>0039
-# Brahmi
-11066>0030
-11067>0031
-11068>0032
-11069>0033
-1106A>0034
-1106B>0035
-1106C>0036
-1106D>0037
-1106E>0038
-1106F>0039
-# Mathematical Alphanumeric Symbols - Bold digits
-1D7CE>0030
-1D7CF>0031
-1D7D0>0032
-1D7D1>0033
-1D7D2>0034
-1D7D3>0035
-1D7D4>0036
-1D7D5>0037
-1D7D6>0038
-1D7D7>0039
-# Mathematical Alphanumeric Symbols - Double-struck digits
-1D7D8>0030
-1D7D9>0031
-1D7DA>0032
-1D7DB>0033
-1D7DC>0034
-1D7DD>0035
-1D7DE>0036
-1D7DF>0037
-1D7E0>0038
-1D7E1>0039
-# Mathematical Alphanumeric Symbols - Sans-serif digits
-1D7E2>0030
-1D7E3>0031
-1D7E4>0032
-1D7E5>0033
-1D7E6>0034
-1D7E7>0035
-1D7E8>0036
-1D7E9>0037
-1D7EA>0038
-1D7EB>0039
-# Mathematical Alphanumeric Symbols - Sans-serif bold digits
-1D7EC>0030
-1D7ED>0031
-1D7EE>0032
-1D7EF>0033
-1D7F0>0034
-1D7F1>0035
-1D7F2>0036
-1D7F3>0037
-1D7F4>0038
-1D7F5>0039
-# Mathematical Alphanumeric Symbols - Monospace digits
-1D7F6>0030
-1D7F7>0031
-1D7F8>0032
-1D7F9>0033
-1D7FA>0034
-1D7FB>0035
-1D7FC>0036
-1D7FD>0037
-1D7FE>0038
-1D7FF>0039
+# Rule: [[[:Numeric_Type=Digit:][:Nd:]] - [[:Changes_When_NFKC_Casefolded=Yes:][:Block=Superscripts_And_Subscripts:][\u00B2\u00B3\u00B9][\u0030-\u0039]]] > Numeric_Value
+0660>0030   # ARABIC-INDIC DIGIT ZERO
+0661>0031   # ARABIC-INDIC DIGIT ONE
+0662>0032   # ARABIC-INDIC DIGIT TWO
+0663>0033   # ARABIC-INDIC DIGIT THREE
+0664>0034   # ARABIC-INDIC DIGIT FOUR
+0665>0035   # ARABIC-INDIC DIGIT FIVE
+0666>0036   # ARABIC-INDIC DIGIT SIX
+0667>0037   # ARABIC-INDIC DIGIT SEVEN
+0668>0038   # ARABIC-INDIC DIGIT EIGHT
+0669>0039   # ARABIC-INDIC DIGIT NINE
+06F0>0030   # EXTENDED ARABIC-INDIC DIGIT ZERO
+06F1>0031   # EXTENDED ARABIC-INDIC DIGIT ONE
+06F2>0032   # EXTENDED ARABIC-INDIC DIGIT TWO
+06F3>0033   # EXTENDED ARABIC-INDIC DIGIT THREE
+06F4>0034   # EXTENDED ARABIC-INDIC DIGIT FOUR
+06F5>0035   # EXTENDED ARABIC-INDIC DIGIT FIVE
+06F6>0036   # EXTENDED ARABIC-INDIC DIGIT SIX
+06F7>0037   # EXTENDED ARABIC-INDIC DIGIT SEVEN
+06F8>0038   # EXTENDED ARABIC-INDIC DIGIT EIGHT
+06F9>0039   # EXTENDED ARABIC-INDIC DIGIT NINE
+07C0>0030   # NKO DIGIT ZERO
+07C1>0031   # NKO DIGIT ONE
+07C2>0032   # NKO DIGIT TWO
+07C3>0033   # NKO DIGIT THREE
+07C4>0034   # NKO DIGIT FOUR
+07C5>0035   # NKO DIGIT FIVE
+07C6>0036   # NKO DIGIT SIX
+07C7>0037   # NKO DIGIT SEVEN
+07C8>0038   # NKO DIGIT EIGHT
+07C9>0039   # NKO DIGIT NINE
+0966>0030   # DEVANAGARI DIGIT ZERO
+0967>0031   # DEVANAGARI DIGIT ONE
+0968>0032   # DEVANAGARI DIGIT TWO
+0969>0033   # DEVANAGARI DIGIT THREE
+096A>0034   # DEVANAGARI DIGIT FOUR
+096B>0035   # DEVANAGARI DIGIT FIVE
+096C>0036   # DEVANAGARI DIGIT SIX
+096D>0037   # DEVANAGARI DIGIT SEVEN
+096E>0038   # DEVANAGARI DIGIT EIGHT
+096F>0039   # DEVANAGARI DIGIT NINE
+09E6>0030   # BENGALI DIGIT ZERO
+09E7>0031   # BENGALI DIGIT ONE
+09E8>0032   # BENGALI DIGIT TWO
+09E9>0033   # BENGALI DIGIT THREE
+09EA>0034   # BENGALI DIGIT FOUR
+09EB>0035   # BENGALI DIGIT FIVE
+09EC>0036   # BENGALI DIGIT SIX
+09ED>0037   # BENGALI DIGIT SEVEN
+09EE>0038   # BENGALI DIGIT EIGHT
+09EF>0039   # BENGALI DIGIT NINE
+0A66>0030   # GURMUKHI DIGIT ZERO
+0A67>0031   # GURMUKHI DIGIT ONE
+0A68>0032   # GURMUKHI DIGIT TWO
+0A69>0033   # GURMUKHI DIGIT THREE
+0A6A>0034   # GURMUKHI DIGIT FOUR
+0A6B>0035   # GURMUKHI DIGIT FIVE
+0A6C>0036   # GURMUKHI DIGIT SIX
+0A6D>0037   # GURMUKHI DIGIT SEVEN
+0A6E>0038   # GURMUKHI DIGIT EIGHT
+0A6F>0039   # GURMUKHI DIGIT NINE
+0AE6>0030   # GUJARATI DIGIT ZERO
+0AE7>0031   # GUJARATI DIGIT ONE
+0AE8>0032   # GUJARATI DIGIT TWO
+0AE9>0033   # GUJARATI DIGIT THREE
+0AEA>0034   # GUJARATI DIGIT FOUR
+0AEB>0035   # GUJARATI DIGIT FIVE
+0AEC>0036   # GUJARATI DIGIT SIX
+0AED>0037   # GUJARATI DIGIT SEVEN
+0AEE>0038   # GUJARATI DIGIT EIGHT
+0AEF>0039   # GUJARATI DIGIT NINE
+0B66>0030   # ORIYA DIGIT ZERO
+0B67>0031   # ORIYA DIGIT ONE
+0B68>0032   # ORIYA DIGIT TWO
+0B69>0033   # ORIYA DIGIT THREE
+0B6A>0034   # ORIYA DIGIT FOUR
+0B6B>0035   # ORIYA DIGIT FIVE
+0B6C>0036   # ORIYA DIGIT SIX
+0B6D>0037   # ORIYA DIGIT SEVEN
+0B6E>0038   # ORIYA DIGIT EIGHT
+0B6F>0039   # ORIYA DIGIT NINE
+0BE6>0030   # TAMIL DIGIT ZERO
+0BE7>0031   # TAMIL DIGIT ONE
+0BE8>0032   # TAMIL DIGIT TWO
+0BE9>0033   # TAMIL DIGIT THREE
+0BEA>0034   # TAMIL DIGIT FOUR
+0BEB>0035   # TAMIL DIGIT FIVE
+0BEC>0036   # TAMIL DIGIT SIX
+0BED>0037   # TAMIL DIGIT SEVEN
+0BEE>0038   # TAMIL DIGIT EIGHT
+0BEF>0039   # TAMIL DIGIT NINE
+0C66>0030   # TELUGU DIGIT ZERO
+0C67>0031   # TELUGU DIGIT ONE
+0C68>0032   # TELUGU DIGIT TWO
+0C69>0033   # TELUGU DIGIT THREE
+0C6A>0034   # TELUGU DIGIT FOUR
+0C6B>0035   # TELUGU DIGIT FIVE
+0C6C>0036   # TELUGU DIGIT SIX
+0C6D>0037   # TELUGU DIGIT SEVEN
+0C6E>0038   # TELUGU DIGIT EIGHT
+0C6F>0039   # TELUGU DIGIT NINE
+0CE6>0030   # KANNADA DIGIT ZERO
+0CE7>0031   # KANNADA DIGIT ONE
+0CE8>0032   # KANNADA DIGIT TWO
+0CE9>0033   # KANNADA DIGIT THREE
+0CEA>0034   # KANNADA DIGIT FOUR
+0CEB>0035   # KANNADA DIGIT FIVE
+0CEC>0036   # KANNADA DIGIT SIX
+0CED>0037   # KANNADA DIGIT SEVEN
+0CEE>0038   # KANNADA DIGIT EIGHT
+0CEF>0039   # KANNADA DIGIT NINE
+0D66>0030   # MALAYALAM DIGIT ZERO
+0D67>0031   # MALAYALAM DIGIT ONE
+0D68>0032   # MALAYALAM DIGIT TWO
+0D69>0033   # MALAYALAM DIGIT THREE
+0D6A>0034   # MALAYALAM DIGIT FOUR
+0D6B>0035   # MALAYALAM DIGIT FIVE
+0D6C>0036   # MALAYALAM DIGIT SIX
+0D6D>0037   # MALAYALAM DIGIT SEVEN
+0D6E>0038   # MALAYALAM DIGIT EIGHT
+0D6F>0039   # MALAYALAM DIGIT NINE
+0E50>0030   # THAI DIGIT ZERO
+0E51>0031   # THAI DIGIT ONE
+0E52>0032   # THAI DIGIT TWO
+0E53>0033   # THAI DIGIT THREE
+0E54>0034   # THAI DIGIT FOUR
+0E55>0035   # THAI DIGIT FIVE
+0E56>0036   # THAI DIGIT SIX
+0E57>0037   # THAI DIGIT SEVEN
+0E58>0038   # THAI DIGIT EIGHT
+0E59>0039   # THAI DIGIT NINE
+0ED0>0030   # LAO DIGIT ZERO
+0ED1>0031   # LAO DIGIT ONE
+0ED2>0032   # LAO DIGIT TWO
+0ED3>0033   # LAO DIGIT THREE
+0ED4>0034   # LAO DIGIT FOUR
+0ED5>0035   # LAO DIGIT FIVE
+0ED6>0036   # LAO DIGIT SIX
+0ED7>0037   # LAO DIGIT SEVEN
+0ED8>0038   # LAO DIGIT EIGHT
+0ED9>0039   # LAO DIGIT NINE
+0F20>0030   # TIBETAN DIGIT ZERO
+0F21>0031   # TIBETAN DIGIT ONE
+0F22>0032   # TIBETAN DIGIT TWO
+0F23>0033   # TIBETAN DIGIT THREE
+0F24>0034   # TIBETAN DIGIT FOUR
+0F25>0035   # TIBETAN DIGIT FIVE
+0F26>0036   # TIBETAN DIGIT SIX
+0F27>0037   # TIBETAN DIGIT SEVEN
+0F28>0038   # TIBETAN DIGIT EIGHT
+0F29>0039   # TIBETAN DIGIT NINE
+1040>0030   # MYANMAR DIGIT ZERO
+1041>0031   # MYANMAR DIGIT ONE
+1042>0032   # MYANMAR DIGIT TWO
+1043>0033   # MYANMAR DIGIT THREE
+1044>0034   # MYANMAR DIGIT FOUR
+1045>0035   # MYANMAR DIGIT FIVE
+1046>0036   # MYANMAR DIGIT SIX
+1047>0037   # MYANMAR DIGIT SEVEN
+1048>0038   # MYANMAR DIGIT EIGHT
+1049>0039   # MYANMAR DIGIT NINE
+1090>0030   # MYANMAR SHAN DIGIT ZERO
+1091>0031   # MYANMAR SHAN DIGIT ONE
+1092>0032   # MYANMAR SHAN DIGIT TWO
+1093>0033   # MYANMAR SHAN DIGIT THREE
+1094>0034   # MYANMAR SHAN DIGIT FOUR
+1095>0035   # MYANMAR SHAN DIGIT FIVE
+1096>0036   # MYANMAR SHAN DIGIT SIX
+1097>0037   # MYANMAR SHAN DIGIT SEVEN
+1098>0038   # MYANMAR SHAN DIGIT EIGHT
+1099>0039   # MYANMAR SHAN DIGIT NINE
+1369>0031   # ETHIOPIC DIGIT ONE
+136A>0032   # ETHIOPIC DIGIT TWO
+136B>0033   # ETHIOPIC DIGIT THREE
+136C>0034   # ETHIOPIC DIGIT FOUR
+136D>0035   # ETHIOPIC DIGIT FIVE
+136E>0036   # ETHIOPIC DIGIT SIX
+136F>0037   # ETHIOPIC DIGIT SEVEN
+1370>0038   # ETHIOPIC DIGIT EIGHT
+1371>0039   # ETHIOPIC DIGIT NINE
+17E0>0030   # KHMER DIGIT ZERO
+17E1>0031   # KHMER DIGIT ONE
+17E2>0032   # KHMER DIGIT TWO
+17E3>0033   # KHMER DIGIT THREE
+17E4>0034   # KHMER DIGIT FOUR
+17E5>0035   # KHMER DIGIT FIVE
+17E6>0036   # KHMER DIGIT SIX
+17E7>0037   # KHMER DIGIT SEVEN
+17E8>0038   # KHMER DIGIT EIGHT
+17E9>0039   # KHMER DIGIT NINE
+1810>0030   # MONGOLIAN DIGIT ZERO
+1811>0031   # MONGOLIAN DIGIT ONE
+1812>0032   # MONGOLIAN DIGIT TWO
+1813>0033   # MONGOLIAN DIGIT THREE
+1814>0034   # MONGOLIAN DIGIT FOUR
+1815>0035   # MONGOLIAN DIGIT FIVE
+1816>0036   # MONGOLIAN DIGIT SIX
+1817>0037   # MONGOLIAN DIGIT SEVEN
+1818>0038   # MONGOLIAN DIGIT EIGHT
+1819>0039   # MONGOLIAN DIGIT NINE
+1946>0030   # LIMBU DIGIT ZERO
+1947>0031   # LIMBU DIGIT ONE
+1948>0032   # LIMBU DIGIT TWO
+1949>0033   # LIMBU DIGIT THREE
+194A>0034   # LIMBU DIGIT FOUR
+194B>0035   # LIMBU DIGIT FIVE
+194C>0036   # LIMBU DIGIT SIX
+194D>0037   # LIMBU DIGIT SEVEN
+194E>0038   # LIMBU DIGIT EIGHT
+194F>0039   # LIMBU DIGIT NINE
+19D0>0030   # NEW TAI LUE DIGIT ZERO
+19D1>0031   # NEW TAI LUE DIGIT ONE
+19D2>0032   # NEW TAI LUE DIGIT TWO
+19D3>0033   # NEW TAI LUE DIGIT THREE
+19D4>0034   # NEW TAI LUE DIGIT FOUR
+19D5>0035   # NEW TAI LUE DIGIT FIVE
+19D6>0036   # NEW TAI LUE DIGIT SIX
+19D7>0037   # NEW TAI LUE DIGIT SEVEN
+19D8>0038   # NEW TAI LUE DIGIT EIGHT
+19D9>0039   # NEW TAI LUE DIGIT NINE
+19DA>0031   # NEW TAI LUE THAM DIGIT ONE
+1A80>0030   # TAI THAM HORA DIGIT ZERO
+1A81>0031   # TAI THAM HORA DIGIT ONE
+1A82>0032   # TAI THAM HORA DIGIT TWO
+1A83>0033   # TAI THAM HORA DIGIT THREE
+1A84>0034   # TAI THAM HORA DIGIT FOUR
+1A85>0035   # TAI THAM HORA DIGIT FIVE
+1A86>0036   # TAI THAM HORA DIGIT SIX
+1A87>0037   # TAI THAM HORA DIGIT SEVEN
+1A88>0038   # TAI THAM HORA DIGIT EIGHT
+1A89>0039   # TAI THAM HORA DIGIT NINE
+1A90>0030   # TAI THAM THAM DIGIT ZERO
+1A91>0031   # TAI THAM THAM DIGIT ONE
+1A92>0032   # TAI THAM THAM DIGIT TWO
+1A93>0033   # TAI THAM THAM DIGIT THREE
+1A94>0034   # TAI THAM THAM DIGIT FOUR
+1A95>0035   # TAI THAM THAM DIGIT FIVE
+1A96>0036   # TAI THAM THAM DIGIT SIX
+1A97>0037   # TAI THAM THAM DIGIT SEVEN
+1A98>0038   # TAI THAM THAM DIGIT EIGHT
+1A99>0039   # TAI THAM THAM DIGIT NINE
+1B50>0030   # BALINESE DIGIT ZERO
+1B51>0031   # BALINESE DIGIT ONE
+1B52>0032   # BALINESE DIGIT TWO
+1B53>0033   # BALINESE DIGIT THREE
+1B54>0034   # BALINESE DIGIT FOUR
+1B55>0035   # BALINESE DIGIT FIVE
+1B56>0036   # BALINESE DIGIT SIX
+1B57>0037   # BALINESE DIGIT SEVEN
+1B58>0038   # BALINESE DIGIT EIGHT
+1B59>0039   # BALINESE DIGIT NINE
+1BB0>0030   # SUNDANESE DIGIT ZERO
+1BB1>0031   # SUNDANESE DIGIT ONE
+1BB2>0032   # SUNDANESE DIGIT TWO
+1BB3>0033   # SUNDANESE DIGIT THREE
+1BB4>0034   # SUNDANESE DIGIT FOUR
+1BB5>0035   # SUNDANESE DIGIT FIVE
+1BB6>0036   # SUNDANESE DIGIT SIX
+1BB7>0037   # SUNDANESE DIGIT SEVEN
+1BB8>0038   # SUNDANESE DIGIT EIGHT
+1BB9>0039   # SUNDANESE DIGIT NINE
+1C40>0030   # LEPCHA DIGIT ZERO
+1C41>0031   # LEPCHA DIGIT ONE
+1C42>0032   # LEPCHA DIGIT TWO
+1C43>0033   # LEPCHA DIGIT THREE
+1C44>0034   # LEPCHA DIGIT FOUR
+1C45>0035   # LEPCHA DIGIT FIVE
+1C46>0036   # LEPCHA DIGIT SIX
+1C47>0037   # LEPCHA DIGIT SEVEN
+1C48>0038   # LEPCHA DIGIT EIGHT
+1C49>0039   # LEPCHA DIGIT NINE
+1C50>0030   # OL CHIKI DIGIT ZERO
+1C51>0031   # OL CHIKI DIGIT ONE
+1C52>0032   # OL CHIKI DIGIT TWO
+1C53>0033   # OL CHIKI DIGIT THREE
+1C54>0034   # OL CHIKI DIGIT FOUR
+1C55>0035   # OL CHIKI DIGIT FIVE
+1C56>0036   # OL CHIKI DIGIT SIX
+1C57>0037   # OL CHIKI DIGIT SEVEN
+1C58>0038   # OL CHIKI DIGIT EIGHT
+1C59>0039   # OL CHIKI DIGIT NINE
+24F5>0031   # DOUBLE CIRCLED DIGIT ONE
+24F6>0032   # DOUBLE CIRCLED DIGIT TWO
+24F7>0033   # DOUBLE CIRCLED DIGIT THREE
+24F8>0034   # DOUBLE CIRCLED DIGIT FOUR
+24F9>0035   # DOUBLE CIRCLED DIGIT FIVE
+24FA>0036   # DOUBLE CIRCLED DIGIT SIX
+24FB>0037   # DOUBLE CIRCLED DIGIT SEVEN
+24FC>0038   # DOUBLE CIRCLED DIGIT EIGHT
+24FD>0039   # DOUBLE CIRCLED DIGIT NINE
+24FF>0030   # NEGATIVE CIRCLED DIGIT ZERO
+2776>0031   # DINGBAT NEGATIVE CIRCLED DIGIT ONE
+2777>0032   # DINGBAT NEGATIVE CIRCLED DIGIT TWO
+2778>0033   # DINGBAT NEGATIVE CIRCLED DIGIT THREE
+2779>0034   # DINGBAT NEGATIVE CIRCLED DIGIT FOUR
+277A>0035   # DINGBAT NEGATIVE CIRCLED DIGIT FIVE
+277B>0036   # DINGBAT NEGATIVE CIRCLED DIGIT SIX
+277C>0037   # DINGBAT NEGATIVE CIRCLED DIGIT SEVEN
+277D>0038   # DINGBAT NEGATIVE CIRCLED DIGIT EIGHT
+277E>0039   # DINGBAT NEGATIVE CIRCLED DIGIT NINE
+2780>0031   # DINGBAT CIRCLED SANS-SERIF DIGIT ONE
+2781>0032   # DINGBAT CIRCLED SANS-SERIF DIGIT TWO
+2782>0033   # DINGBAT CIRCLED SANS-SERIF DIGIT THREE
+2783>0034   # DINGBAT CIRCLED SANS-SERIF DIGIT FOUR
+2784>0035   # DINGBAT CIRCLED SANS-SERIF DIGIT FIVE
+2785>0036   # DINGBAT CIRCLED SANS-SERIF DIGIT SIX
+2786>0037   # DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN
+2787>0038   # DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT
+2788>0039   # DINGBAT CIRCLED SANS-SERIF DIGIT NINE
+278A>0031   # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE
+278B>0032   # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO
+278C>0033   # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE
+278D>0034   # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR
+278E>0035   # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE
+278F>0036   # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX
+2790>0037   # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN
+2791>0038   # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT
+2792>0039   # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE
+A620>0030   # VAI DIGIT ZERO
+A621>0031   # VAI DIGIT ONE
+A622>0032   # VAI DIGIT TWO
+A623>0033   # VAI DIGIT THREE
+A624>0034   # VAI DIGIT FOUR
+A625>0035   # VAI DIGIT FIVE
+A626>0036   # VAI DIGIT SIX
+A627>0037   # VAI DIGIT SEVEN
+A628>0038   # VAI DIGIT EIGHT
+A629>0039   # VAI DIGIT NINE
+A8D0>0030   # SAURASHTRA DIGIT ZERO
+A8D1>0031   # SAURASHTRA DIGIT ONE
+A8D2>0032   # SAURASHTRA DIGIT TWO
+A8D3>0033   # SAURASHTRA DIGIT THREE
+A8D4>0034   # SAURASHTRA DIGIT FOUR
+A8D5>0035   # SAURASHTRA DIGIT FIVE
+A8D6>0036   # SAURASHTRA DIGIT SIX
+A8D7>0037   # SAURASHTRA DIGIT SEVEN
+A8D8>0038   # SAURASHTRA DIGIT EIGHT
+A8D9>0039   # SAURASHTRA DIGIT NINE
+A900>0030   # KAYAH LI DIGIT ZERO
+A901>0031   # KAYAH LI DIGIT ONE
+A902>0032   # KAYAH LI DIGIT TWO
+A903>0033   # KAYAH LI DIGIT THREE
+A904>0034   # KAYAH LI DIGIT FOUR
+A905>0035   # KAYAH LI DIGIT FIVE
+A906>0036   # KAYAH LI DIGIT SIX
+A907>0037   # KAYAH LI DIGIT SEVEN
+A908>0038   # KAYAH LI DIGIT EIGHT
+A909>0039   # KAYAH LI DIGIT NINE
+A9D0>0030   # JAVANESE DIGIT ZERO
+A9D1>0031   # JAVANESE DIGIT ONE
+A9D2>0032   # JAVANESE DIGIT TWO
+A9D3>0033   # JAVANESE DIGIT THREE
+A9D4>0034   # JAVANESE DIGIT FOUR
+A9D5>0035   # JAVANESE DIGIT FIVE
+A9D6>0036   # JAVANESE DIGIT SIX
+A9D7>0037   # JAVANESE DIGIT SEVEN
+A9D8>0038   # JAVANESE DIGIT EIGHT
+A9D9>0039   # JAVANESE DIGIT NINE
+AA50>0030   # CHAM DIGIT ZERO
+AA51>0031   # CHAM DIGIT ONE
+AA52>0032   # CHAM DIGIT TWO
+AA53>0033   # CHAM DIGIT THREE
+AA54>0034   # CHAM DIGIT FOUR
+AA55>0035   # CHAM DIGIT FIVE
+AA56>0036   # CHAM DIGIT SIX
+AA57>0037   # CHAM DIGIT SEVEN
+AA58>0038   # CHAM DIGIT EIGHT
+AA59>0039   # CHAM DIGIT NINE
+ABF0>0030   # MEETEI MAYEK DIGIT ZERO
+ABF1>0031   # MEETEI MAYEK DIGIT ONE
+ABF2>0032   # MEETEI MAYEK DIGIT TWO
+ABF3>0033   # MEETEI MAYEK DIGIT THREE
+ABF4>0034   # MEETEI MAYEK DIGIT FOUR
+ABF5>0035   # MEETEI MAYEK DIGIT FIVE
+ABF6>0036   # MEETEI MAYEK DIGIT SIX
+ABF7>0037   # MEETEI MAYEK DIGIT SEVEN
+ABF8>0038   # MEETEI MAYEK DIGIT EIGHT
+ABF9>0039   # MEETEI MAYEK DIGIT NINE
+104A0>0030   # OSMANYA DIGIT ZERO
+104A1>0031   # OSMANYA DIGIT ONE
+104A2>0032   # OSMANYA DIGIT TWO
+104A3>0033   # OSMANYA DIGIT THREE
+104A4>0034   # OSMANYA DIGIT FOUR
+104A5>0035   # OSMANYA DIGIT FIVE
+104A6>0036   # OSMANYA DIGIT SIX
+104A7>0037   # OSMANYA DIGIT SEVEN
+104A8>0038   # OSMANYA DIGIT EIGHT
+104A9>0039   # OSMANYA DIGIT NINE
+10A40>0031   # KHAROSHTHI DIGIT ONE
+10A41>0032   # KHAROSHTHI DIGIT TWO
+10A42>0033   # KHAROSHTHI DIGIT THREE
+10A43>0034   # KHAROSHTHI DIGIT FOUR
+10E60>0031   # RUMI DIGIT ONE
+10E61>0032   # RUMI DIGIT TWO
+10E62>0033   # RUMI DIGIT THREE
+10E63>0034   # RUMI DIGIT FOUR
+10E64>0035   # RUMI DIGIT FIVE
+10E65>0036   # RUMI DIGIT SIX
+10E66>0037   # RUMI DIGIT SEVEN
+10E67>0038   # RUMI DIGIT EIGHT
+10E68>0039   # RUMI DIGIT NINE
+11052>0031   # BRAHMI NUMBER ONE
+11053>0032   # BRAHMI NUMBER TWO
+11054>0033   # BRAHMI NUMBER THREE
+11055>0034   # BRAHMI NUMBER FOUR
+11056>0035   # BRAHMI NUMBER FIVE
+11057>0036   # BRAHMI NUMBER SIX
+11058>0037   # BRAHMI NUMBER SEVEN
+11059>0038   # BRAHMI NUMBER EIGHT
+1105A>0039   # BRAHMI NUMBER NINE
+11066>0030   # BRAHMI DIGIT ZERO
+11067>0031   # BRAHMI DIGIT ONE
+11068>0032   # BRAHMI DIGIT TWO
+11069>0033   # BRAHMI DIGIT THREE
+1106A>0034   # BRAHMI DIGIT FOUR
+1106B>0035   # BRAHMI DIGIT FIVE
+1106C>0036   # BRAHMI DIGIT SIX
+1106D>0037   # BRAHMI DIGIT SEVEN
+1106E>0038   # BRAHMI DIGIT EIGHT
+1106F>0039   # BRAHMI DIGIT NINE
+110F0>0030   # SORA SOMPENG DIGIT ZERO
+110F1>0031   # SORA SOMPENG DIGIT ONE
+110F2>0032   # SORA SOMPENG DIGIT TWO
+110F3>0033   # SORA SOMPENG DIGIT THREE
+110F4>0034   # SORA SOMPENG DIGIT FOUR
+110F5>0035   # SORA SOMPENG DIGIT FIVE
+110F6>0036   # SORA SOMPENG DIGIT SIX
+110F7>0037   # SORA SOMPENG DIGIT SEVEN
+110F8>0038   # SORA SOMPENG DIGIT EIGHT
+110F9>0039   # SORA SOMPENG DIGIT NINE
+11136>0030   # CHAKMA DIGIT ZERO
+11137>0031   # CHAKMA DIGIT ONE
+11138>0032   # CHAKMA DIGIT TWO
+11139>0033   # CHAKMA DIGIT THREE
+1113A>0034   # CHAKMA DIGIT FOUR
+1113B>0035   # CHAKMA DIGIT FIVE
+1113C>0036   # CHAKMA DIGIT SIX
+1113D>0037   # CHAKMA DIGIT SEVEN
+1113E>0038   # CHAKMA DIGIT EIGHT
+1113F>0039   # CHAKMA DIGIT NINE
+111D0>0030   # SHARADA DIGIT ZERO
+111D1>0031   # SHARADA DIGIT ONE
+111D2>0032   # SHARADA DIGIT TWO
+111D3>0033   # SHARADA DIGIT THREE
+111D4>0034   # SHARADA DIGIT FOUR
+111D5>0035   # SHARADA DIGIT FIVE
+111D6>0036   # SHARADA DIGIT SIX
+111D7>0037   # SHARADA DIGIT SEVEN
+111D8>0038   # SHARADA DIGIT EIGHT
+111D9>0039   # SHARADA DIGIT NINE
+116C0>0030   # TAKRI DIGIT ZERO
+116C1>0031   # TAKRI DIGIT ONE
+116C2>0032   # TAKRI DIGIT TWO
+116C3>0033   # TAKRI DIGIT THREE
+116C4>0034   # TAKRI DIGIT FOUR
+116C5>0035   # TAKRI DIGIT FIVE
+116C6>0036   # TAKRI DIGIT SIX
+116C7>0037   # TAKRI DIGIT SEVEN
+116C8>0038   # TAKRI DIGIT EIGHT
+116C9>0039   # TAKRI DIGIT NINE
+