You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2012/07/31 22:59:01 UTC
svn commit: r1367777 [6/14] - in /lucene/dev/branches/pforcodec_3892: ./
dev-tools/ dev-tools/eclipse/ dev-tools/maven/ dev-tools/scripts/ lucene/
lucene/analysis/ lucene/analysis/common/
lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ l...
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex Tue Jul 31 20:58:32 2012
@@ -39,7 +39,7 @@ import org.apache.lucene.analysis.tokena
*/
%%
-%unicode 6.0
+%unicode 6.1
%integer
%final
%public
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java Tue Jul 31 20:58:32 2012
@@ -59,7 +59,7 @@ public abstract class AbstractAnalysisFa
* to inform user, that for this factory a {@link #luceneMatchVersion} is required */
protected final void assureMatchVersion() {
if (luceneMatchVersion == null) {
- throw new InitializationException("Configuration Error: Factory '" + this.getClass().getName() +
+ throw new IllegalArgumentException("Configuration Error: Factory '" + this.getClass().getName() +
"' needs a 'luceneMatchVersion' parameter");
}
}
@@ -86,7 +86,7 @@ public abstract class AbstractAnalysisFa
if (useDefault) {
return defaultVal;
}
- throw new InitializationException("Configuration Error: missing parameter '" + name + "'");
+ throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
}
return Integer.parseInt(s);
}
@@ -99,7 +99,7 @@ public abstract class AbstractAnalysisFa
String s = args.get(name);
if (s==null) {
if (useDefault) return defaultVal;
- throw new InitializationException("Configuration Error: missing parameter '" + name + "'");
+ throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
}
return Boolean.parseBoolean(s);
}
@@ -108,11 +108,11 @@ public abstract class AbstractAnalysisFa
try {
String pat = args.get(name);
if (null == pat) {
- throw new InitializationException("Configuration Error: missing parameter '" + name + "'");
+ throw new IllegalArgumentException("Configuration Error: missing parameter '" + name + "'");
}
return Pattern.compile(args.get(name));
} catch (PatternSyntaxException e) {
- throw new InitializationException
+ throw new IllegalArgumentException
("Configuration Error: '" + name + "' can not be parsed in " +
this.getClass().getSimpleName(), e);
}
@@ -129,13 +129,17 @@ public abstract class AbstractAnalysisFa
words = new CharArraySet(luceneMatchVersion,
files.size() * 10, ignoreCase);
for (String file : files) {
- List<String> wlist = loader.getLines(file.trim());
+ List<String> wlist = getLines(loader, file.trim());
words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist,
ignoreCase));
}
}
return words;
}
+
+ protected List<String> getLines(ResourceLoader loader, String resource) throws IOException {
+ return WordlistLoader.getLines(loader.openResource(resource), IOUtils.CHARSET_UTF_8);
+ }
/** same as {@link #getWordSet(ResourceLoader, String, boolean)},
* except the input is in snowball format. */
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharFilterFactory.java Tue Jul 31 20:58:32 2012
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.util;
*/
import java.io.Reader;
+import java.util.Set;
import org.apache.lucene.analysis.CharFilter;
@@ -27,5 +28,39 @@ import org.apache.lucene.analysis.CharFi
*/
public abstract class CharFilterFactory extends AbstractAnalysisFactory {
- public abstract CharFilter create(Reader input);
+ private static final AnalysisSPILoader<CharFilterFactory> loader =
+ new AnalysisSPILoader<CharFilterFactory>(CharFilterFactory.class);
+
+ /** looks up a charfilter by name from context classpath */
+ public static CharFilterFactory forName(String name) {
+ return loader.newInstance(name);
+ }
+
+ /** looks up a charfilter class by name from context classpath */
+ public static Class<? extends CharFilterFactory> lookupClass(String name) {
+ return loader.lookupClass(name);
+ }
+
+ /** returns a list of all available charfilter names */
+ public static Set<String> availableCharFilters() {
+ return loader.availableServices();
+ }
+
+ /**
+ * Reloads the factory list from the given {@link ClassLoader}.
+ * Changes to the factories are visible after the method ends, all
+ * iterators ({@link #availableCharFilters()},...) stay consistent.
+ *
+ * <p><b>NOTE:</b> Only new factories are added, existing ones are
+ * never removed or replaced.
+ *
+ * <p><em>This method is expensive and should only be called for discovery
+ * of new factories on the given classpath/classloader!</em>
+ */
+ public static void reloadCharFilters(ClassLoader classloader) {
+ loader.reload(classloader);
+ }
+
+ /** Wraps the given Reader with a CharFilter. */
+ public abstract Reader create(Reader input);
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoader.java Tue Jul 31 20:58:32 2012
@@ -19,29 +19,20 @@ package org.apache.lucene.analysis.util;
import java.io.IOException;
import java.io.InputStream;
-import java.util.List;
/**
* Abstraction for loading resources (streams, files, and classes).
*/
public interface ResourceLoader {
+ /**
+ * Opens a named resource
+ */
public InputStream openResource(String resource) throws IOException;
/**
- * Accesses a resource by name and returns the (non comment) lines
- * containing data.
- *
- * <p>
- * A comment line is any line that starts with the character "#"
- * </p>
- *
- * @param resource
- * @return a list of non-blank non-comment lines with whitespace trimmed
- * from front and back.
- * @throws IOException
+ * Creates a class of the name and expected type
*/
- public List<String> getLines(String resource) throws IOException;
-
- public <T> T newInstance(String cname, Class<T> expectedType, String ... subpackages);
+ // TODO: fix exception handling
+ public <T> T newInstance(String cname, Class<T> expectedType);
}
\ No newline at end of file
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ResourceLoaderAware.java Tue Jul 31 20:58:32 2012
@@ -17,6 +17,8 @@
package org.apache.lucene.analysis.util;
+import java.io.IOException;
+
/**
* Interface for a component that needs to be initialized by
* an implementation of {@link ResourceLoader}.
@@ -25,5 +27,5 @@ package org.apache.lucene.analysis.util;
*/
public interface ResourceLoaderAware {
- void inform(ResourceLoader loader);
+ void inform(ResourceLoader loader) throws IOException;
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenFilterFactory.java Tue Jul 31 20:58:32 2012
@@ -17,6 +17,8 @@ package org.apache.lucene.analysis.util;
* limitations under the License.
*/
+import java.util.Set;
+
import org.apache.lucene.analysis.TokenStream;
/**
@@ -25,6 +27,40 @@ import org.apache.lucene.analysis.TokenS
*/
public abstract class TokenFilterFactory extends AbstractAnalysisFactory {
+ private static final AnalysisSPILoader<TokenFilterFactory> loader =
+ new AnalysisSPILoader<TokenFilterFactory>(TokenFilterFactory.class,
+ new String[] { "TokenFilterFactory", "FilterFactory" });
+
+ /** looks up a tokenfilter by name from context classpath */
+ public static TokenFilterFactory forName(String name) {
+ return loader.newInstance(name);
+ }
+
+ /** looks up a tokenfilter class by name from context classpath */
+ public static Class<? extends TokenFilterFactory> lookupClass(String name) {
+ return loader.lookupClass(name);
+ }
+
+ /** returns a list of all available tokenfilter names from context classpath */
+ public static Set<String> availableTokenFilters() {
+ return loader.availableServices();
+ }
+
+ /**
+ * Reloads the factory list from the given {@link ClassLoader}.
+ * Changes to the factories are visible after the method ends, all
+ * iterators ({@link #availableTokenFilters()},...) stay consistent.
+ *
+ * <p><b>NOTE:</b> Only new factories are added, existing ones are
+ * never removed or replaced.
+ *
+ * <p><em>This method is expensive and should only be called for discovery
+ * of new factories on the given classpath/classloader!</em>
+ */
+ public static void reloadTokenFilters(ClassLoader classloader) {
+ loader.reload(classloader);
+ }
+
/** Transform the specified input TokenStream */
public abstract TokenStream create(TokenStream input);
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/TokenizerFactory.java Tue Jul 31 20:58:32 2012
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.util;
import org.apache.lucene.analysis.Tokenizer;
import java.io.Reader;
+import java.util.Set;
/**
* Abstract parent class for analysis factories that create {@link Tokenizer}
@@ -27,6 +28,39 @@ import java.io.Reader;
*/
public abstract class TokenizerFactory extends AbstractAnalysisFactory {
+ private static final AnalysisSPILoader<TokenizerFactory> loader =
+ new AnalysisSPILoader<TokenizerFactory>(TokenizerFactory.class);
+
+ /** looks up a tokenizer by name from context classpath */
+ public static TokenizerFactory forName(String name) {
+ return loader.newInstance(name);
+ }
+
+ /** looks up a tokenizer class by name from context classpath */
+ public static Class<? extends TokenizerFactory> lookupClass(String name) {
+ return loader.lookupClass(name);
+ }
+
+ /** returns a list of all available tokenizer names from context classpath */
+ public static Set<String> availableTokenizers() {
+ return loader.availableServices();
+ }
+
+ /**
+ * Reloads the factory list from the given {@link ClassLoader}.
+ * Changes to the factories are visible after the method ends, all
+ * iterators ({@link #availableTokenizers()},...) stay consistent.
+ *
+ * <p><b>NOTE:</b> Only new factories are added, existing ones are
+ * never removed or replaced.
+ *
+ * <p><em>This method is expensive and should only be called for discovery
+ * of new factories on the given classpath/classloader!</em>
+ */
+ public static void reloadTokenizers(ClassLoader classloader) {
+ loader.reload(classloader);
+ }
+
/** Creates a TokenStream of the specified input */
public abstract Tokenizer create(Reader input);
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java Tue Jul 31 20:58:32 2012
@@ -19,7 +19,11 @@ package org.apache.lucene.analysis.util;
import java.io.BufferedReader;
import java.io.IOException;
+import java.io.InputStream;
import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.List;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
@@ -194,6 +198,47 @@ public class WordlistLoader {
return result;
}
+ /**
+ * Accesses a resource by name and returns the (non comment) lines containing
+ * data using the given character encoding.
+ *
+ * <p>
+ * A comment line is any line that starts with the character "#"
+ * </p>
+ *
+ * @return a list of non-blank non-comment lines with whitespace trimmed
+ * @throws IOException
+ */
+ public static List<String> getLines(InputStream stream, Charset charset) throws IOException{
+ BufferedReader input = null;
+ ArrayList<String> lines;
+ boolean success = false;
+ try {
+ input = getBufferedReader(IOUtils.getDecodingReader(stream, charset));
+
+ lines = new ArrayList<String>();
+ for (String word=null; (word=input.readLine())!=null;) {
+ // skip initial bom marker
+ if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF')
+ word = word.substring(1);
+ // skip comments
+ if (word.startsWith("#")) continue;
+ word=word.trim();
+ // skip blank lines
+ if (word.length()==0) continue;
+ lines.add(word);
+ }
+ success = true;
+ return lines;
+ } finally {
+ if (success) {
+ IOUtils.close(input);
+ } else {
+ IOUtils.closeWhileHandlingException(input);
+ }
+ }
+ }
+
private static BufferedReader getBufferedReader(Reader reader) {
return (reader instanceof BufferedReader) ? (BufferedReader) reader
: new BufferedReader(reader);
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java Tue Jul 31 20:58:32 2012
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 08.07.12 17:00 */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
package org.apache.lucene.analysis.wikipedia;
@@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokena
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
- * on 08.07.12 17:00 from the specification file
- * <tt>C:/Users/Uwe Schindler/Projects/lucene/lucene4199/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
+ * on 7/15/12 1:57 AM from the specification file
+ * <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
*/
class WikipediaTokenizerImpl {
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestClassicAnalyzer.java Tue Jul 31 20:58:32 2012
@@ -285,8 +285,7 @@ public class TestClassicAnalyzer extends
DocsAndPositionsEnum tps = MultiFields.getTermPositionsEnum(reader,
MultiFields.getLiveDocs(reader),
"content",
- new BytesRef("another"),
- false);
+ new BytesRef("another"));
assertTrue(tps.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(1, tps.freq());
assertEquals(3, tps.nextPosition());
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java Tue Jul 31 20:58:32 2012
@@ -103,7 +103,7 @@ public class TestKeywordAnalyzer extends
new BytesRef("Q36"),
MultiFields.getLiveDocs(reader),
null,
- false);
+ 0);
assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
td = _TestUtil.docs(random(),
reader,
@@ -111,7 +111,7 @@ public class TestKeywordAnalyzer extends
new BytesRef("Q37"),
MultiFields.getLiveDocs(reader),
null,
- false);
+ 0);
assertTrue(td.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java Tue Jul 31 20:58:32 2012
@@ -235,7 +235,7 @@ public class TestRandomChains extends Ba
private static <T> Constructor<T> castConstructor(Class<T> instanceClazz, Constructor<?> ctor) {
return (Constructor<T>) ctor;
}
- private static void getClassesForPackage(String pckgname, List<Class<?>> classes) throws Exception {
+ static void getClassesForPackage(String pckgname, List<Class<?>> classes) throws Exception {
final ClassLoader cld = TestRandomChains.class.getClassLoader();
final String path = pckgname.replace('.', '/');
final Enumeration<URL> resources = cld.getResources(path);
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java Tue Jul 31 20:58:32 2012
@@ -202,7 +202,7 @@ public class TestStandardAnalyzer extend
}
public void testUnicodeWordBreaks() throws Exception {
- WordBreakTestUnicode_6_0_0 wordBreakTest = new WordBreakTestUnicode_6_0_0();
+ WordBreakTestUnicode_6_1_0 wordBreakTest = new WordBreakTestUnicode_6_1_0();
wordBreakTest.test(a);
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java Tue Jul 31 20:58:32 2012
@@ -424,7 +424,7 @@ public class TestUAX29URLEmailTokenizer
}
public void testUnicodeWordBreaks() throws Exception {
- WordBreakTestUnicode_6_0_0 wordBreakTest = new WordBreakTestUnicode_6_0_0();
+ WordBreakTestUnicode_6_1_0 wordBreakTest = new WordBreakTestUnicode_6_1_0();
wordBreakTest.test(a);
}
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemFilterTest.java Tue Jul 31 20:58:32 2012
@@ -30,6 +30,7 @@ import org.apache.lucene.analysis.Tokeni
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.util.CharArraySet;
+import org.junit.AfterClass;
import org.junit.BeforeClass;
public class HunspellStemFilterTest extends BaseTokenStreamTestCase {
@@ -39,6 +40,10 @@ public class HunspellStemFilterTest ext
public static void beforeClass() throws IOException, ParseException {
DICTIONARY = createDict(true);
}
+ @AfterClass
+ public static void afterClass() {
+ DICTIONARY = null;
+ }
public static HunspellDictionary createDict(boolean ignoreCase) throws IOException, ParseException {
InputStream affixStream = HunspellStemmerTest.class.getResourceAsStream("test.aff");
InputStream dictStream = HunspellStemmerTest.class.getResourceAsStream("test.dic");
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemmerTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemmerTest.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemmerTest.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/HunspellStemmerTest.java Tue Jul 31 20:58:32 2012
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.hunsp
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
+import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -37,6 +38,11 @@ public class HunspellStemmerTest extends
public static void beforeClass() throws IOException, ParseException {
createStemmer(true);
}
+
+ @AfterClass
+ public static void afterClass() {
+ stemmer = null;
+ }
@Test
public void testStem_simpleSuffix() {
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java Tue Jul 31 20:58:32 2012
@@ -111,7 +111,7 @@ public class TestTeeSinkTokenFilter exte
TermsEnum termsEnum = vector.iterator(null);
termsEnum.next();
assertEquals(2, termsEnum.totalTermFreq());
- DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null, true);
+ DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null);
assertTrue(positions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
assertEquals(2, positions.freq());
positions.nextPosition();
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/build.xml?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/build.xml (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/build.xml Tue Jul 31 20:58:32 2012
@@ -26,7 +26,7 @@
<import file="../analysis-module-build.xml"/>
<path id="icujar">
- <pathelement location="lib/icu4j-4.8.1.1.jar"/>
+ <pathelement location="lib/icu4j-49.1.jar"/>
</path>
<path id="classpath">
@@ -37,19 +37,32 @@
<target name="compile-core" depends="jar-analyzers-common, common.compile-core" />
- <property name="gennorm2.src.dir" value="src/data/utr30"/>
- <property name="gennorm2.src.files"
- value="nfkc.txt nfkc_cf.txt BasicFoldings.txt DiacriticFolding.txt DingbatFolding.txt HanRadicalFolding.txt NativeDigitFolding.txt"/>
+ <property name="utr30.data.dir" location="src/data/utr30"/>
+ <target name="gen-utr30-data-files" depends="compile-tools">
+ <java
+ classname="org.apache.lucene.analysis.icu.GenerateUTR30DataFiles"
+ dir="${utr30.data.dir}"
+ fork="true"
+ failonerror="true">
+ <classpath>
+ <path refid="icujar"/>
+ <pathelement location="${build.dir}/classes/tools"/>
+ </classpath>
+ </java>
+ </target>
+
+ <property name="gennorm2.src.files"
+ value="nfc.txt nfkc.txt nfkc_cf.txt BasicFoldings.txt DiacriticFolding.txt DingbatFolding.txt HanRadicalFolding.txt NativeDigitFolding.txt"/>
<property name="gennorm2.tmp" value="${build.dir}/gennorm2/utr30.tmp"/>
<property name="gennorm2.dst" value="src/resources/org/apache/lucene/analysis/icu/utr30.nrm"/>
- <target name="gennorm2">
+ <target name="gennorm2" depends="gen-utr30-data-files">
<echo>Note that the gennorm2 and icupkg tools must be on your PATH. These tools
are part of the ICU4C package. See http://site.icu-project.org/ </echo>
<mkdir dir="${build.dir}/gennorm2"/>
<exec executable="gennorm2" failonerror="true">
<arg value="-v"/>
<arg value="-s"/>
- <arg value="${gennorm2.src.dir}"/>
+ <arg value="${utr30.data.dir}"/>
<arg line="${gennorm2.src.files}"/>
<arg value="-o"/>
<arg value="${gennorm2.tmp}"/>
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/ivy.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/ivy.xml?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/ivy.xml (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/ivy.xml Tue Jul 31 20:58:32 2012
@@ -19,7 +19,7 @@
<ivy-module version="2.0">
<info organisation="org.apache.lucene" module="analyzers-icu"/>
<dependencies>
- <dependency org="com.ibm.icu" name="icu4j" rev="4.8.1.1" transitive="false"/>
+ <dependency org="com.ibm.icu" name="icu4j" rev="49.1" transitive="false"/>
<exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}"/>
</dependencies>
</ivy-module>
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt Tue Jul 31 20:58:32 2012
@@ -20,67 +20,96 @@
### Custom Normalization mappings for UTR#30
### (http://www.unicode.org/reports/tr30/tr30-4.html)
-###
-### Created from Unicode 5.2 UCD
-###
+
+#### WARNING ####
+#### "# Rule:" lines direct content generation.
+#### All non-comments will be REMOVED when this file's contents
+#### are generated by 'ant gen-utr30-data-files'.
+#### Use "# Rule: verbatim" to keep non-comments up until
+#### the next "# Rule:" line.
+#### WARNING ####
## Accent removal
# See DiacriticFolding.txt
+
## Case Folding (done by cf)
+
## Canonical Duplicates Folding (done by cd)
+
## Dashes folding
-# [[:Dash:][:Pd:]]-2053(swung dash) > U+002D
+# Rule: [[[[:Dash:][:Pd:]]-[\u2053\uFE31\uFE32]] - [\u002D]] > 002D
058A>002D
05BE>002D
1400>002D
1806>002D
2010..2015>002D
+207B>002D
+208B>002D
+2212>002D
2E17>002D
2E1A>002D
+2E3A..2E3B>002D
301C>002D
3030>002D
30A0>002D
-#2053>002D
-2212>002D
-# FE31,FE32,FE58,FE63,FF0D done by kd
+FE58>002D
+FE63>002D
+FF0D>002D
## Greek letterforms folding (done by kd)
+
## Hebrew alternates folding (done by kd)
+
## Jamo folding (done by kd)
+
## Math symbol folding (done by kd)
+
## Native digit folding
# See NativeDigitFolding.txt
+
## Nobreak folding (done by kd)
-## Overline Folding
-FE49..FE4C>203E
+
+## Overline Folding (done by kd)
+
## Positional forms folding (done by kd)
+
## Small forms folding (done by kd)
+
## Space Folding
-# [:Zs:] > U+0020
+# Rule: [[:Zs:] - [:Changes_When_NFKC_Casefolded=Yes:] - [\u0020]] > 0020
1680>0020
180E>0020
-# 00A0, 2000..200A,202F,205F,3000 done by kd
+
## Spacing Accents folding (done by kd)
+
## Subscript folding (done by kd)
+
## Symbol folding (done by kd)
+
## Underline Folding
+# Rule: verbatim
2017>005E
FE4D..FE4F>005E
+
## Diacritic Folding
-#
+# See DiacriticFolding.txt
## Vertical forms folding (done by kd)
+
## Han Radical Folding
# See HanRadicalFolding.txt
+
## Letter Form Folding (done by kd)
## Superscript folding
# Additions to kd:
+# Rule: verbatim
02C0>0294
02C1>0295
06E5>0648
06E6>064A
## Suzhou Numeral Folding
# Additions to kd:
+# Rule: verbatim
3021>4E00
3022>4E8C
3023>4E09
@@ -92,6 +121,7 @@ FE4D..FE4F>005E
3029>4E5D
## Width Folding (done by kd)
# Punctuation Folding
+# Rule: verbatim
00AB>0022
00BB>0022
201C..201E>0022
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt Tue Jul 31 20:58:32 2012
@@ -24,41 +24,45 @@
### Created from Unicode 5.2 UCD
###
-# Removes diacritics, as defined by [:Diacritic:]
-# These may or may not be combining marks
+#### WARNING ####
+#### "# Rule:" lines direct content generation.
+#### All non-comments will be REMOVED when this file's contents
+#### are generated by 'ant gen-utr30-data-files'.
+#### Use "# Rule: verbatim" to keep non-comments up until
+#### the next "# Rule:" line.
+#### WARNING ####
+
+## Remove diacritics
+# Rule: [:Diacritic:] >
005E>
0060>
-00B7>
-02B9..02D7>
-02DE>
-02DF>
-02E5..033F>
-0342>
-0346..034E>
+00A8>
+00AF>
+00B4>
+00B7..00B8>
+02B0..034E>
0350..0357>
035D..0362>
-0375>
+0374..0375>
+037A>
+0384..0385>
0483..0487>
0559>
0591..05A1>
05A3..05BD>
05BF>
-05C1>
-05C2>
+05C1..05C2>
05C4>
064B..0652>
-0657>
-0658>
-06DF>
-06E0>
-06E5>
-06E6>
+0657..0658>
+06DF..06E0>
+06E5..06E6>
06EA..06EC>
0730..074A>
07A6..07B0>
07EB..07F5>
-0818>
-0819>
+0818..0819>
+08E4..08FE>
093C>
094D>
0951..0954>
@@ -80,24 +84,19 @@
0E47..0E4C>
0E4E>
0EC8..0ECC>
-0F18>
-0F19>
+0F18..0F19>
0F35>
0F37>
0F39>
-0F3E>
-0F3F>
+0F3E..0F3F>
0F82..0F84>
-0F86>
-0F87>
+0F86..0F87>
0FC6>
1037>
-1039>
-103A>
+1039..103A>
1087..108D>
108F>
-109A>
-109B>
+109A..109B>
17C9..17D3>
17DD>
1939..193B>
@@ -106,31 +105,33 @@
1B34>
1B44>
1B6B..1B73>
-1BAA>
-1C36>
-1C37>
+1BAA..1BAB>
+1C36..1C37>
1C78..1C7D>
1CD0..1CE8>
1CED>
-1D2F>
-1D3B>
-1D4E>
+1CF4>
+1D2C..1D6A>
1DC4..1DCF>
1DFD..1DFF>
+1FBD>
+1FBF..1FC1>
+1FCD..1FCF>
+1FDD..1FDF>
+1FED..1FEF>
+1FFD..1FFE>
2CEF..2CF1>
2E2F>
302A..302F>
-3099>
-309A>
+3099..309C>
30FC>
A66F>
-A67C>
-A67D>
+A67C..A67D>
A67F>
-A6F0>
-A6F1>
+A6F0..A6F1>
A717..A721>
A788>
+A7F8..A7F9>
A8C4>
A8E0..A8F1>
A92B..A92E>
@@ -139,12 +140,20 @@ A9B3>
A9C0>
AA7B>
AABF..AAC2>
-ABEC>
-ABED>
+AAF6>
+ABEC..ABED>
FB1E>
FE20..FE26>
-110B9>
-110BA>
+FF3E>
+FF40>
+FF70>
+FF9E..FF9F>
+FFE3>
+110B9..110BA>
+11133..11134>
+111C0>
+116B6..116B7>
+16F8F..16F9F>
1D167..1D169>
1D16D..1D172>
1D17B..1D182>
@@ -153,6 +162,7 @@ FE20..FE26>
# Latin script "composed" that do not further decompose, so decompose here
# These are from AsciiFoldingFilter
+# Rule: verbatim
00E6>0061 0065
00F0>0064
00F8>006F
@@ -491,6 +501,7 @@ A7FF>004D
# Cyrillic script "composed" that do not further decompose, so decompose here
# These are from UTR#30 DiacriticFolding.txt
+# Rule: verbatim
047D>0461
048B>0439
@@ -520,6 +531,7 @@ A7FF>004D
04CE>043C
# Additional signs and diacritics, from examination of [:Mark:]&[:Lm:]
+# Rule: verbatim
0358..035C>
05A2>
05C5>
@@ -555,6 +567,7 @@ A802>
1D242..1D244>
# Additional Arabic/Hebrew decompositions
+# Rule: verbatim
05F3>0027
05F4>0022
0629>0647
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/DingbatFolding.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/DingbatFolding.txt?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/DingbatFolding.txt (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/DingbatFolding.txt Tue Jul 31 20:58:32 2012
@@ -24,8 +24,17 @@
### Created from Unicode 5.2 UCD
###
+#### WARNING ####
+#### "# Rule:" lines direct content generation.
+#### All non-comments will be REMOVED when this file's contents
+#### are generated by 'ant gen-utr30-data-files'.
+#### Use "# Rule: verbatim" to keep non-comments up until
+#### the next "# Rule:" line.
+#### WARNING ####
+
# Folds dingbats and other adorned forms
# Generated from ASCIIFoldingFilter
+# Rule: verbatim
24EB>0031 0031
24EC>0031 0032
24ED>0031 0033
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/HanRadicalFolding.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/HanRadicalFolding.txt?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/HanRadicalFolding.txt (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/HanRadicalFolding.txt Tue Jul 31 20:58:32 2012
@@ -24,6 +24,16 @@
### Created from UTR#30 HanRadicalFolding.txt
###
+#### WARNING ####
+#### "# Rule:" lines direct content generation.
+#### All non-comments will be REMOVED when this file's contents
+#### are generated by 'ant gen-utr30-data-files'.
+#### Use "# Rule: verbatim" to keep non-comments up until
+#### the next "# Rule:" line.
+#### WARNING ####
+
+# Rule: verbatim
+
# CJK Radicals
2E81>5382
2E82>4E5B
Modified: lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt?rev=1367777&r1=1367776&r2=1367777&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt Tue Jul 31 20:58:32 2012
@@ -1,7 +1,7 @@
-# Copyright 2001-2010 Unicode, Inc.
-#
+# Copyright 2001-2012 Unicode, Inc.
+#
# Disclaimer
-#
+#
# This source code is provided as is by Unicode, Inc. No claims are
# made as to fitness for any particular purpose. No warranties of any
# kind are expressed or implied. The recipient agrees to determine
@@ -9,463 +9,485 @@
# purchased on magnetic or optical media from Unicode, Inc., the
# sole remedy for any claim will be exchange of defective media
# within 90 days of receipt.
-#
+#
# Limitations on Rights to Redistribute This Code
-#
+#
# Unicode, Inc. hereby grants the right to freely use the information
# supplied in this file in the creation of products supporting the
# Unicode Standard, and to make copies of this file in any form
# for internal or external distribution as long as this notice
# remains attached.
-### Custom Normalization mappings for UTR#30
+### Custom Normalization mappings for UTR#30
### (http://www.unicode.org/reports/tr30/tr30-4.html)
-###
-### Created from Unicode 5.2 UCD
-###
+
+#### WARNING ####
+#### "# Rule:" lines direct content generation.
+#### All non-comments will be REMOVED when this file's contents
+#### are generated by 'ant gen-utr30-data-files'.
+#### Use "# Rule: verbatim" to keep non-comments up until
+#### the next "# Rule:" line.
+#### WARNING ####
## Native digit folding
-# [:Nd:] > Ascii digit equivalent
-# Arabic-Indic
-0660>0030
-0661>0031
-0662>0032
-0663>0033
-0664>0034
-0665>0035
-0666>0036
-0667>0037
-0668>0038
-0669>0039
-# Eastern Arabic-Indic
-06F0>0030
-06F1>0031
-06F2>0032
-06F3>0033
-06F4>0034
-06F5>0035
-06F6>0036
-06F7>0037
-06F8>0038
-06F9>0039
-# NKo
-07C0>0030
-07C1>0031
-07C2>0032
-07C3>0033
-07C4>0034
-07C5>0035
-07C6>0036
-07C7>0037
-07C8>0038
-07C9>0039
-# Devanagari
-0966>0030
-0967>0031
-0968>0032
-0969>0033
-096A>0034
-096B>0035
-096C>0036
-096D>0037
-096E>0038
-096F>0039
-# Bengali
-09E6>0030
-09E7>0031
-09E8>0032
-09E9>0033
-09EA>0034
-09EB>0035
-09EC>0036
-09ED>0037
-09EE>0038
-09EF>0039
-# Gurmukhi
-0A66>0030
-0A67>0031
-0A68>0032
-0A69>0033
-0A6A>0034
-0A6B>0035
-0A6C>0036
-0A6D>0037
-0A6E>0038
-0A6F>0039
-# Gujarati
-0AE6>0030
-0AE7>0031
-0AE8>0032
-0AE9>0033
-0AEA>0034
-0AEB>0035
-0AEC>0036
-0AED>0037
-0AEE>0038
-0AEF>0039
-# Oriya
-0B66>0030
-0B67>0031
-0B68>0032
-0B69>0033
-0B6A>0034
-0B6B>0035
-0B6C>0036
-0B6D>0037
-0B6E>0038
-0B6F>0039
-# Tamil
-0BE6>0030
-0BE7>0031
-0BE8>0032
-0BE9>0033
-0BEA>0034
-0BEB>0035
-0BEC>0036
-0BED>0037
-0BEE>0038
-0BEF>0039
-# Telugu
-0C66>0030
-0C67>0031
-0C68>0032
-0C69>0033
-0C6A>0034
-0C6B>0035
-0C6C>0036
-0C6D>0037
-0C6E>0038
-0C6F>0039
-# Kannada
-0CE6>0030
-0CE7>0031
-0CE8>0032
-0CE9>0033
-0CEA>0034
-0CEB>0035
-0CEC>0036
-0CED>0037
-0CEE>0038
-0CEF>0039
-# Malayalam
-0D66>0030
-0D67>0031
-0D68>0032
-0D69>0033
-0D6A>0034
-0D6B>0035
-0D6C>0036
-0D6D>0037
-0D6E>0038
-0D6F>0039
-# Thai
-0E50>0030
-0E51>0031
-0E52>0032
-0E53>0033
-0E54>0034
-0E55>0035
-0E56>0036
-0E57>0037
-0E58>0038
-0E59>0039
-# Lao
-0ED0>0030
-0ED1>0031
-0ED2>0032
-0ED3>0033
-0ED4>0034
-0ED5>0035
-0ED6>0036
-0ED7>0037
-0ED8>0038
-0ED9>0039
-# Tibetan
-0F20>0030
-0F21>0031
-0F22>0032
-0F23>0033
-0F24>0034
-0F25>0035
-0F26>0036
-0F27>0037
-0F28>0038
-0F29>0039
-# Myanmar
-1040>0030
-1041>0031
-1042>0032
-1043>0033
-1044>0034
-1045>0035
-1046>0036
-1047>0037
-1048>0038
-1049>0039
-# Myanmar Shan
-1090>0030
-1091>0031
-1092>0032
-1093>0033
-1094>0034
-1095>0035
-1096>0036
-1097>0037
-1098>0038
-1099>0039
-# Khmer
-17E0>0030
-17E1>0031
-17E2>0032
-17E3>0033
-17E4>0034
-17E5>0035
-17E6>0036
-17E7>0037
-17E8>0038
-17E9>0039
-# Mongolian
-1810>0030
-1811>0031
-1812>0032
-1813>0033
-1814>0034
-1815>0035
-1816>0036
-1817>0037
-1818>0038
-1819>0039
-# Limbu
-1946>0030
-1947>0031
-1948>0032
-1949>0033
-194A>0034
-194B>0035
-194C>0036
-194D>0037
-194E>0038
-194F>0039
-# New Tai Lue
-19D0>0030
-19D1>0031
-19D2>0032
-19D3>0033
-19D4>0034
-19D5>0035
-19D6>0036
-19D7>0037
-19D8>0038
-19D9>0039
-# New Tai Lue Tham Digit One
-19DA>0031
-# Tai Tham Hora
-1A80>0030
-1A81>0031
-1A82>0032
-1A83>0033
-1A84>0034
-1A85>0035
-1A86>0036
-1A87>0037
-1A88>0038
-1A89>0039
-# Tai Tham Tham
-1A90>0030
-1A91>0031
-1A92>0032
-1A93>0033
-1A94>0034
-1A95>0035
-1A96>0036
-1A97>0037
-1A98>0038
-1A99>0039
-# Balinese
-1B50>0030
-1B51>0031
-1B52>0032
-1B53>0033
-1B54>0034
-1B55>0035
-1B56>0036
-1B57>0037
-1B58>0038
-1B59>0039
-# Sundanese
-1BB0>0030
-1BB1>0031
-1BB2>0032
-1BB3>0033
-1BB4>0034
-1BB5>0035
-1BB6>0036
-1BB7>0037
-1BB8>0038
-1BB9>0039
-# Lepcha
-1C40>0030
-1C41>0031
-1C42>0032
-1C43>0033
-1C44>0034
-1C45>0035
-1C46>0036
-1C47>0037
-1C48>0038
-1C49>0039
-# Ol Chiki
-1C50>0030
-1C51>0031
-1C52>0032
-1C53>0033
-1C54>0034
-1C55>0035
-1C56>0036
-1C57>0037
-1C58>0038
-1C59>0039
-# Vai
-A620>0030
-A621>0031
-A622>0032
-A623>0033
-A624>0034
-A625>0035
-A626>0036
-A627>0037
-A628>0038
-A629>0039
-# Saurashtra
-A8D0>0030
-A8D1>0031
-A8D2>0032
-A8D3>0033
-A8D4>0034
-A8D5>0035
-A8D6>0036
-A8D7>0037
-A8D8>0038
-A8D9>0039
-# Kayah Li
-A900>0030
-A901>0031
-A902>0032
-A903>0033
-A904>0034
-A905>0035
-A906>0036
-A907>0037
-A908>0038
-A909>0039
-# Javanese
-A9D0>0030
-A9D1>0031
-A9D2>0032
-A9D3>0033
-A9D4>0034
-A9D5>0035
-A9D6>0036
-A9D7>0037
-A9D8>0038
-A9D9>0039
-# Cham
-AA50>0030
-AA51>0031
-AA52>0032
-AA53>0033
-AA54>0034
-AA55>0035
-AA56>0036
-AA57>0037
-AA58>0038
-AA59>0039
-# Meetei Mayek
-ABF0>0030
-ABF1>0031
-ABF2>0032
-ABF3>0033
-ABF4>0034
-ABF5>0035
-ABF6>0036
-ABF7>0037
-ABF8>0038
-ABF9>0039
-# Halfwidth and Fullwidth Forms (done by kd)
-# Osmanya
-104A0>0030
-104A1>0031
-104A2>0032
-104A3>0033
-104A4>0034
-104A5>0035
-104A6>0036
-104A7>0037
-104A8>0038
-104A9>0039
-# Brahmi
-11066>0030
-11067>0031
-11068>0032
-11069>0033
-1106A>0034
-1106B>0035
-1106C>0036
-1106D>0037
-1106E>0038
-1106F>0039
-# Mathematical Alphanumeric Symbols - Bold digits
-1D7CE>0030
-1D7CF>0031
-1D7D0>0032
-1D7D1>0033
-1D7D2>0034
-1D7D3>0035
-1D7D4>0036
-1D7D5>0037
-1D7D6>0038
-1D7D7>0039
-# Mathematical Alphanumeric Symbols - Double-struck digits
-1D7D8>0030
-1D7D9>0031
-1D7DA>0032
-1D7DB>0033
-1D7DC>0034
-1D7DD>0035
-1D7DE>0036
-1D7DF>0037
-1D7E0>0038
-1D7E1>0039
-# Mathematical Alphanumeric Symbols - Sans-serif digits
-1D7E2>0030
-1D7E3>0031
-1D7E4>0032
-1D7E5>0033
-1D7E6>0034
-1D7E7>0035
-1D7E8>0036
-1D7E9>0037
-1D7EA>0038
-1D7EB>0039
-# Mathematical Alphanumeric Symbols - Sans-serif bold digits
-1D7EC>0030
-1D7ED>0031
-1D7EE>0032
-1D7EF>0033
-1D7F0>0034
-1D7F1>0035
-1D7F2>0036
-1D7F3>0037
-1D7F4>0038
-1D7F5>0039
-# Mathematical Alphanumeric Symbols - Monospace digits
-1D7F6>0030
-1D7F7>0031
-1D7F8>0032
-1D7F9>0033
-1D7FA>0034
-1D7FB>0035
-1D7FC>0036
-1D7FD>0037
-1D7FE>0038
-1D7FF>0039
+# Rule: [[[:Numeric_Type=Digit:][:Nd:]] - [[:Changes_When_NFKC_Casefolded=Yes:][:Block=Superscripts_And_Subscripts:][\u00B2\u00B3\u00B9][\u0030-\u0039]]] > Numeric_Value
+0660>0030 # ARABIC-INDIC DIGIT ZERO
+0661>0031 # ARABIC-INDIC DIGIT ONE
+0662>0032 # ARABIC-INDIC DIGIT TWO
+0663>0033 # ARABIC-INDIC DIGIT THREE
+0664>0034 # ARABIC-INDIC DIGIT FOUR
+0665>0035 # ARABIC-INDIC DIGIT FIVE
+0666>0036 # ARABIC-INDIC DIGIT SIX
+0667>0037 # ARABIC-INDIC DIGIT SEVEN
+0668>0038 # ARABIC-INDIC DIGIT EIGHT
+0669>0039 # ARABIC-INDIC DIGIT NINE
+06F0>0030 # EXTENDED ARABIC-INDIC DIGIT ZERO
+06F1>0031 # EXTENDED ARABIC-INDIC DIGIT ONE
+06F2>0032 # EXTENDED ARABIC-INDIC DIGIT TWO
+06F3>0033 # EXTENDED ARABIC-INDIC DIGIT THREE
+06F4>0034 # EXTENDED ARABIC-INDIC DIGIT FOUR
+06F5>0035 # EXTENDED ARABIC-INDIC DIGIT FIVE
+06F6>0036 # EXTENDED ARABIC-INDIC DIGIT SIX
+06F7>0037 # EXTENDED ARABIC-INDIC DIGIT SEVEN
+06F8>0038 # EXTENDED ARABIC-INDIC DIGIT EIGHT
+06F9>0039 # EXTENDED ARABIC-INDIC DIGIT NINE
+07C0>0030 # NKO DIGIT ZERO
+07C1>0031 # NKO DIGIT ONE
+07C2>0032 # NKO DIGIT TWO
+07C3>0033 # NKO DIGIT THREE
+07C4>0034 # NKO DIGIT FOUR
+07C5>0035 # NKO DIGIT FIVE
+07C6>0036 # NKO DIGIT SIX
+07C7>0037 # NKO DIGIT SEVEN
+07C8>0038 # NKO DIGIT EIGHT
+07C9>0039 # NKO DIGIT NINE
+0966>0030 # DEVANAGARI DIGIT ZERO
+0967>0031 # DEVANAGARI DIGIT ONE
+0968>0032 # DEVANAGARI DIGIT TWO
+0969>0033 # DEVANAGARI DIGIT THREE
+096A>0034 # DEVANAGARI DIGIT FOUR
+096B>0035 # DEVANAGARI DIGIT FIVE
+096C>0036 # DEVANAGARI DIGIT SIX
+096D>0037 # DEVANAGARI DIGIT SEVEN
+096E>0038 # DEVANAGARI DIGIT EIGHT
+096F>0039 # DEVANAGARI DIGIT NINE
+09E6>0030 # BENGALI DIGIT ZERO
+09E7>0031 # BENGALI DIGIT ONE
+09E8>0032 # BENGALI DIGIT TWO
+09E9>0033 # BENGALI DIGIT THREE
+09EA>0034 # BENGALI DIGIT FOUR
+09EB>0035 # BENGALI DIGIT FIVE
+09EC>0036 # BENGALI DIGIT SIX
+09ED>0037 # BENGALI DIGIT SEVEN
+09EE>0038 # BENGALI DIGIT EIGHT
+09EF>0039 # BENGALI DIGIT NINE
+0A66>0030 # GURMUKHI DIGIT ZERO
+0A67>0031 # GURMUKHI DIGIT ONE
+0A68>0032 # GURMUKHI DIGIT TWO
+0A69>0033 # GURMUKHI DIGIT THREE
+0A6A>0034 # GURMUKHI DIGIT FOUR
+0A6B>0035 # GURMUKHI DIGIT FIVE
+0A6C>0036 # GURMUKHI DIGIT SIX
+0A6D>0037 # GURMUKHI DIGIT SEVEN
+0A6E>0038 # GURMUKHI DIGIT EIGHT
+0A6F>0039 # GURMUKHI DIGIT NINE
+0AE6>0030 # GUJARATI DIGIT ZERO
+0AE7>0031 # GUJARATI DIGIT ONE
+0AE8>0032 # GUJARATI DIGIT TWO
+0AE9>0033 # GUJARATI DIGIT THREE
+0AEA>0034 # GUJARATI DIGIT FOUR
+0AEB>0035 # GUJARATI DIGIT FIVE
+0AEC>0036 # GUJARATI DIGIT SIX
+0AED>0037 # GUJARATI DIGIT SEVEN
+0AEE>0038 # GUJARATI DIGIT EIGHT
+0AEF>0039 # GUJARATI DIGIT NINE
+0B66>0030 # ORIYA DIGIT ZERO
+0B67>0031 # ORIYA DIGIT ONE
+0B68>0032 # ORIYA DIGIT TWO
+0B69>0033 # ORIYA DIGIT THREE
+0B6A>0034 # ORIYA DIGIT FOUR
+0B6B>0035 # ORIYA DIGIT FIVE
+0B6C>0036 # ORIYA DIGIT SIX
+0B6D>0037 # ORIYA DIGIT SEVEN
+0B6E>0038 # ORIYA DIGIT EIGHT
+0B6F>0039 # ORIYA DIGIT NINE
+0BE6>0030 # TAMIL DIGIT ZERO
+0BE7>0031 # TAMIL DIGIT ONE
+0BE8>0032 # TAMIL DIGIT TWO
+0BE9>0033 # TAMIL DIGIT THREE
+0BEA>0034 # TAMIL DIGIT FOUR
+0BEB>0035 # TAMIL DIGIT FIVE
+0BEC>0036 # TAMIL DIGIT SIX
+0BED>0037 # TAMIL DIGIT SEVEN
+0BEE>0038 # TAMIL DIGIT EIGHT
+0BEF>0039 # TAMIL DIGIT NINE
+0C66>0030 # TELUGU DIGIT ZERO
+0C67>0031 # TELUGU DIGIT ONE
+0C68>0032 # TELUGU DIGIT TWO
+0C69>0033 # TELUGU DIGIT THREE
+0C6A>0034 # TELUGU DIGIT FOUR
+0C6B>0035 # TELUGU DIGIT FIVE
+0C6C>0036 # TELUGU DIGIT SIX
+0C6D>0037 # TELUGU DIGIT SEVEN
+0C6E>0038 # TELUGU DIGIT EIGHT
+0C6F>0039 # TELUGU DIGIT NINE
+0CE6>0030 # KANNADA DIGIT ZERO
+0CE7>0031 # KANNADA DIGIT ONE
+0CE8>0032 # KANNADA DIGIT TWO
+0CE9>0033 # KANNADA DIGIT THREE
+0CEA>0034 # KANNADA DIGIT FOUR
+0CEB>0035 # KANNADA DIGIT FIVE
+0CEC>0036 # KANNADA DIGIT SIX
+0CED>0037 # KANNADA DIGIT SEVEN
+0CEE>0038 # KANNADA DIGIT EIGHT
+0CEF>0039 # KANNADA DIGIT NINE
+0D66>0030 # MALAYALAM DIGIT ZERO
+0D67>0031 # MALAYALAM DIGIT ONE
+0D68>0032 # MALAYALAM DIGIT TWO
+0D69>0033 # MALAYALAM DIGIT THREE
+0D6A>0034 # MALAYALAM DIGIT FOUR
+0D6B>0035 # MALAYALAM DIGIT FIVE
+0D6C>0036 # MALAYALAM DIGIT SIX
+0D6D>0037 # MALAYALAM DIGIT SEVEN
+0D6E>0038 # MALAYALAM DIGIT EIGHT
+0D6F>0039 # MALAYALAM DIGIT NINE
+0E50>0030 # THAI DIGIT ZERO
+0E51>0031 # THAI DIGIT ONE
+0E52>0032 # THAI DIGIT TWO
+0E53>0033 # THAI DIGIT THREE
+0E54>0034 # THAI DIGIT FOUR
+0E55>0035 # THAI DIGIT FIVE
+0E56>0036 # THAI DIGIT SIX
+0E57>0037 # THAI DIGIT SEVEN
+0E58>0038 # THAI DIGIT EIGHT
+0E59>0039 # THAI DIGIT NINE
+0ED0>0030 # LAO DIGIT ZERO
+0ED1>0031 # LAO DIGIT ONE
+0ED2>0032 # LAO DIGIT TWO
+0ED3>0033 # LAO DIGIT THREE
+0ED4>0034 # LAO DIGIT FOUR
+0ED5>0035 # LAO DIGIT FIVE
+0ED6>0036 # LAO DIGIT SIX
+0ED7>0037 # LAO DIGIT SEVEN
+0ED8>0038 # LAO DIGIT EIGHT
+0ED9>0039 # LAO DIGIT NINE
+0F20>0030 # TIBETAN DIGIT ZERO
+0F21>0031 # TIBETAN DIGIT ONE
+0F22>0032 # TIBETAN DIGIT TWO
+0F23>0033 # TIBETAN DIGIT THREE
+0F24>0034 # TIBETAN DIGIT FOUR
+0F25>0035 # TIBETAN DIGIT FIVE
+0F26>0036 # TIBETAN DIGIT SIX
+0F27>0037 # TIBETAN DIGIT SEVEN
+0F28>0038 # TIBETAN DIGIT EIGHT
+0F29>0039 # TIBETAN DIGIT NINE
+1040>0030 # MYANMAR DIGIT ZERO
+1041>0031 # MYANMAR DIGIT ONE
+1042>0032 # MYANMAR DIGIT TWO
+1043>0033 # MYANMAR DIGIT THREE
+1044>0034 # MYANMAR DIGIT FOUR
+1045>0035 # MYANMAR DIGIT FIVE
+1046>0036 # MYANMAR DIGIT SIX
+1047>0037 # MYANMAR DIGIT SEVEN
+1048>0038 # MYANMAR DIGIT EIGHT
+1049>0039 # MYANMAR DIGIT NINE
+1090>0030 # MYANMAR SHAN DIGIT ZERO
+1091>0031 # MYANMAR SHAN DIGIT ONE
+1092>0032 # MYANMAR SHAN DIGIT TWO
+1093>0033 # MYANMAR SHAN DIGIT THREE
+1094>0034 # MYANMAR SHAN DIGIT FOUR
+1095>0035 # MYANMAR SHAN DIGIT FIVE
+1096>0036 # MYANMAR SHAN DIGIT SIX
+1097>0037 # MYANMAR SHAN DIGIT SEVEN
+1098>0038 # MYANMAR SHAN DIGIT EIGHT
+1099>0039 # MYANMAR SHAN DIGIT NINE
+1369>0031 # ETHIOPIC DIGIT ONE
+136A>0032 # ETHIOPIC DIGIT TWO
+136B>0033 # ETHIOPIC DIGIT THREE
+136C>0034 # ETHIOPIC DIGIT FOUR
+136D>0035 # ETHIOPIC DIGIT FIVE
+136E>0036 # ETHIOPIC DIGIT SIX
+136F>0037 # ETHIOPIC DIGIT SEVEN
+1370>0038 # ETHIOPIC DIGIT EIGHT
+1371>0039 # ETHIOPIC DIGIT NINE
+17E0>0030 # KHMER DIGIT ZERO
+17E1>0031 # KHMER DIGIT ONE
+17E2>0032 # KHMER DIGIT TWO
+17E3>0033 # KHMER DIGIT THREE
+17E4>0034 # KHMER DIGIT FOUR
+17E5>0035 # KHMER DIGIT FIVE
+17E6>0036 # KHMER DIGIT SIX
+17E7>0037 # KHMER DIGIT SEVEN
+17E8>0038 # KHMER DIGIT EIGHT
+17E9>0039 # KHMER DIGIT NINE
+1810>0030 # MONGOLIAN DIGIT ZERO
+1811>0031 # MONGOLIAN DIGIT ONE
+1812>0032 # MONGOLIAN DIGIT TWO
+1813>0033 # MONGOLIAN DIGIT THREE
+1814>0034 # MONGOLIAN DIGIT FOUR
+1815>0035 # MONGOLIAN DIGIT FIVE
+1816>0036 # MONGOLIAN DIGIT SIX
+1817>0037 # MONGOLIAN DIGIT SEVEN
+1818>0038 # MONGOLIAN DIGIT EIGHT
+1819>0039 # MONGOLIAN DIGIT NINE
+1946>0030 # LIMBU DIGIT ZERO
+1947>0031 # LIMBU DIGIT ONE
+1948>0032 # LIMBU DIGIT TWO
+1949>0033 # LIMBU DIGIT THREE
+194A>0034 # LIMBU DIGIT FOUR
+194B>0035 # LIMBU DIGIT FIVE
+194C>0036 # LIMBU DIGIT SIX
+194D>0037 # LIMBU DIGIT SEVEN
+194E>0038 # LIMBU DIGIT EIGHT
+194F>0039 # LIMBU DIGIT NINE
+19D0>0030 # NEW TAI LUE DIGIT ZERO
+19D1>0031 # NEW TAI LUE DIGIT ONE
+19D2>0032 # NEW TAI LUE DIGIT TWO
+19D3>0033 # NEW TAI LUE DIGIT THREE
+19D4>0034 # NEW TAI LUE DIGIT FOUR
+19D5>0035 # NEW TAI LUE DIGIT FIVE
+19D6>0036 # NEW TAI LUE DIGIT SIX
+19D7>0037 # NEW TAI LUE DIGIT SEVEN
+19D8>0038 # NEW TAI LUE DIGIT EIGHT
+19D9>0039 # NEW TAI LUE DIGIT NINE
+19DA>0031 # NEW TAI LUE THAM DIGIT ONE
+1A80>0030 # TAI THAM HORA DIGIT ZERO
+1A81>0031 # TAI THAM HORA DIGIT ONE
+1A82>0032 # TAI THAM HORA DIGIT TWO
+1A83>0033 # TAI THAM HORA DIGIT THREE
+1A84>0034 # TAI THAM HORA DIGIT FOUR
+1A85>0035 # TAI THAM HORA DIGIT FIVE
+1A86>0036 # TAI THAM HORA DIGIT SIX
+1A87>0037 # TAI THAM HORA DIGIT SEVEN
+1A88>0038 # TAI THAM HORA DIGIT EIGHT
+1A89>0039 # TAI THAM HORA DIGIT NINE
+1A90>0030 # TAI THAM THAM DIGIT ZERO
+1A91>0031 # TAI THAM THAM DIGIT ONE
+1A92>0032 # TAI THAM THAM DIGIT TWO
+1A93>0033 # TAI THAM THAM DIGIT THREE
+1A94>0034 # TAI THAM THAM DIGIT FOUR
+1A95>0035 # TAI THAM THAM DIGIT FIVE
+1A96>0036 # TAI THAM THAM DIGIT SIX
+1A97>0037 # TAI THAM THAM DIGIT SEVEN
+1A98>0038 # TAI THAM THAM DIGIT EIGHT
+1A99>0039 # TAI THAM THAM DIGIT NINE
+1B50>0030 # BALINESE DIGIT ZERO
+1B51>0031 # BALINESE DIGIT ONE
+1B52>0032 # BALINESE DIGIT TWO
+1B53>0033 # BALINESE DIGIT THREE
+1B54>0034 # BALINESE DIGIT FOUR
+1B55>0035 # BALINESE DIGIT FIVE
+1B56>0036 # BALINESE DIGIT SIX
+1B57>0037 # BALINESE DIGIT SEVEN
+1B58>0038 # BALINESE DIGIT EIGHT
+1B59>0039 # BALINESE DIGIT NINE
+1BB0>0030 # SUNDANESE DIGIT ZERO
+1BB1>0031 # SUNDANESE DIGIT ONE
+1BB2>0032 # SUNDANESE DIGIT TWO
+1BB3>0033 # SUNDANESE DIGIT THREE
+1BB4>0034 # SUNDANESE DIGIT FOUR
+1BB5>0035 # SUNDANESE DIGIT FIVE
+1BB6>0036 # SUNDANESE DIGIT SIX
+1BB7>0037 # SUNDANESE DIGIT SEVEN
+1BB8>0038 # SUNDANESE DIGIT EIGHT
+1BB9>0039 # SUNDANESE DIGIT NINE
+1C40>0030 # LEPCHA DIGIT ZERO
+1C41>0031 # LEPCHA DIGIT ONE
+1C42>0032 # LEPCHA DIGIT TWO
+1C43>0033 # LEPCHA DIGIT THREE
+1C44>0034 # LEPCHA DIGIT FOUR
+1C45>0035 # LEPCHA DIGIT FIVE
+1C46>0036 # LEPCHA DIGIT SIX
+1C47>0037 # LEPCHA DIGIT SEVEN
+1C48>0038 # LEPCHA DIGIT EIGHT
+1C49>0039 # LEPCHA DIGIT NINE
+1C50>0030 # OL CHIKI DIGIT ZERO
+1C51>0031 # OL CHIKI DIGIT ONE
+1C52>0032 # OL CHIKI DIGIT TWO
+1C53>0033 # OL CHIKI DIGIT THREE
+1C54>0034 # OL CHIKI DIGIT FOUR
+1C55>0035 # OL CHIKI DIGIT FIVE
+1C56>0036 # OL CHIKI DIGIT SIX
+1C57>0037 # OL CHIKI DIGIT SEVEN
+1C58>0038 # OL CHIKI DIGIT EIGHT
+1C59>0039 # OL CHIKI DIGIT NINE
+24F5>0031 # DOUBLE CIRCLED DIGIT ONE
+24F6>0032 # DOUBLE CIRCLED DIGIT TWO
+24F7>0033 # DOUBLE CIRCLED DIGIT THREE
+24F8>0034 # DOUBLE CIRCLED DIGIT FOUR
+24F9>0035 # DOUBLE CIRCLED DIGIT FIVE
+24FA>0036 # DOUBLE CIRCLED DIGIT SIX
+24FB>0037 # DOUBLE CIRCLED DIGIT SEVEN
+24FC>0038 # DOUBLE CIRCLED DIGIT EIGHT
+24FD>0039 # DOUBLE CIRCLED DIGIT NINE
+24FF>0030 # NEGATIVE CIRCLED DIGIT ZERO
+2776>0031 # DINGBAT NEGATIVE CIRCLED DIGIT ONE
+2777>0032 # DINGBAT NEGATIVE CIRCLED DIGIT TWO
+2778>0033 # DINGBAT NEGATIVE CIRCLED DIGIT THREE
+2779>0034 # DINGBAT NEGATIVE CIRCLED DIGIT FOUR
+277A>0035 # DINGBAT NEGATIVE CIRCLED DIGIT FIVE
+277B>0036 # DINGBAT NEGATIVE CIRCLED DIGIT SIX
+277C>0037 # DINGBAT NEGATIVE CIRCLED DIGIT SEVEN
+277D>0038 # DINGBAT NEGATIVE CIRCLED DIGIT EIGHT
+277E>0039 # DINGBAT NEGATIVE CIRCLED DIGIT NINE
+2780>0031 # DINGBAT CIRCLED SANS-SERIF DIGIT ONE
+2781>0032 # DINGBAT CIRCLED SANS-SERIF DIGIT TWO
+2782>0033 # DINGBAT CIRCLED SANS-SERIF DIGIT THREE
+2783>0034 # DINGBAT CIRCLED SANS-SERIF DIGIT FOUR
+2784>0035 # DINGBAT CIRCLED SANS-SERIF DIGIT FIVE
+2785>0036 # DINGBAT CIRCLED SANS-SERIF DIGIT SIX
+2786>0037 # DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN
+2787>0038 # DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT
+2788>0039 # DINGBAT CIRCLED SANS-SERIF DIGIT NINE
+278A>0031 # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE
+278B>0032 # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO
+278C>0033 # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE
+278D>0034 # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR
+278E>0035 # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE
+278F>0036 # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX
+2790>0037 # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN
+2791>0038 # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT
+2792>0039 # DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE
+A620>0030 # VAI DIGIT ZERO
+A621>0031 # VAI DIGIT ONE
+A622>0032 # VAI DIGIT TWO
+A623>0033 # VAI DIGIT THREE
+A624>0034 # VAI DIGIT FOUR
+A625>0035 # VAI DIGIT FIVE
+A626>0036 # VAI DIGIT SIX
+A627>0037 # VAI DIGIT SEVEN
+A628>0038 # VAI DIGIT EIGHT
+A629>0039 # VAI DIGIT NINE
+A8D0>0030 # SAURASHTRA DIGIT ZERO
+A8D1>0031 # SAURASHTRA DIGIT ONE
+A8D2>0032 # SAURASHTRA DIGIT TWO
+A8D3>0033 # SAURASHTRA DIGIT THREE
+A8D4>0034 # SAURASHTRA DIGIT FOUR
+A8D5>0035 # SAURASHTRA DIGIT FIVE
+A8D6>0036 # SAURASHTRA DIGIT SIX
+A8D7>0037 # SAURASHTRA DIGIT SEVEN
+A8D8>0038 # SAURASHTRA DIGIT EIGHT
+A8D9>0039 # SAURASHTRA DIGIT NINE
+A900>0030 # KAYAH LI DIGIT ZERO
+A901>0031 # KAYAH LI DIGIT ONE
+A902>0032 # KAYAH LI DIGIT TWO
+A903>0033 # KAYAH LI DIGIT THREE
+A904>0034 # KAYAH LI DIGIT FOUR
+A905>0035 # KAYAH LI DIGIT FIVE
+A906>0036 # KAYAH LI DIGIT SIX
+A907>0037 # KAYAH LI DIGIT SEVEN
+A908>0038 # KAYAH LI DIGIT EIGHT
+A909>0039 # KAYAH LI DIGIT NINE
+A9D0>0030 # JAVANESE DIGIT ZERO
+A9D1>0031 # JAVANESE DIGIT ONE
+A9D2>0032 # JAVANESE DIGIT TWO
+A9D3>0033 # JAVANESE DIGIT THREE
+A9D4>0034 # JAVANESE DIGIT FOUR
+A9D5>0035 # JAVANESE DIGIT FIVE
+A9D6>0036 # JAVANESE DIGIT SIX
+A9D7>0037 # JAVANESE DIGIT SEVEN
+A9D8>0038 # JAVANESE DIGIT EIGHT
+A9D9>0039 # JAVANESE DIGIT NINE
+AA50>0030 # CHAM DIGIT ZERO
+AA51>0031 # CHAM DIGIT ONE
+AA52>0032 # CHAM DIGIT TWO
+AA53>0033 # CHAM DIGIT THREE
+AA54>0034 # CHAM DIGIT FOUR
+AA55>0035 # CHAM DIGIT FIVE
+AA56>0036 # CHAM DIGIT SIX
+AA57>0037 # CHAM DIGIT SEVEN
+AA58>0038 # CHAM DIGIT EIGHT
+AA59>0039 # CHAM DIGIT NINE
+ABF0>0030 # MEETEI MAYEK DIGIT ZERO
+ABF1>0031 # MEETEI MAYEK DIGIT ONE
+ABF2>0032 # MEETEI MAYEK DIGIT TWO
+ABF3>0033 # MEETEI MAYEK DIGIT THREE
+ABF4>0034 # MEETEI MAYEK DIGIT FOUR
+ABF5>0035 # MEETEI MAYEK DIGIT FIVE
+ABF6>0036 # MEETEI MAYEK DIGIT SIX
+ABF7>0037 # MEETEI MAYEK DIGIT SEVEN
+ABF8>0038 # MEETEI MAYEK DIGIT EIGHT
+ABF9>0039 # MEETEI MAYEK DIGIT NINE
+104A0>0030 # OSMANYA DIGIT ZERO
+104A1>0031 # OSMANYA DIGIT ONE
+104A2>0032 # OSMANYA DIGIT TWO
+104A3>0033 # OSMANYA DIGIT THREE
+104A4>0034 # OSMANYA DIGIT FOUR
+104A5>0035 # OSMANYA DIGIT FIVE
+104A6>0036 # OSMANYA DIGIT SIX
+104A7>0037 # OSMANYA DIGIT SEVEN
+104A8>0038 # OSMANYA DIGIT EIGHT
+104A9>0039 # OSMANYA DIGIT NINE
+10A40>0031 # KHAROSHTHI DIGIT ONE
+10A41>0032 # KHAROSHTHI DIGIT TWO
+10A42>0033 # KHAROSHTHI DIGIT THREE
+10A43>0034 # KHAROSHTHI DIGIT FOUR
+10E60>0031 # RUMI DIGIT ONE
+10E61>0032 # RUMI DIGIT TWO
+10E62>0033 # RUMI DIGIT THREE
+10E63>0034 # RUMI DIGIT FOUR
+10E64>0035 # RUMI DIGIT FIVE
+10E65>0036 # RUMI DIGIT SIX
+10E66>0037 # RUMI DIGIT SEVEN
+10E67>0038 # RUMI DIGIT EIGHT
+10E68>0039 # RUMI DIGIT NINE
+11052>0031 # BRAHMI NUMBER ONE
+11053>0032 # BRAHMI NUMBER TWO
+11054>0033 # BRAHMI NUMBER THREE
+11055>0034 # BRAHMI NUMBER FOUR
+11056>0035 # BRAHMI NUMBER FIVE
+11057>0036 # BRAHMI NUMBER SIX
+11058>0037 # BRAHMI NUMBER SEVEN
+11059>0038 # BRAHMI NUMBER EIGHT
+1105A>0039 # BRAHMI NUMBER NINE
+11066>0030 # BRAHMI DIGIT ZERO
+11067>0031 # BRAHMI DIGIT ONE
+11068>0032 # BRAHMI DIGIT TWO
+11069>0033 # BRAHMI DIGIT THREE
+1106A>0034 # BRAHMI DIGIT FOUR
+1106B>0035 # BRAHMI DIGIT FIVE
+1106C>0036 # BRAHMI DIGIT SIX
+1106D>0037 # BRAHMI DIGIT SEVEN
+1106E>0038 # BRAHMI DIGIT EIGHT
+1106F>0039 # BRAHMI DIGIT NINE
+110F0>0030 # SORA SOMPENG DIGIT ZERO
+110F1>0031 # SORA SOMPENG DIGIT ONE
+110F2>0032 # SORA SOMPENG DIGIT TWO
+110F3>0033 # SORA SOMPENG DIGIT THREE
+110F4>0034 # SORA SOMPENG DIGIT FOUR
+110F5>0035 # SORA SOMPENG DIGIT FIVE
+110F6>0036 # SORA SOMPENG DIGIT SIX
+110F7>0037 # SORA SOMPENG DIGIT SEVEN
+110F8>0038 # SORA SOMPENG DIGIT EIGHT
+110F9>0039 # SORA SOMPENG DIGIT NINE
+11136>0030 # CHAKMA DIGIT ZERO
+11137>0031 # CHAKMA DIGIT ONE
+11138>0032 # CHAKMA DIGIT TWO
+11139>0033 # CHAKMA DIGIT THREE
+1113A>0034 # CHAKMA DIGIT FOUR
+1113B>0035 # CHAKMA DIGIT FIVE
+1113C>0036 # CHAKMA DIGIT SIX
+1113D>0037 # CHAKMA DIGIT SEVEN
+1113E>0038 # CHAKMA DIGIT EIGHT
+1113F>0039 # CHAKMA DIGIT NINE
+111D0>0030 # SHARADA DIGIT ZERO
+111D1>0031 # SHARADA DIGIT ONE
+111D2>0032 # SHARADA DIGIT TWO
+111D3>0033 # SHARADA DIGIT THREE
+111D4>0034 # SHARADA DIGIT FOUR
+111D5>0035 # SHARADA DIGIT FIVE
+111D6>0036 # SHARADA DIGIT SIX
+111D7>0037 # SHARADA DIGIT SEVEN
+111D8>0038 # SHARADA DIGIT EIGHT
+111D9>0039 # SHARADA DIGIT NINE
+116C0>0030 # TAKRI DIGIT ZERO
+116C1>0031 # TAKRI DIGIT ONE
+116C2>0032 # TAKRI DIGIT TWO
+116C3>0033 # TAKRI DIGIT THREE
+116C4>0034 # TAKRI DIGIT FOUR
+116C5>0035 # TAKRI DIGIT FIVE
+116C6>0036 # TAKRI DIGIT SIX
+116C7>0037 # TAKRI DIGIT SEVEN
+116C8>0038 # TAKRI DIGIT EIGHT
+116C9>0039 # TAKRI DIGIT NINE
+